Skip to content

Commit acd8179

Browse files
committed
Fix issue where closing square brackets could be included in a URL when the URL was simply wrapped in square brackets
Fixes #228
1 parent dffce1e commit acd8179

File tree

3 files changed

+126
-50
lines changed

3 files changed

+126
-50
lines changed

src/matcher/url-matcher.ts

Lines changed: 53 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -126,32 +126,6 @@ export class UrlMatcher extends Matcher {
126126
protected wordCharRegExp = new RegExp( '[' + alphaNumericAndMarksCharsStr + ']' );
127127

128128

129-
/**
130-
* The regular expression to match opening parenthesis in a URL match.
131-
*
132-
* This is to determine if we have unbalanced parenthesis in the URL, and to
133-
* drop the final parenthesis that was matched if so.
134-
*
135-
* Ex: The text "(check out: wikipedia.com/something_(disambiguation))"
136-
* should only autolink the inner "wikipedia.com/something_(disambiguation)"
137-
* part, so if we find that we have unbalanced parenthesis, we will drop the
138-
* last one for the match.
139-
*
140-
* @protected
141-
* @property {RegExp}
142-
*/
143-
protected openParensRe = /\(/g;
144-
145-
/**
146-
* The regular expression to match closing parenthesis in a URL match. See
147-
* {@link #openParensRe} for more information.
148-
*
149-
* @protected
150-
* @property {RegExp}
151-
*/
152-
protected closeParensRe = /\)/g;
153-
154-
155129
/**
156130
* @method constructor
157131
* @param {Object} cfg The configuration properties for the Match instance,
@@ -207,13 +181,16 @@ export class UrlMatcher extends Matcher {
207181
continue;
208182
}
209183

210-
if( /\?$/.test(matchStr) ) {
211-
matchStr = matchStr.substr(0, matchStr.length-1);
184+
// If the URL ends with a question mark, don't include the question
185+
// mark as part of the URL. We'll assume the question mark was the
186+
// end of a sentence, such as: "Going to google.com?"
187+
if( /\?$/.test( matchStr ) ) {
188+
matchStr = matchStr.substr( 0, matchStr.length-1 );
212189
}
213190

214-
// Handle a closing parenthesis at the end of the match, and exclude
215-
// it if there is not a matching open parenthesis in the match
216-
// itself.
191+
// Handle a closing parenthesis or square bracket at the end of the
192+
// match, and exclude it if there is not a matching open parenthesis
193+
// or square bracket in the match itself.
217194
if( this.matchHasUnbalancedClosingParen( matchStr ) ) {
218195
matchStr = matchStr.substr( 0, matchStr.length - 1 ); // remove the trailing ")" or "]"
219196
} else {
@@ -246,38 +223,66 @@ export class UrlMatcher extends Matcher {
246223

247224

248225
/**
249-
* Determines if a match found has an unmatched closing parenthesis. If so,
250-
* this parenthesis will be removed from the match itself, and appended
251-
* after the generated anchor tag.
226+
* Determines if a match found has an unmatched closing parenthesis or
227+
* square bracket. If so, the parenthesis or square bracket will be removed
228+
* from the match itself, and appended after the generated anchor tag.
252229
*
253230
* A match may have an extra closing parenthesis at the end of the match
254231
* because the regular expression must include parenthesis for URLs such as
255232
* "wikipedia.com/something_(disambiguation)", which should be auto-linked.
256233
*
257234
* However, an extra parenthesis *will* be included when the URL itself is
258-
* wrapped in parenthesis, such as in the case of "(wikipedia.com/something_(disambiguation))".
235+
* wrapped in parenthesis, such as in the case of:
236+
* "(wikipedia.com/something_(disambiguation))"
259237
* In this case, the last closing parenthesis should *not* be part of the
260-
* URL itself, and this method will return `true`.
238+
* URL itself, and this method will return `true`.
239+
*
240+
* For square brackets in URLs such as in PHP arrays, the same behavior as
241+
* parenthesis discussed above should happen:
242+
* "[http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3]"
243+
* The closing square bracket should not be part of the URL itself, and this
244+
* method will return `true`.
261245
*
262246
* @protected
263247
* @param {String} matchStr The full match string from the {@link #matcherRegex}.
264-
* @return {Boolean} `true` if there is an unbalanced closing parenthesis at
265-
* the end of the `matchStr`, `false` otherwise.
248+
* @return {Boolean} `true` if there is an unbalanced closing parenthesis or
249+
* square bracket at the end of the `matchStr`, `false` otherwise.
266250
*/
267-
protected matchHasUnbalancedClosingParen( matchStr: string ) {
268-
let lastChar = matchStr.charAt( matchStr.length - 1 );
269-
270-
if( lastChar === ')' ) {
271-
let openParensMatch = matchStr.match( this.openParensRe ),
272-
closeParensMatch = matchStr.match( this.closeParensRe ),
273-
numOpenParens = ( openParensMatch && openParensMatch.length ) || 0,
274-
numCloseParens = ( closeParensMatch && closeParensMatch.length ) || 0;
251+
protected matchHasUnbalancedClosingParen( matchStr: string ): boolean {
252+
let endChar = matchStr.charAt( matchStr.length - 1 );
253+
let startChar: string;
254+
255+
if( endChar === ')' ) {
256+
startChar = '(';
257+
} else if( endChar === ']' ) {
258+
startChar = '[';
259+
} else {
260+
return false; // not a close parenthesis or square bracket
261+
}
275262

276-
if( numOpenParens < numCloseParens ) {
277-
return true;
263+
// Find if there are the same number of open braces as close braces in
264+
// the URL string, minus the last character (which we have already
265+
// determined to be either ')' or ']')
266+
let numOpenBraces = 0;
267+
for( let i = 0, len = matchStr.length - 1; i < len; i++ ) {
268+
const char = matchStr.charAt( i );
269+
270+
if( char === startChar ) {
271+
numOpenBraces++;
272+
} else if( char === endChar ) {
273+
numOpenBraces = Math.max( numOpenBraces - 1, 0 );
278274
}
279275
}
280276

277+
// If the number of open braces matches the number of close braces in
278+
// the URL minus the last character, then the match has *unbalanced*
279+
// braces because of the last character. Example of unbalanced braces
280+
// from the regex match:
281+
// "http://example.com?a[]=1]"
282+
if( numOpenBraces === 0 ) {
283+
return true;
284+
}
285+
281286
return false;
282287
}
283288

tests/autolinker-mention.spec.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ describe( "Autolinker Mention Matching -", () => {
8686

8787

8888
it( `should automatically link ${serviceName} handles surrounded by
89-
braces`,
89+
curly brackets`,
9090
() => {
9191
let result = autolinker.link( `Joe's ${serviceName} is {@joe_the_man12}` );
9292

@@ -95,7 +95,7 @@ describe( "Autolinker Mention Matching -", () => {
9595

9696

9797
it( `should automatically link ${serviceName} handles surrounded by
98-
brackets`,
98+
square brackets`,
9999
() => {
100100
let result = autolinker.link( `Joe's ${serviceName} is [@joe_the_man12]` );
101101

tests/autolinker-url.spec.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,6 +679,77 @@ describe( "Autolinker Url Matching -", () => {
679679
} );
680680

681681

682+
describe( "square bracket handling", () => {
683+
684+
it( `when the url is surrounded by square brackets, it should not include
685+
the final closing bracket in the URL`,
686+
() => {
687+
let result = autolinker.link( "Click here [google.com] for more details" );
688+
expect( result ).toBe( 'Click here [<a href="http://google.com">google.com</a>] for more details' );
689+
} );
690+
691+
692+
it( `when the URL starts with a scheme, and is surrounded by square
693+
brackets, should not include the final closing bracket in the URL
694+
(Issue #228)`,
695+
() => {
696+
let result = autolinker.link( "Click here [http://example.com] for more details" );
697+
expect( result ).toBe( 'Click here [<a href="http://example.com">example.com</a>] for more details' );
698+
} );
699+
700+
701+
it( `when the URL ends with a closing square bracket, but there is no
702+
matching open square bracket, should not include the final closing
703+
bracket in the URL (Issue #228)`,
704+
() => {
705+
let result = autolinker.link( "Click here [cat http://example.com] for more details" );
706+
expect( result ).toBe( 'Click here [cat <a href="http://example.com">example.com</a>] for more details' );
707+
} );
708+
709+
710+
it( "should not include a final closing bracket in the URL when a path exists", function() {
711+
let result = autolinker.link( "Click here [google.com/abc] for more details" );
712+
expect( result ).toBe( 'Click here [<a href="http://google.com/abc">google.com/abc</a>] for more details' );
713+
} );
714+
715+
716+
it( "should not include a final closing bracket in the URL when a query string exists", function() {
717+
let result = autolinker.link( "Click here [google.com?abc=1] for more details" );
718+
expect( result ).toBe( 'Click here [<a href="http://google.com?abc=1">google.com?abc=1</a>] for more details' );
719+
} );
720+
721+
722+
it( "should not include a final closing bracket in the URL when a hash anchor exists", function() {
723+
let result = autolinker.link( "Click here [google.com#abc] for more details" );
724+
expect( result ).toBe( 'Click here [<a href="http://google.com#abc">google.com#abc</a>] for more details' );
725+
} );
726+
727+
728+
it( "should include escaped brackets in the URL", function() {
729+
let result = autolinker.link( "Here's an example from CodingHorror: http://en.wikipedia.org/wiki/PC_Tools_%5BCentral_Point_Software%5D" );
730+
expect( result ).toBe( 'Here\'s an example from CodingHorror: <a href="http://en.wikipedia.org/wiki/PC_Tools_%5BCentral_Point_Software%5D">en.wikipedia.org/wiki/PC_Tools_[Central_Point_Software]</a>' );
731+
} );
732+
733+
734+
it( `should correctly accept square brackets such as PHP array
735+
representation in query strings`,
736+
() => {
737+
let result = autolinker.link( "Here's an example: http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3" );
738+
expect( result ).toBe( `Here's an example: <a href="http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3">example.com/foo.php?bar[]=1&bar[]=2&bar[]=3</a>` );
739+
} );
740+
741+
742+
it( `should correctly accept square brackets such as PHP array
743+
representation in query strings, when the entire URL is surrounded
744+
by square brackets`,
745+
() => {
746+
let result = autolinker.link( "Here's an example: [http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3]" );
747+
expect( result ).toBe( `Here's an example: [<a href="http://www.example.com/foo.php?bar[]=1&bar[]=2&bar[]=3">example.com/foo.php?bar[]=1&bar[]=2&bar[]=3</a>]` );
748+
} );
749+
750+
} );
751+
752+
682753
describe( "Special character handling", function() {
683754

684755
it( "should include $ in URLs", function() {

0 commit comments

Comments
 (0)