|
8 | 8 | * https://github.com/gregjacobs/Autolinker.js
|
9 | 9 | */
|
10 | 10 | /*global define, module */
|
11 |
| -/*jshint smarttabs:true */ |
| 11 | +/*jshint undef:true, smarttabs:true */ |
12 | 12 | // Set up Autolinker appropriately for the environment.
|
13 | 13 | ( function( root, factory ) {
|
14 | 14 | if( typeof define === 'function' && define.amd ) {
|
|
132 | 132 | *
|
133 | 133 | * The regular expression that matches URLs, email addresses, and Twitter handles.
|
134 | 134 | *
|
135 |
| - * Capturing groups: |
| 135 | + * This regular expression has the following capturing groups: |
136 | 136 | *
|
137 |
| - * 1. Group that is used to determine if there is a Twitter handle match (i.e. @someTwitterUser). Simply check for its existence |
138 |
| - * to determine if there is a Twitter handle match. The next couple of capturing groups give information about the Twitter |
139 |
| - * handle match. |
140 |
| - * 2. The whitespace character before the @sign in a Twitter handle. This is needed because there are no lookbehinds in JS regular |
141 |
| - * expressions, and can be used to reconstruct the original string in a replace(). |
| 137 | + * 1. Group that is used to determine if there is a Twitter handle match (i.e. @someTwitterUser). Simply check for its |
| 138 | + * existence to determine if there is a Twitter handle match. The next couple of capturing groups give information |
| 139 | + * about the Twitter handle match. |
| 140 | + * 2. The whitespace character before the @sign in a Twitter handle. This is needed because there are no lookbehinds in |
| 141 | + * JS regular expressions, and can be used to reconstruct the original string in a replace(). |
142 | 142 | * 3. The Twitter handle itself in a Twitter match. If the match is '@someTwitterUser', the handle is 'someTwitterUser'.
|
143 |
| - * 4. Group that matches an email address. Used to determine if the match is an email address, as well as holding the full address. |
144 |
| - |
| 143 | + * 4. Group that matches an email address. Used to determine if the match is an email address, as well as holding the full |
| 144 | + * address. Ex: 'me@my.com' |
145 | 145 | * 5. Group that matches a URL in the input text. Ex: 'http://google.com', 'www.google.com', or just 'google.com'.
|
146 | 146 | * This also includes a path, url parameters, or hash anchors. Ex: google.com/path/to/file?q1=1&q2=2#myAnchor
|
| 147 | + * 6. A protocol-relative ('//') match for the case of a 'www.' prefixed URL. Will be an empty string if it is not a |
| 148 | + * protocol-relative match. We need to know the character before the '//' in order to determine if it is a valid match |
| 149 | + * or the // was in a string we don't want to auto-link. |
| 150 | + * 7. A protocol-relative ('//') match for the case of a known TLD prefixed URL. Will be an empty string if it is not a |
| 151 | + * protocol-relative match. See #6 for more info. |
147 | 152 | */
|
148 |
| - matcherRegex: (function() { |
| 153 | + matcherRegex : (function() { |
149 | 154 | var twitterRegex = /(^|[^\w])@(\w{1,15})/, // For matching a twitter handle. Ex: @gregory_jacobs
|
150 | 155 |
|
151 | 156 | emailRegex = /(?:[\-;:&=\+\$,\w\.]+@)/, // something@ for email addresses (a.k.a. local-part)
|
|
159 | 164 | // http://blog.codinghorror.com/the-problem-with-urls/
|
160 | 165 | urlSuffixRegex = /(?:[\-A-Za-z0-9+&@#\/%?=~_()|!:,.;]*[\-A-Za-z0-9+&@#\/%=~_()|])?/; // note: optional part of the full regex
|
161 | 166 |
|
162 |
| - |
163 | 167 | return new RegExp( [
|
164 | 168 | '(', // *** Capturing group $1, which can be used to check for a twitter handle match. Use group $3 for the actual twitter handle though. $2 may be used to reconstruct the original string in a replace()
|
165 | 169 | // *** Capturing group $2, which matches the whitespace character before the '@' sign (needed because of no lookbehinds), and
|
|
179 | 183 |
|
180 | 184 | '(', // *** Capturing group $5, which is used to match a URL
|
181 | 185 | '(?:', // parens to cover match for protocol (optional), and domain
|
182 |
| - '(?:', // non-capturing paren for a protocol-prefixed url (ex: http://google.com) |
| 186 | + '(?:', // non-capturing paren for a protocol-prefixed url (ex: http://google.com) |
183 | 187 | protocolRegex.source,
|
184 | 188 | domainNameRegex.source,
|
185 | 189 | ')',
|
186 | 190 |
|
187 | 191 | '|',
|
188 | 192 |
|
189 | 193 | '(?:', // non-capturing paren for a 'www.' prefixed url (ex: www.google.com)
|
| 194 | + '(.?//)?', // *** Capturing group $6 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character |
190 | 195 | wwwRegex.source,
|
191 | 196 | domainNameRegex.source,
|
192 | 197 | ')',
|
193 | 198 |
|
194 | 199 | '|',
|
195 | 200 |
|
196 | 201 | '(?:', // non-capturing paren for a known TLD url (ex: google.com)
|
| 202 | + '(.?//)?', // *** Capturing group $7 for an optional protocol-relative URL. Must be at the beginning of the string or start with a non-word character |
197 | 203 | domainNameRegex.source,
|
198 | 204 | tldRegex.source,
|
199 | 205 | ')',
|
|
204 | 210 | ].join( "" ), 'g' );
|
205 | 211 | } )(),
|
206 | 212 |
|
| 213 | + /** |
| 214 | + * @private |
| 215 | + * @property {RegExp} protocolRelativeRegex |
| 216 | + * |
| 217 | + * The regular expression used to find protocol-relative URLs. A protocol-relative URL is, for example, "//yahoo.com" |
| 218 | + * |
| 219 | + * This regular expression needs to match the character before the '//', in order to determine if we should actually |
| 220 | + * autolink a protocol-relative URL. For instance, we want to autolink something like "//google.com", but we |
| 221 | + * don't want to autolink something like "abc//google.com" |
| 222 | + */ |
| 223 | + protocolRelativeRegex : /(.)?\/\//, |
| 224 | + |
207 | 225 | /**
|
208 | 226 | * @private
|
209 | 227 | * @property {RegExp} htmlRegex
|
210 | 228 | *
|
211 |
| - * A regular expression used to pull out HTML tags from a string. Handles namespaced HTML tags and |
212 |
| - * attribute names, as specified by http://www.w3.org/TR/html-markup/syntax.html |
| 229 | + * The regular expression used to pull out HTML tags from a string. Handles namespaced HTML tags and |
| 230 | + * attribute names, as specified by http://www.w3.org/TR/html-markup/syntax.html. |
213 | 231 | *
|
214 | 232 | * Capturing groups:
|
215 | 233 | *
|
|
343 | 361 | enableEmailAddresses = this.email,
|
344 | 362 | enableUrls = this.urls;
|
345 | 363 |
|
346 |
| - return text.replace( matcherRegex, function( matchStr, $1, $2, $3, $4, $5 ) { |
| 364 | + return text.replace( matcherRegex, function( matchStr, $1, $2, $3, $4, $5, $6, $7 ) { |
347 | 365 | var twitterMatch = $1,
|
348 | 366 | twitterHandlePrefixWhitespaceChar = $2, // The whitespace char before the @ sign in a Twitter handle match. This is needed because of no lookbehinds in JS regexes
|
349 |
| - twitterHandle = $3, // The actual twitterUser (i.e the word after the @ sign in a Twitter handle match) |
350 |
| - emailAddress = $4, // For both determining if it is an email address, and stores the actual email address |
351 |
| - urlMatch = $5, // The matched URL string |
| 367 | + twitterHandle = $3, // The actual twitterUser (i.e. the word after the @ sign in a Twitter handle match) |
| 368 | + emailAddress = $4, // For both determining if it is an email address, and stores the actual email address |
| 369 | + urlMatch = $5, // The matched URL string |
| 370 | + protocolRelativeMatch = $6 || $7, // The '//' for a protocol-relative match, with the character that comes before the '//' |
352 | 371 |
|
353 |
| - prefixStr = "", // A string to use to prefix the anchor tag that is created. This is needed for the Twitter handle match |
354 |
| - suffixStr = ""; // A string to suffix the anchor tag that is created. This is used if there is a trailing parenthesis that should not be auto-linked. |
| 372 | + prefixStr = "", // A string to use to prefix the anchor tag that is created. This is needed for the Twitter handle match |
| 373 | + suffixStr = ""; // A string to suffix the anchor tag that is created. This is used if there is a trailing parenthesis that should not be auto-linked. |
| 374 | + |
| 375 | + // Early exits with no replacements for: |
| 376 | + // 1) Disabled link types |
| 377 | + // 2) URL matches which do not have at least one period ('.') in the domain name (effectively skipping over |
| 378 | + // matches like "abc:def") |
| 379 | + // 3) A protocol-relative url match (a URL beginning with '//') whose previous character is a word character |
| 380 | + // (effectively skipping over strings like "abc//google.com") |
| 381 | + if( |
| 382 | + ( twitterMatch && !enableTwitter ) || ( emailAddress && !enableEmailAddresses ) || ( urlMatch && !enableUrls ) || |
| 383 | + ( urlMatch && urlMatch.indexOf( '.' ) === -1 ) || // At least one period ('.') must exist in the URL match for us to consider it an actual URL |
| 384 | + ( protocolRelativeMatch && /^[\w]\/\//.test( protocolRelativeMatch ) ) // a protocol-relative match which has a word character in front of it (so we can skip something like "abc//google.com") |
| 385 | + ) { |
| 386 | + return matchStr; |
| 387 | + } |
| 388 | + |
355 | 389 |
|
356 | 390 | // Handle a closing parenthesis at the end of the match, and exclude it if there is not a matching open parenthesis
|
357 | 391 | // in the match. This handles cases like the string "wikipedia.com/something_(disambiguation)" (which should be auto-
|
|
372 | 406 |
|
373 | 407 |
|
374 | 408 | var anchorHref = matchStr, // initialize both of these
|
375 |
| - anchorText = matchStr; // values as the full match |
| 409 | + anchorText = matchStr, // values as the full match |
| 410 | + linkType; |
376 | 411 |
|
377 |
| - // Simply return out for disabled link types, or possibly URL matches which do not have at least have |
378 |
| - // one period ('.') in the domain name (effectively skipping over strings like "abc:def") |
379 |
| - if( |
380 |
| - ( twitterMatch && !enableTwitter ) || |
381 |
| - ( emailAddress && !enableEmailAddresses ) || |
382 |
| - ( urlMatch && !enableUrls ) || |
383 |
| - ( urlMatch && urlMatch.indexOf( '.' ) === -1 ) // At least one period ('.') must exist in the URL match for us to consider it an actual URL |
384 |
| - ) { |
385 |
| - return prefixStr + anchorText + suffixStr; |
386 |
| - } |
387 |
| - |
388 | 412 | // Process the urls that are found. We need to change URLs like "www.yahoo.com" to "http://www.yahoo.com" (or the browser
|
389 | 413 | // will try to direct the user to "http://current-domain.com/www.yahoo.com"), and we need to prefix 'mailto:' to email addresses.
|
390 |
| - var linkType = 'url'; |
391 | 414 | if( twitterMatch ) {
|
392 | 415 | linkType = 'twitter';
|
393 | 416 | prefixStr = twitterHandlePrefixWhitespaceChar;
|
394 | 417 | anchorHref = 'https://twitter.com/' + twitterHandle;
|
395 | 418 | anchorText = '@' + twitterHandle;
|
| 419 | + |
396 | 420 | } else if( emailAddress ) {
|
397 | 421 | linkType = 'email';
|
398 | 422 | anchorHref = 'mailto:' + emailAddress;
|
399 | 423 | anchorText = emailAddress;
|
400 |
| - } else if( !/^[A-Za-z]{3,9}:/i.test( anchorHref ) ) { // url string doesn't begin with a protocol, assume http:// |
401 |
| - anchorHref = 'http://' + anchorHref; |
| 424 | + |
| 425 | + } else { // url match |
| 426 | + linkType = 'url'; |
| 427 | + |
| 428 | + if( protocolRelativeMatch ) { |
| 429 | + // Strip off any protocol-relative '//' from the anchor text (leaving the previous non-word character |
| 430 | + // intact, if there is one) |
| 431 | + var protocolRelRegex = new RegExp( "^" + me.protocolRelativeRegex.source ), // for this one, we want to only match at the beginning of the string |
| 432 | + charBeforeMatch = protocolRelativeMatch.match( protocolRelRegex )[ 1 ] || ""; |
| 433 | + |
| 434 | + prefixStr = charBeforeMatch + prefixStr; // re-add the character before the '//' to what will be placed before the <a> tag |
| 435 | + anchorHref = anchorHref.replace( protocolRelRegex, "//" ); // remove the char before the match for the href |
| 436 | + anchorText = anchorText.replace( protocolRelRegex, "" ); // remove both the char before the match and the '//' for the anchor text |
| 437 | + |
| 438 | + } else if( !/^[A-Za-z]{3,9}:/i.test( anchorHref ) ) { |
| 439 | + // url string doesn't begin with a protocol, assume http:// |
| 440 | + anchorHref = 'http://' + anchorHref; |
| 441 | + } |
402 | 442 | }
|
403 | 443 |
|
404 | 444 | // wrap the match in an anchor tag
|
|
0 commit comments