@@ -40,9 +40,9 @@ import type { StripPrefixConfigObj } from '../autolinker';
40
40
41
41
/**
42
42
* Context object containing all the state needed by the state machine functions.
43
- *
43
+ *
44
44
* ## Historical note
45
- *
45
+ *
46
46
* In v4.1.1, we used nested functions to handle the context via closures, but
47
47
* this necessitated re-creating the functions for each call to `parseMatches()`,
48
48
* which made them difficult for v8 to JIT optimize. In v4.1.2, we lifted all of
@@ -273,7 +273,7 @@ export function parseMatches(text: string, args: ParseMatchesArgs): Match[] {
273
273
// String(charIdx),
274
274
// char,
275
275
// `10: ${char.charCodeAt(0)}\n0x: ${char.charCodeAt(0).toString(16)}\nU+${char.codePointAt(0)}`,
276
- // stateMachines.map(machine => `${machine.type}${'matchType' in machine ? ` (${machine.matchType})` : ''}`).join('\n') || '(none)',
276
+ // stateMachines.map(machine => `${StateMachineType[ machine.type] }${'matchType' in machine ? ` (${UrlStateMachineMatchType[ machine.matchType] })` : ''}`).join('\n') || '(none)',
277
277
// stateMachines.map(machine => State[machine.state]).join('\n') || '(none)',
278
278
// String(charIdx),
279
279
// stateMachines.map(m => m.startIdx).join('\n'),
@@ -1102,7 +1102,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
1102
1102
matchedText = excludeUnbalancedTrailingBracesAndPunctuation ( matchedText ) ;
1103
1103
1104
1104
switch ( stateMachine . type ) {
1105
- case 'url' : {
1105
+ case StateMachineType . Url : {
1106
1106
// We don't want to accidentally match a URL that is preceded by an
1107
1107
// '@' character, which would be an email address
1108
1108
const charBeforeUrlMatch = text . charAt ( stateMachine . startIdx - 1 ) ;
@@ -1116,9 +1116,8 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
1116
1116
// that begin with 'www.' so that users may turn off 'www'
1117
1117
// matches. As such, we need to correct for that now if the
1118
1118
// URL begins with 'www.'
1119
- const urlMatchType : UrlMatchType = stateMachine . matchType ;
1120
- switch ( urlMatchType ) {
1121
- case 'scheme' : {
1119
+ switch ( stateMachine . matchType ) {
1120
+ case UrlStateMachineMatchType . Scheme : {
1122
1121
// Autolinker accepts many characters in a url's scheme (like `fake://test.com`).
1123
1122
// However, in cases where a URL is missing whitespace before an obvious link,
1124
1123
// (for example: `nowhitespacehttp://www.test.com`), we only want the match to start
@@ -1139,14 +1138,14 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
1139
1138
break ;
1140
1139
}
1141
1140
1142
- case 'tld' : {
1141
+ case UrlStateMachineMatchType . Tld : {
1143
1142
if ( ! isValidTldMatch ( matchedText ) ) {
1144
1143
return ; // not a valid match
1145
1144
}
1146
1145
break ;
1147
1146
}
1148
1147
1149
- case 'ipV4' : {
1148
+ case UrlStateMachineMatchType . IpV4 : {
1150
1149
if ( ! isValidIpV4Address ( matchedText ) ) {
1151
1150
return ; // not a valid match
1152
1151
}
@@ -1155,15 +1154,15 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
1155
1154
1156
1155
/* istanbul ignore next */
1157
1156
default :
1158
- assertNever ( urlMatchType ) ;
1157
+ assertNever ( stateMachine ) ;
1159
1158
}
1160
1159
1161
1160
matches . push (
1162
1161
new UrlMatch ( {
1163
1162
tagBuilder : tagBuilder ,
1164
1163
matchedText : matchedText ,
1165
1164
offset : startIdx ,
1166
- urlMatchType : urlMatchType ,
1165
+ urlMatchType : toUrlMatchType ( stateMachine . matchType ) ,
1167
1166
url : matchedText ,
1168
1167
protocolRelativeMatch : matchedText . slice ( 0 , 2 ) === '//' ,
1169
1168
@@ -1177,7 +1176,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
1177
1176
break ;
1178
1177
}
1179
1178
1180
- case 'email' : {
1179
+ case StateMachineType . Email : {
1181
1180
// if the email address has a valid TLD, add it to the list of matches
1182
1181
if ( isValidEmail ( matchedText ) ) {
1183
1182
matches . push (
@@ -1192,7 +1191,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
1192
1191
break ;
1193
1192
}
1194
1193
1195
- case 'hashtag' : {
1194
+ case StateMachineType . Hashtag : {
1196
1195
if ( isValidHashtag ( matchedText ) ) {
1197
1196
matches . push (
1198
1197
new HashtagMatch ( {
@@ -1207,7 +1206,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
1207
1206
break ;
1208
1207
}
1209
1208
1210
- case 'mention' : {
1209
+ case StateMachineType . Mention : {
1211
1210
if ( isValidMention ( matchedText , mentionServiceName ) ) {
1212
1211
matches . push (
1213
1212
new MentionMatch ( {
@@ -1222,7 +1221,7 @@ function captureMatchIfValidAndRemove(context: ParseMatchesContext, stateMachine
1222
1221
break ;
1223
1222
}
1224
1223
1225
- case 'phone' : {
1224
+ case StateMachineType . Phone : {
1226
1225
// remove any trailing spaces that were considered as "separator"
1227
1226
// chars by the state machine
1228
1227
matchedText = matchedText . replace ( / + $ / g, '' ) ;
@@ -1266,6 +1265,25 @@ const oppositeBrace: { [char: string]: string } = {
1266
1265
']' : '[' ,
1267
1266
} ;
1268
1267
1268
+ /**
1269
+ * Helper function to convert a UrlStateMachineMatchType value to its
1270
+ * UrlMatchType equivalent.
1271
+ */
1272
+ function toUrlMatchType ( stateMachineMatchType : UrlStateMachineMatchType ) : UrlMatchType {
1273
+ switch ( stateMachineMatchType ) {
1274
+ case UrlStateMachineMatchType . Scheme :
1275
+ return 'scheme' ;
1276
+ case UrlStateMachineMatchType . Tld :
1277
+ return 'tld' ;
1278
+ case UrlStateMachineMatchType . IpV4 :
1279
+ return 'ipV4' ;
1280
+
1281
+ /* istanbul ignore next */
1282
+ default :
1283
+ assertNever ( stateMachineMatchType ) ;
1284
+ }
1285
+ }
1286
+
1269
1287
/**
1270
1288
* Determines if a match found has unmatched closing parenthesis,
1271
1289
* square brackets or curly brackets. If so, these unbalanced symbol(s) will be
@@ -1398,6 +1416,16 @@ const enum State {
1398
1416
PhoneNumberPoundChar , // '#' for pound character
1399
1417
}
1400
1418
1419
+ // The type of state machine
1420
+ // For debugging: temporarily remove `const` from `const enum`
1421
+ const enum StateMachineType {
1422
+ Url = 0 ,
1423
+ Email ,
1424
+ Hashtag ,
1425
+ Mention ,
1426
+ Phone ,
1427
+ }
1428
+
1401
1429
type StateMachine =
1402
1430
| UrlStateMachine
1403
1431
| EmailStateMachine
@@ -1411,8 +1439,16 @@ interface AbstractStateMachine {
1411
1439
acceptStateReached : boolean ;
1412
1440
}
1413
1441
1442
+ // The type of URL state machine
1443
+ // For debugging: temporarily remove `const` from `const enum`
1444
+ const enum UrlStateMachineMatchType {
1445
+ Scheme = 0 , // http://, https://, file://, etc. match
1446
+ Tld , // Top-level Domain (TLD)
1447
+ IpV4 , // 192.168.0.1
1448
+ }
1449
+
1414
1450
interface AbstractUrlStateMachine extends AbstractStateMachine {
1415
- readonly type : 'url' ;
1451
+ readonly type : StateMachineType . Url ;
1416
1452
}
1417
1453
1418
1454
type UrlStateMachine = SchemeUrlStateMachine | TldUrlStateMachine | IpV4UrlStateMachine ;
@@ -1421,44 +1457,44 @@ type UrlStateMachine = SchemeUrlStateMachine | TldUrlStateMachine | IpV4UrlState
1421
1457
* State machine with metadata for capturing scheme-prefixed URLs (such as 'http://', 'https://', 'file://', etc.).
1422
1458
*/
1423
1459
interface SchemeUrlStateMachine extends AbstractUrlStateMachine {
1424
- readonly matchType : 'scheme' ;
1460
+ readonly matchType : UrlStateMachineMatchType . Scheme ;
1425
1461
}
1426
1462
1427
1463
/**
1428
1464
* State machine with metadata for capturing TLD (top-level domain) URLs.
1429
1465
*/
1430
1466
interface TldUrlStateMachine extends AbstractUrlStateMachine {
1431
- readonly matchType : 'tld' ;
1467
+ readonly matchType : UrlStateMachineMatchType . Tld ;
1432
1468
}
1433
1469
1434
1470
/**
1435
1471
* State machine for capturing IPv4 addresses that are not prefixed with a
1436
1472
* scheme (such as 'http://').
1437
1473
*/
1438
1474
interface IpV4UrlStateMachine extends AbstractUrlStateMachine {
1439
- readonly matchType : 'ipV4' ;
1475
+ readonly matchType : UrlStateMachineMatchType . IpV4 ;
1440
1476
octetsEncountered : number ; // if we encounter a number of octets other than 4, it's not an IPv4 address
1441
1477
}
1442
1478
1443
1479
/**
1444
1480
* State machine for capturing email addresses.
1445
1481
*/
1446
1482
interface EmailStateMachine extends AbstractStateMachine {
1447
- readonly type : 'email' ;
1483
+ readonly type : StateMachineType . Email ;
1448
1484
}
1449
1485
1450
1486
/**
1451
1487
* State machine for capturing hashtags.
1452
1488
*/
1453
1489
interface HashtagStateMachine extends AbstractStateMachine {
1454
- readonly type : 'hashtag' ;
1490
+ readonly type : StateMachineType . Hashtag ;
1455
1491
}
1456
1492
1457
1493
/**
1458
1494
* State machine for capturing mentions.
1459
1495
*/
1460
1496
interface MentionStateMachine extends AbstractStateMachine {
1461
- readonly type : 'mention' ;
1497
+ readonly type : StateMachineType . Mention ;
1462
1498
}
1463
1499
1464
1500
/**
@@ -1469,43 +1505,43 @@ interface MentionStateMachine extends AbstractStateMachine {
1469
1505
* otherwise potentially think a phone number is part of a domain label.
1470
1506
*/
1471
1507
interface PhoneNumberStateMachine extends AbstractStateMachine {
1472
- readonly type : 'phone' ;
1508
+ readonly type : StateMachineType . Phone ;
1473
1509
}
1474
1510
1475
1511
function createSchemeUrlStateMachine ( startIdx : number , state : State ) : SchemeUrlStateMachine {
1476
1512
return {
1477
- type : 'url' ,
1513
+ type : StateMachineType . Url ,
1478
1514
startIdx,
1479
1515
state,
1480
1516
acceptStateReached : false ,
1481
- matchType : 'scheme' ,
1517
+ matchType : UrlStateMachineMatchType . Scheme ,
1482
1518
} ;
1483
1519
}
1484
1520
1485
1521
function createTldUrlStateMachine ( startIdx : number , state : State ) : TldUrlStateMachine {
1486
1522
return {
1487
- type : 'url' ,
1523
+ type : StateMachineType . Url ,
1488
1524
startIdx,
1489
1525
state,
1490
1526
acceptStateReached : false ,
1491
- matchType : 'tld' ,
1527
+ matchType : UrlStateMachineMatchType . Tld ,
1492
1528
} ;
1493
1529
}
1494
1530
1495
1531
function createIpV4UrlStateMachine ( startIdx : number , state : State ) : IpV4UrlStateMachine {
1496
1532
return {
1497
- type : 'url' ,
1533
+ type : StateMachineType . Url ,
1498
1534
startIdx,
1499
1535
state,
1500
1536
acceptStateReached : false ,
1501
- matchType : 'ipV4' ,
1537
+ matchType : UrlStateMachineMatchType . IpV4 ,
1502
1538
octetsEncountered : 1 , // starts at 1 because we create this machine when encountering the first octet
1503
1539
} ;
1504
1540
}
1505
1541
1506
1542
function createEmailStateMachine ( startIdx : number , state : State ) : EmailStateMachine {
1507
1543
return {
1508
- type : 'email' ,
1544
+ type : StateMachineType . Email ,
1509
1545
startIdx,
1510
1546
state,
1511
1547
acceptStateReached : false ,
@@ -1514,7 +1550,7 @@ function createEmailStateMachine(startIdx: number, state: State): EmailStateMach
1514
1550
1515
1551
function createHashtagStateMachine ( startIdx : number , state : State ) : HashtagStateMachine {
1516
1552
return {
1517
- type : 'hashtag' ,
1553
+ type : StateMachineType . Hashtag ,
1518
1554
startIdx,
1519
1555
state,
1520
1556
acceptStateReached : false ,
@@ -1523,7 +1559,7 @@ function createHashtagStateMachine(startIdx: number, state: State): HashtagState
1523
1559
1524
1560
function createMentionStateMachine ( startIdx : number , state : State ) : MentionStateMachine {
1525
1561
return {
1526
- type : 'mention' ,
1562
+ type : StateMachineType . Mention ,
1527
1563
startIdx,
1528
1564
state,
1529
1565
acceptStateReached : false ,
@@ -1532,13 +1568,16 @@ function createMentionStateMachine(startIdx: number, state: State): MentionState
1532
1568
1533
1569
function createPhoneNumberStateMachine ( startIdx : number , state : State ) : PhoneNumberStateMachine {
1534
1570
return {
1535
- type : 'phone' ,
1571
+ type : StateMachineType . Phone ,
1536
1572
startIdx,
1537
1573
state,
1538
1574
acceptStateReached : false ,
1539
1575
} ;
1540
1576
}
1541
1577
1542
1578
function isSchemeUrlStateMachine ( machine : StateMachine ) : machine is SchemeUrlStateMachine {
1543
- return machine . type === 'url' && machine . matchType === 'scheme' ;
1579
+ return (
1580
+ machine . type === StateMachineType . Url &&
1581
+ machine . matchType === UrlStateMachineMatchType . Scheme
1582
+ ) ;
1544
1583
}
0 commit comments