Skip to content

Commit 96d3bfc

Browse files
committed
fix char_class, typo in andOr
* brute force character classes * fix undefined op typo * test case to cover both
1 parent 9d7e053 commit 96d3bfc

File tree

3 files changed

+70
-21
lines changed

3 files changed

+70
-21
lines changed

src/regex-query.js

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -118,34 +118,19 @@ var analyze = function(re) {
118118
case "char_class":
119119
info.match = RegexQuery.ALL;
120120

121-
// Special case.
122-
if (re.value.length === 0) {
123-
return RegexInfo.noMatch();
124-
}
121+
var classExpansion = expandCharClass(re.value);
125122

126123
// Special case.
127-
if (re.value.length == 1) {
128-
info.exact = [ re.value ];
129-
break;
124+
if (classExpansion.length === 0) {
125+
return RegexInfo.noMatch();
130126
}
131127

132-
var n = 0;
133-
for (i = 0; i < re.value.length; i += 2) {
134-
n += re.value.charCodeAt(i + 1) - re.value.charCodeAt(i);
135-
}
136128
// If the class is too large, it's okay to overestimate.
137-
if (n > 100) {
129+
if (classExpansion.length > 100) {
138130
return RegexInfo.anyChar();
139131
}
140132

141-
info.exact = [];
142-
for (i = 0; i < re.value.length; i += 2) {
143-
var lo = re.value.charCodeAt(i);
144-
var hi = re.value.charCodeAt(i + 1);
145-
for (var rr = lo; rr <= hi; rr++) {
146-
info.exact.push(String.fromCharCode(rr));
147-
}
148-
}
133+
info.exact = classExpansion;
149134
break;
150135

151136
default:
@@ -155,6 +140,21 @@ var analyze = function(re) {
155140
return info;
156141
};
157142

143+
// expandCharClass enumerates, by brute force, every single UTF-16 code unit
// matched by a regex character class.
//
// charClass is the text found between the brackets of a class, e.g. "aA",
// "a-z" or "^0-9". It is spliced verbatim into "[" + charClass + "]", so it
// is interpreted with normal character-class syntax (ranges, negation,
// escapes) rather than as a literal list of characters.
//
// Returns an array of one-character strings in ascending code-unit order;
// the array is empty when the class matches nothing (e.g. "").
var expandCharClass = function(charClass) {
    var pattern = new RegExp("[" + charClass + "]");
    var expansion = [];
    // Scan the whole 16-bit range, 0x0000 through 0xFFFF inclusive. Each code
    // unit is visited exactly once, so no duplicate check is required.
    for (var code = 0; code <= 0xFFFF; code++) {
        var ch = String.fromCharCode(code);
        if (pattern.test(ch)) {
            expansion.push(ch);
        }
    }
    return expansion;
};
157+
158158
// fold is the usual higher-order function.
159159
var fold = function(f, sub, zero) {
160160
var info;
@@ -387,7 +387,7 @@ RegexQuery.prototype.andOr = function(other, op) {
387387
// Add in factored trigrams.
388388
var otherOp = op == "AND" ? "OR" : "AND";
389389
var t = new RegexQuery(otherOp, common);
390-
return t.andOr(s, t.Op);
390+
return t.andOr(s, t.op);
391391
}
392392

393393
// Otherwise just create the op.

test/regex-peg_test.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,25 @@ module.exports = {
136136
]
137137
});
138138
test.done();
139+
},
140+
141+
"/[aA]bc[dD]/": function(test) {
142+
var tree = peg.parse("[aA]bc[dD]");
143+
test.deepEqual(tree, {
144+
type: "concat",
145+
value: [
146+
{ type: "char_class", value: "aA" },
147+
{ type: "concat", value: [
148+
{ type: "literal", value: "b" },
149+
{ type: "concat",
150+
value: [
151+
{ type: "literal", value: "c" },
152+
{ type: "char_class", value: "dD" }
153+
] }
154+
] }
155+
]
156+
});
157+
test.done();
139158
}
140159
}
141160
};

test/regex-trigram_test.js

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,36 @@ module.exports = {
105105
sub: []
106106
});
107107
test.done();
108+
},
109+
110+
"/[aA]bc[dD]/": function(test) {
111+
var re = regex.parse("[aA]bc[dD]");
112+
var q = regex.query(re);
113+
test.deepEqual(q, {
114+
op: 'OR',
115+
trigram: [],
116+
sub:
117+
[
118+
{ op: 'AND',
119+
trigram: [ 'Abc' ],
120+
sub: [
121+
{ op: 'OR',
122+
trigram: [ 'bcD', 'bcd' ],
123+
sub: []
124+
}
125+
]
126+
},
127+
{ op: 'AND',
128+
trigram: [ 'abc', 'bcD' ],
129+
sub: []
130+
},
131+
{ op: 'AND',
132+
trigram: [ 'abc', 'bcd' ],
133+
sub: []
134+
}
135+
]
136+
});
137+
test.done();
108138
}
109139
}
110140
};

0 commit comments

Comments
 (0)