Skip to content

Commit 1f087e9

Browse files
manningStanford NLP
authored and
Stanford NLP
committed
Add and test proper quoting facility in parenthesized arch arguments
1 parent ff480f4 commit 1f087e9

File tree

2 files changed

+38
-10
lines changed

2 files changed

+38
-10
lines changed

src/edu/stanford/nlp/tagger/maxent/Extractor.java

+19-10
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88

99
import java.io.Serializable;
1010

11-
// import edu.stanford.nlp.util.logging.Redwood;
11+
import edu.stanford.nlp.util.StringUtils;
1212

1313

1414
/**
@@ -105,7 +105,7 @@ protected void setGlobalHolder(MaxentTagger tagger) {}
105105
* @param tag The possible tag that the feature will be generated for
106106
* @return Whether the feature extractor is applicable (true) or not (false)
107107
*/
108-
@SuppressWarnings({"MethodMayBeStatic", "UnusedDeclaration"})
108+
@SuppressWarnings({"UnusedDeclaration"})
109109
public boolean precondition(String tag) {
110110
return true;
111111
}
@@ -238,22 +238,30 @@ public String toString() {
238238

239239

240240
/** This is used for argument parsing in arch variable.
241-
* It can extract a comma separated argument.
242-
* Assumes the input format is "name(arg,arg,arg)".
241+
* It can extract from a comma separated values argument list.
242+
* Values can be quoted with double quotes (inside a quoted value, a doubled double quote stands for one literal double quote)
243+
* like in a regular CSV file. It assumes the input format is "name(arg,arg,arg)".
243244
*
244245
* @param str arch variable component input
245-
* @param num Number of argument
246+
* @param num Position of the argument to return. Positions are 1-indexed (i.e., start from 1, not 0)
246247
* @return The argument at the requested position, or null if there is no argument at that position.
247248
*/
248249
static String getParenthesizedArg(String str, int num) {
249-
String[] args = str.split("\\s*[,()]\\s*");
250-
if (args.length <= num) {
251-
return null;
250+
int left = str.indexOf('(');
251+
int right = str.lastIndexOf(')');
252+
if (left < 0 || right <= left) {
253+
throw new IllegalArgumentException("getParenthesizedArg: Bad format String: " + str);
252254
}
255+
String argStr = str.substring(left + 1, right);
256+
String[] args = StringUtils.splitOnCharWithQuoting(argStr, ',', '"', '"');
253257
// log.info("getParenthesizedArg split " + str + " into " + args.length + " pieces; returning number " + num);
254258
// for (int i = 0; i < args.length; i++) {
255259
// log.info(" " + args[i]);
256260
// }
261+
num--;
262+
if (args.length <= num || num < 0) {
263+
return null;
264+
}
257265
return args[num];
258266
}
259267

@@ -266,11 +274,12 @@ static String getParenthesizedArg(String str, int num) {
266274
* @param num Position of the argument (1-indexed, as it is passed on to getParenthesizedArg)
267275
* @return The int value of the arg, or 0 if it is missing, empty, or not a valid integer
268276
*/
277+
@SuppressWarnings("ConstantConditions")
269278
static int getParenthesizedNum(String str, int num) {
270-
String[] args = str.split("\\s*[,()]\\s*");
279+
String arg = getParenthesizedArg(str, num);
271280
int ans = 0;
272281
try {
273-
ans = Integer.parseInt(args[num]);
282+
ans = Integer.parseInt(arg);
274283
} catch (NumberFormatException | ArrayIndexOutOfBoundsException nfe) {
275284
// just leave ans as 0
276285
}
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,23 @@
11
package edu.stanford.nlp.tagger.maxent;
22

3+
import org.junit.Assert;
4+
import org.junit.Test;
5+
36
public class ExtractorTest {
7+
8+
@Test
9+
public void testGetParenthesizedArg() {
10+
Assert.assertEquals("yellow", Extractor.getParenthesizedArg("foo(bar,gak,yellow)", 3));
11+
Assert.assertEquals("ye(,)ow", Extractor.getParenthesizedArg("foo(bar,gak,\"ye(,)ow\")", 3));
12+
Assert.assertEquals("ye()ow", Extractor.getParenthesizedArg("foo(bar,gak,ye()ow)", 3));
13+
Assert.assertEquals("'\"", Extractor.getParenthesizedArg("foo(\"'\"\"\")", 1));
14+
Assert.assertEquals("'\"", Extractor.getParenthesizedArg("foo(\"bar\",\"'\"\"\",gak)", 2));
15+
Assert.assertNull(Extractor.getParenthesizedArg("foo(bar,gak,yellow)", 0));
16+
Assert.assertNull(Extractor.getParenthesizedArg("foo(bar,gak,yellow)", 4));
17+
Assert.assertEquals(-15, Extractor.getParenthesizedNum("foo(bar,-15,yellow)", 2));
18+
Assert.assertEquals(0, Extractor.getParenthesizedNum("foo(bar,gak,yellow)", 0));
19+
Assert.assertEquals(0, Extractor.getParenthesizedNum("foo(bar,gak,yellow)", 3));
20+
Assert.assertEquals(0, Extractor.getParenthesizedNum("foo(bar,gak,yellow)", 4));
21+
}
22+
423
}

0 commit comments

Comments
 (0)