@@ -530,8 +530,17 @@ static byte[] decode(String hex) {
530
530
531
531
/**
532
532
* A regex pattern matching a single value of a line in a CSV file, together with the comma or end of line that terminates it.
533
+ * <ul>
534
+ * <li>{@code (?<=^|,)}: assert that the match is preceded by the start of the line or a comma</li>
535
+ * <li>{@code (?:([^",]*)|"((?:[^"]+|"")*)")}: match either a quoted or unquoted value</li>
536
+ * <ul>
537
+ * <li>- {@code ([^",]*)}: match an unquoted value</li>
538
+ * <li>- {@code "((?:[^"]+|"")*)"}: match a quoted value</li>
539
+ * </ul>
540
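+ * <li>{@code \s*} (before and after the value): skip optional whitespace surrounding the value</li>
+ * <li>{@code (?:,|$)}: match either a comma or the end of the line</li>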
541
+ * </ul>
533
542
*/
534
- private static final Pattern CSV_LINE_PATTERN = Pattern .compile ("(?<=^|,)\\ s*([^\" ,]*|\" ([^\" ]|\" \" )*\" )\\ s*(,|$)" );
543
+ private static final Pattern CSV_LINE_PATTERN = Pattern .compile ("(?<=^|,)\\ s*(?:( [^\" ,]*) |\" ((?: [^\" ]+ |\" \" )*) \" )\\ s*(?: ,|$)" );
535
544
536
545
/**
537
546
* Splits the given CSV line into its values.
@@ -550,14 +559,15 @@ static String[] splitCSV(String line) {
550
559
551
560
while (matcher .find ()) {
552
561
if (lastEnd != matcher .start ())
553
- return null ; // other stuff inbetween finds
554
-
555
- String value = matcher .group (1 );
556
- if (value .startsWith ("\" " ))
557
- // Unescape value
558
- result .add (value .substring (1 , value .length () - 1 ).replace ("\" \" " , "\" " ));
559
- else
560
- result .add (value .trim ());
562
+ return null ; // other stuff in between finds
563
+
564
+ if (matcher .group (1 ) != null ) {
565
+ // unquoted: only trim surrounding whitespace
566
+ result .add (matcher .group (1 ).trim ());
567
+ } else {
568
+ // quoted: group 2 already excludes the enclosing quotes, so only unescape doubled quotes
569
+ result .add (matcher .group (2 ).replace ("\" \" " , "\" " ));
570
+ }
561
571
562
572
lastEnd = matcher .end ();
563
573
}
0 commit comments