2929import java .nio .ByteBuffer ;
3030import java .nio .charset .StandardCharsets ;
3131import java .util .Arrays ;
32+ import java .util .BitSet ;
3233import java .util .Collections ;
3334import java .util .Map ;
3435import java .util .Objects ;
@@ -60,6 +61,93 @@ public final class PackageURL implements Serializable {
6061
6162 private static final char PERCENT_CHAR = '%' ;
6263
/** Size of every character-class table below: one bit per code point in the range 0-127. */
private static final int NBITS = 128;

/** The decimal digits {@code '0'} through {@code '9'}. */
private static final BitSet DIGIT = new BitSet(NBITS);

static {
    // The upper bound of BitSet.set(from, to) is exclusive, hence the "+ 1".
    DIGIT.set('0', '9' + 1);
}

/** The lower-case letters {@code 'a'} through {@code 'z'}. */
private static final BitSet LOWER = new BitSet(NBITS);

static {
    LOWER.set('a', 'z' + 1);
}

/** The upper-case letters {@code 'A'} through {@code 'Z'}. */
private static final BitSet UPPER = new BitSet(NBITS);

static {
    UPPER.set('A', 'Z' + 1);
}

/** Union of {@link #UPPER} and {@link #LOWER}. */
private static final BitSet ALPHA = new BitSet(NBITS);

static {
    ALPHA.or(UPPER);
    ALPHA.or(LOWER);
}

/** Union of {@link #ALPHA} and {@link #DIGIT}. */
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);

static {
    ALPHA_DIGIT.or(DIGIT);
    ALPHA_DIGIT.or(ALPHA);
}

/** Letters and digits plus {@code '-'}, {@code '.'}, {@code '_'} and {@code '~'}. */
private static final BitSet UNRESERVED = new BitSet(NBITS);

static {
    UNRESERVED.or(ALPHA_DIGIT);
    for (char c : "-._~".toCharArray()) {
        UNRESERVED.set(c);
    }
}

/** The punctuation characters {@code ! $ & ' ( ) * + , ; =}. */
private static final BitSet SUB_DELIMS = new BitSet(NBITS);

static {
    for (char c : "!$&'()*+,;=".toCharArray()) {
        SUB_DELIMS.set(c);
    }
}

/**
 * Characters written literally in the namespace, name and version:
 * {@link #UNRESERVED}, {@link #SUB_DELIMS} and {@code ':'}.
 */
private static final BitSet PCHAR = new BitSet(NBITS);

static {
    PCHAR.or(SUB_DELIMS);
    PCHAR.or(UNRESERVED);
    PCHAR.set(':');
    // '@' is intentionally NOT a member: it must always be percent-encoded in the
    // path because a literal '@' introduces the version.
}

/**
 * Characters written literally in qualifier values: {@link #PCHAR} plus {@code '/'}
 * and {@code '?'}, minus {@code '&'} and {@code '='}.
 */
private static final BitSet QUERY = new BitSet(NBITS);

static {
    QUERY.or(PCHAR);
    // Presumably removed because '&' and '=' delimit qualifiers; verify against the
    // qualifier serialisation. XXX: the removal of '=' was flagged for review by the
    // original author.
    QUERY.clear('=');
    QUERY.clear('&');
    QUERY.set('?');
    QUERY.set('/');
}

/** Character class for the subpath; this is the very same {@link BitSet} instance as {@link #QUERY}. */
private static final BitSet FRAGMENT = QUERY;
150+
63151 /**
64152 * Constructs a new PackageURL object by parsing the specified string.
65153 *
@@ -498,12 +586,12 @@ private String canonicalize(boolean coordinatesOnly) {
498586 final StringBuilder purl = new StringBuilder ();
499587 purl .append (SCHEME_PART ).append (type ).append ('/' );
500588 if (namespace != null ) {
501- purl .append (encodePath (namespace ));
589+ purl .append (encodePath (namespace , PCHAR ));
502590 purl .append ('/' );
503591 }
504- purl .append (percentEncode (name ));
592+ purl .append (percentEncode (name , PCHAR ));
505593 if (version != null ) {
506- purl .append ('@' ).append (percentEncode (version ));
594+ purl .append ('@' ).append (percentEncode (version , PCHAR ));
507595 }
508596
509597 if (!coordinatesOnly ) {
@@ -517,23 +605,27 @@ private String canonicalize(boolean coordinatesOnly) {
517605 }
518606 purl .append (entry .getKey ());
519607 purl .append ('=' );
520- purl .append (percentEncode (entry .getValue ()));
608+ purl .append (percentEncode (entry .getValue (), QUERY ));
521609 separator = true ;
522610 }
523611 }
524612 if (subpath != null ) {
525- purl .append ('#' ).append (encodePath (subpath ));
613+ purl .append ('#' ).append (encodePath (subpath , FRAGMENT ));
526614 }
527615 }
528616 return purl .toString ();
529617 }
530618
531- private static boolean isUnreserved (int c ) {
532- return (isValidCharForKey (c ) || c == '~' );
619+ private static boolean isUnreserved (int c , BitSet safe ) {
620+ if (c < 0 || c >= NBITS ) {
621+ return false ;
622+ }
623+
624+ return safe .get (c );
533625 }
534626
535- private static boolean shouldEncode (int c ) {
536- return !isUnreserved (c );
627+ private static boolean shouldEncode (int c , BitSet safe ) {
628+ return !isUnreserved (c , safe );
537629 }
538630
539631 private static boolean isAlpha (int c ) {
@@ -598,14 +690,14 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
598690 .orElse (-1 );
599691 }
600692
601- private static int indexOfUnsafeChar (final byte [] bytes , final int start ) {
693+ private static int indexOfUnsafeChar (final byte [] bytes , final int start , BitSet safe ) {
602694 return IntStream .range (start , bytes .length )
603- .filter (i -> shouldEncode (bytes [i ]))
695+ .filter (i -> shouldEncode (bytes [i ], safe ))
604696 .findFirst ()
605697 .orElse (-1 );
606698 }
607699
608- private static byte percentDecode (final byte [] bytes , final int start ) {
700+ static byte percentDecode (final byte [] bytes , final int start ) {
609701 if (start + 2 >= bytes .length ) {
610702 throw new ValidationException ("Incomplete percent encoding at offset " + start + " with value '"
611703 + new String (bytes , start , bytes .length - start , StandardCharsets .UTF_8 ) + "'" );
@@ -638,15 +730,15 @@ public static String percentDecode(final String source) {
638730 }
639731
640732 byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
641-
642- int off = 0 ;
643- int idx = indexOfPercentChar (bytes , off );
733+ int idx = indexOfPercentChar (bytes , 0 );
644734
645735 if (idx == -1 ) {
646736 return source ;
647737 }
648738
739+ int off = idx ;
649740 ByteBuffer buffer = ByteBuffer .wrap (bytes );
741+ buffer .position (off );
650742
651743 while (true ) {
652744 int len = idx - off ;
@@ -690,14 +782,18 @@ private static byte[] percentEncode(byte b) {
690782 }
691783
692784 public static String percentEncode (final String source ) {
785+ return percentEncode (source , UNRESERVED );
786+ }
787+
788+ private static String percentEncode (final String source , final BitSet safe ) {
693789 if (source .isEmpty ()) {
694790 return source ;
695791 }
696792
697793 byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
698794
699795 int off = 0 ;
700- int idx = indexOfUnsafeChar (bytes , off );
796+ int idx = indexOfUnsafeChar (bytes , off , safe );
701797
702798 if (idx == -1 ) {
703799 return source ;
@@ -714,7 +810,7 @@ public static String percentEncode(final String source) {
714810 }
715811
716812 buffer .put (percentEncode (bytes [off ++]));
717- idx = indexOfUnsafeChar (bytes , off );
813+ idx = indexOfUnsafeChar (bytes , off , safe );
718814
719815 if (idx == -1 ) {
720816 int rem = bytes .length - off ;
@@ -883,8 +979,10 @@ private String[] parsePath(final String path, final boolean isSubpath) {
883979 .toArray (String []::new );
884980 }
885981
886- private String encodePath (final String path ) {
887- return Arrays .stream (path .split ("/" )).map (PackageURL ::percentEncode ).collect (Collectors .joining ("/" ));
982+ private String encodePath (final String path , BitSet safe ) {
983+ return Arrays .stream (path .split ("/" ))
984+ .map (source -> percentEncode (source , safe ))
985+ .collect (Collectors .joining ("/" ));
888986 }
889987
890988 /**
0 commit comments