2929import java .nio .ByteBuffer ;
3030import java .nio .charset .StandardCharsets ;
3131import java .util .Arrays ;
32+ import java .util .BitSet ;
3233import java .util .Collections ;
3334import java .util .Map ;
3435import java .util .Objects ;
@@ -59,6 +60,79 @@ public final class PackageURL implements Serializable {
5960
6061 private static final char PERCENT_CHAR = '%' ;
6162
/** Capacity of every character-class table below: one bit per possible byte value. */
private static final int NBITS = 256;

/** ASCII decimal digits, {@code '0'} through {@code '9'}. */
private static final BitSet DIGIT = new BitSet(NBITS);
static {
    DIGIT.set('0', '9' + 1);
}

/** ASCII lower-case letters, {@code 'a'} through {@code 'z'}. */
private static final BitSet LOWER = new BitSet(NBITS);
static {
    LOWER.set('a', 'z' + 1);
}

/** ASCII upper-case letters, {@code 'A'} through {@code 'Z'}. */
private static final BitSet UPPER = new BitSet(NBITS);
static {
    UPPER.set('A', 'Z' + 1);
}

/** Union of {@link #LOWER} and {@link #UPPER}. */
private static final BitSet ALPHA = new BitSet(NBITS);
static {
    ALPHA.or(UPPER);
    ALPHA.or(LOWER);
}

/** Union of {@link #ALPHA} and {@link #DIGIT}. */
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);
static {
    ALPHA_DIGIT.or(DIGIT);
    ALPHA_DIGIT.or(ALPHA);
}

/** Letters and digits plus {@code '-'}, {@code '.'}, {@code '_'} and {@code '~'}. */
private static final BitSet UNRESERVED = new BitSet(NBITS);
static {
    UNRESERVED.or(ALPHA_DIGIT);
    for (final char mark : "-._~".toCharArray()) {
        UNRESERVED.set(mark);
    }
}

/** The eleven sub-delimiter characters: {@code ! $ & ' ( ) * + , ; =}. */
private static final BitSet SUB_DELIMS = new BitSet(NBITS);
static {
    for (final char delim : "!$&'()*+,;=".toCharArray()) {
        SUB_DELIMS.set(delim);
    }
}

/** Characters left unencoded in namespace segments, name and version. */
private static final BitSet PCHAR = new BitSet(NBITS);
static {
    PCHAR.or(UNRESERVED);
    PCHAR.or(SUB_DELIMS);
    PCHAR.set(':');
    // '@' is deliberately absent: it introduces the version in the canonical form,
    // so a literal '@' inside a path component must always be percent-encoded.
}
/**
 * Characters left unencoded in qualifier values: everything in {@link #PCHAR} plus
 * {@code '/'} and {@code '?'}, minus {@code '&'}.
 */
private static final BitSet QUERY = new BitSet(NBITS);
static {
    QUERY.or(PCHAR);
    QUERY.set('/');
    QUERY.set('?');
    // Qualifiers are joined with '&' when the canonical form is built, so a literal '&'
    // inside a value has to be percent-encoded or it would be read as a separator.
    QUERY.clear('&');
}

/**
 * Characters left unencoded in subpath segments: everything in {@link #PCHAR} plus
 * {@code '/'} and {@code '?'}. Kept as its own set rather than an alias of {@link #QUERY}
 * so that adjusting one table can never silently change the other.
 */
private static final BitSet FRAGMENT = new BitSet(NBITS);
static {
    FRAGMENT.or(PCHAR);
    FRAGMENT.set('/');
    FRAGMENT.set('?');
}
135+
62136 /**
63137 * Constructs a new PackageURL object by parsing the specified string.
64138 *
@@ -472,37 +546,42 @@ private String canonicalize(boolean coordinatesOnly) {
472546 final StringBuilder purl = new StringBuilder ();
473547 purl .append (SCHEME_PART ).append (type ).append ("/" );
474548 if (namespace != null ) {
475- purl .append (encodePath (namespace ));
549+ purl .append (encodePath (namespace , PCHAR ));
476550 purl .append ("/" );
477551 }
478- purl .append (percentEncode (name ));
552+ purl .append (percentEncode (name , PCHAR ));
479553 if (version != null ) {
480- purl .append ("@" ).append (percentEncode (version ));
554+ purl .append ("@" ).append (percentEncode (version , PCHAR ));
481555 }
482556 if (! coordinatesOnly ) {
483557 if (qualifiers != null ) {
484558 purl .append ("?" );
485559 qualifiers .forEach ((key , value ) -> {
486560 purl .append (toLowerCase (key ));
487561 purl .append ("=" );
488- purl .append (percentEncode (value ));
562+ purl .append (percentEncode (value , QUERY ));
489563 purl .append ("&" );
490564 });
491565 purl .setLength (purl .length () - 1 );
492566 }
493567 if (subpath != null ) {
494- purl .append ("#" ).append (encodePath (subpath ));
568+ purl .append ("#" ).append (encodePath (subpath , FRAGMENT ));
495569 }
496570 }
497571 return purl .toString ();
498572 }
499573
500- private static boolean isUnreserved (int c ) {
501- return (isValidCharForKey (c ) || c == '~' );
574+ private static boolean isUnreserved (int c , BitSet safe ) {
575+ if (c < 0 || c >= NBITS ) {
576+ return false ;
577+ }
578+
579+ return safe .get (c );
580+
502581 }
503582
504- private static boolean shouldEncode (int c ) {
505- return !isUnreserved (c );
583+ private static boolean shouldEncode (int c , BitSet safe ) {
584+ return !isUnreserved (c , safe );
506585 }
507586
508587 private static boolean isAlpha (int c ) {
@@ -564,8 +643,8 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
564643 return IntStream .range (start , bytes .length ).filter (i -> isPercent (bytes [i ])).findFirst ().orElse (-1 );
565644 }
566645
567- private static int indexOfUnsafeChar (final byte [] bytes , final int start ) {
568- return IntStream .range (start , bytes .length ).filter (i -> shouldEncode (bytes [i ])).findFirst ().orElse (-1 );
646+ private static int indexOfUnsafeChar (final byte [] bytes , final int start , BitSet safe ) {
647+ return IntStream .range (start , bytes .length ).filter (i -> shouldEncode (bytes [i ], safe )).findFirst ().orElse (-1 );
569648 }
570649
571650 private static byte percentDecode (final byte [] bytes , final int start ) {
@@ -649,15 +728,15 @@ private static byte[] percentEncode(byte b) {
649728 return new byte [] {(byte ) PERCENT_CHAR , b1 , b2 };
650729 }
651730
652- public static String percentEncode (final String source ) {
731+ public static String percentEncode (final String source , BitSet safe ) {
653732 if (source .isEmpty ()) {
654733 return source ;
655734 }
656735
657736 byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
658737
659738 int off = 0 ;
660- int idx = indexOfUnsafeChar (bytes , off );
739+ int idx = indexOfUnsafeChar (bytes , off , safe );
661740
662741 if (idx == -1 ) {
663742 return source ;
@@ -674,7 +753,7 @@ public static String percentEncode(final String source) {
674753 }
675754
676755 buffer .put (percentEncode (bytes [off ++]));
677- idx = indexOfUnsafeChar (bytes , off );
756+ idx = indexOfUnsafeChar (bytes , off , safe );
678757
679758 if (idx == -1 ) {
680759 int rem = bytes .length - off ;
@@ -835,8 +914,8 @@ private String[] parsePath(final String path, final boolean isSubpath) {
835914 .toArray (String []::new );
836915 }
837916
838- private String encodePath (final String path ) {
839- return Arrays .stream (path .split ("/" )).map (PackageURL :: percentEncode ).collect (Collectors .joining ("/" ));
917+ private String encodePath (final String path , BitSet safe ) {
918+ return Arrays .stream (path .split ("/" )).map (source -> percentEncode ( source , safe ) ).collect (Collectors .joining ("/" ));
840919 }
841920
842921 /**
0 commit comments