Skip to content

Commit 52dd44f

Browse files
committed
Use list of characters that need encoding
1 parent 8ab171f commit 52dd44f

File tree

1 file changed

+23
-26
lines changed

1 file changed

+23
-26
lines changed

src/main/java/com/github/packageurl/PackageURL.java

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ public final class PackageURL implements Serializable {
5555
private static final long serialVersionUID = 3243226021636427586L;
5656
private static final Pattern PATH_SPLITTER = Pattern.compile("/");
5757

58+
// Characters shared by all of the character sets below; each set is passed as the
// "additionalChars" argument of percentEncode()/encodePath(), i.e. these are characters
// that get percent-encoded on top of whatever needsEncode() already selects.
private static final String COMMON_CHARS = "\"<>";
59+
60+
// Additional characters to encode in the subpath (the part appended after '#').
private static final String FRAGMENT_CHARS = COMMON_CHARS + "`";
61+
62+
// Additional characters to encode in qualifier values (the part appended after '?').
private static final String QUERY_CHARS = COMMON_CHARS + "#";
63+
64+
// Additional characters to encode in namespace, name and version.
private static final String PATH_CHARS = QUERY_CHARS + "?`{}:";
65+
5866
/**
5967
* Constructs a new PackageURL object by parsing the specified string.
6068
*
@@ -403,72 +411,61 @@ private String canonicalize(boolean coordinatesOnly) {
403411
}
404412
purl.append("/");
405413
if (namespace != null) {
406-
purl.append(encodePath(namespace, ":"));
414+
purl.append(encodePath(namespace, PATH_CHARS));
407415
purl.append("/");
408416
}
409417
if (name != null) {
410-
purl.append(percentEncode(name, ":"));
418+
purl.append(percentEncode(name, PATH_CHARS));
411419
}
412420
if (version != null) {
413-
purl.append("@").append(percentEncode(version));
421+
purl.append("@").append(percentEncode(version, PATH_CHARS));
414422
}
415423
if (! coordinatesOnly) {
416424
if (qualifiers != null && qualifiers.size() > 0) {
417425
purl.append("?");
418426
qualifiers.entrySet().stream().forEachOrdered((entry) -> {
419427
purl.append(entry.getKey().toLowerCase());
420428
purl.append("=");
421-
purl.append(percentEncode(entry.getValue(), ":/"));
429+
purl.append(percentEncode(entry.getValue(), QUERY_CHARS));
422430
purl.append("&");
423431
});
424432
purl.setLength(purl.length() - 1);
425433
}
426434
if (subpath != null) {
427-
purl.append("#").append(encodePath(subpath, "?#+&="));
435+
purl.append("#").append(encodePath(subpath, FRAGMENT_CHARS));
428436
}
429437
}
430438
return purl.toString();
431439
}
432440

433-
private String percentEncode(String input, Charset charset, String charsToExclude) {
434-
return uriEncode(input, charset, charsToExclude);
441+
private String percentEncode(String input, Charset charset, String additionalChars) {
442+
return uriEncode(input, charset, additionalChars);
435443
}
436444

437-
private String percentEncode(String input, String charsToExclude) {
438-
return percentEncode(input, StandardCharsets.UTF_8, charsToExclude);
439-
}
440-
441-
/**
442-
* Encodes the input in conformance with RFC 3986.
443-
*
444-
* @param input the String to encode
445-
* @return an encoded String
446-
*/
447-
private String percentEncode(final String input) {
448-
return uriEncode(input, StandardCharsets.UTF_8, null);
445+
private String percentEncode(String input, String additionalChars) {
446+
return percentEncode(input, StandardCharsets.UTF_8, additionalChars);
449447
}
450448

451-
private static String uriEncode(String source, Charset charset, String chars) {
449+
private static String uriEncode(String source, Charset charset, String additionalChars) {
452450
if (source == null || source.length() == 0) {
453451
return source;
454452
}
455453

456454
StringBuilder builder = new StringBuilder();
457455
for (byte b : source.getBytes(charset)) {
458-
if (isUnreserved(b) || chars != null && chars.indexOf(b) != -1) {
459-
builder.append((char) b);
460-
}
461-
else {
456+
if (needsEncode(b) || (additionalChars != null && additionalChars.indexOf(b) != -1)) {
462457
// Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character
463458
builder.append('%');
464459
builder.append(Integer.toHexString(b).toUpperCase());
460+
} else {
461+
builder.append((char) b);
465462
}
466463
}
467464
return builder.toString();
468465
}
469466

470-
private static boolean isUnreserved(int c) {
471-
return (isAlpha(c) || isDigit(c) || '-' == c || '.' == c || '_' == c || '~' == c);
467+
private static boolean needsEncode(int c) {
468+
return ((c >= 0x0000 && c <= 0x0020) || c >= 0x007F || c == '@' || c == '#' || c == '?' || c == '+' || c == '%');
472469
}
473470

474471
private static boolean isAlpha(int c) {

0 commit comments

Comments
 (0)