Skip to content

Commit 81dfd23

Browse files
committed
fix: don't encode ':' or '/' as part of the canonical representation
This makes the Java canonical representation match the majority of other implementations. Fixes package-url#122 Fixes package-url#92
1 parent 8925c06 commit 81dfd23

File tree

2 files changed

+106
-24
lines changed

2 files changed

+106
-24
lines changed

src/main/java/com/github/packageurl/PackageURL.java

+102-20
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.nio.ByteBuffer;
3030
import java.nio.charset.StandardCharsets;
3131
import java.util.Arrays;
32+
import java.util.BitSet;
3233
import java.util.Collections;
3334
import java.util.Map;
3435
import java.util.Objects;
@@ -59,6 +60,79 @@ public final class PackageURL implements Serializable {
5960

6061
private static final char PERCENT_CHAR = '%';
6162

63+
private static final int NBITS = 128;
64+
65+
private static final BitSet DIGIT = new BitSet(NBITS);
66+
static {
67+
for (int i = '0'; i <= '9'; i++) {
68+
DIGIT.set(i);
69+
}
70+
}
71+
72+
private static final BitSet LOWER = new BitSet(NBITS);
73+
static {
74+
for (int i = 'a'; i <= 'z'; i++) {
75+
LOWER.set(i);
76+
}
77+
}
78+
79+
private static final BitSet UPPER = new BitSet(NBITS);
80+
static {
81+
for (int i = 'A'; i <= 'Z'; i++) {
82+
UPPER.set(i);
83+
}
84+
}
85+
86+
private static final BitSet ALPHA = new BitSet(NBITS);
87+
static {
88+
ALPHA.or(LOWER);
89+
ALPHA.or(UPPER);
90+
}
91+
92+
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);
93+
static {
94+
ALPHA_DIGIT.or(ALPHA);
95+
ALPHA_DIGIT.or(DIGIT);
96+
}
97+
98+
private static final BitSet UNRESERVED = new BitSet(NBITS);
99+
static {
100+
UNRESERVED.or(ALPHA_DIGIT);
101+
UNRESERVED.set('-');
102+
UNRESERVED.set('.');
103+
UNRESERVED.set('_');
104+
UNRESERVED.set('~');
105+
}
106+
private static final BitSet SUB_DELIMS = new BitSet(NBITS);
107+
static {
108+
SUB_DELIMS.set('!');
109+
SUB_DELIMS.set('$');
110+
SUB_DELIMS.set('&');
111+
SUB_DELIMS.set('\'');
112+
SUB_DELIMS.set('(');
113+
SUB_DELIMS.set(')');
114+
SUB_DELIMS.set('*');
115+
SUB_DELIMS.set('+');
116+
SUB_DELIMS.set(',');
117+
SUB_DELIMS.set(';');
118+
SUB_DELIMS.set('=');
119+
120+
}
121+
private static final BitSet PCHAR = new BitSet(NBITS);
122+
static {
123+
PCHAR.or(UNRESERVED);
124+
PCHAR.or(SUB_DELIMS);
125+
PCHAR.set(':');
126+
// PCHAR.set('@'); Always encode '@' in the path due to version
127+
}
128+
private static final BitSet QUERY = new BitSet(NBITS);
129+
static {
130+
QUERY.or(PCHAR);
131+
QUERY.set('/');
132+
QUERY.set('?');
133+
}
134+
private static final BitSet FRAGMENT = QUERY;
135+
62136
/**
63137
* Constructs a new PackageURL object by parsing the specified string.
64138
*
@@ -472,37 +546,42 @@ private String canonicalize(boolean coordinatesOnly) {
472546
final StringBuilder purl = new StringBuilder();
473547
purl.append(SCHEME_PART).append(type).append("/");
474548
if (namespace != null) {
475-
purl.append(encodePath(namespace));
549+
purl.append(encodePath(namespace, PCHAR));
476550
purl.append("/");
477551
}
478-
purl.append(percentEncode(name));
552+
purl.append(percentEncode(name, PCHAR));
479553
if (version != null) {
480-
purl.append("@").append(percentEncode(version));
554+
purl.append("@").append(percentEncode(version, PCHAR));
481555
}
482556
if (! coordinatesOnly) {
483557
if (qualifiers != null) {
484558
purl.append("?");
485559
qualifiers.forEach((key, value) -> {
486560
purl.append(toLowerCase(key));
487561
purl.append("=");
488-
purl.append(percentEncode(value));
562+
purl.append(percentEncode(value, QUERY));
489563
purl.append("&");
490564
});
491565
purl.setLength(purl.length() - 1);
492566
}
493567
if (subpath != null) {
494-
purl.append("#").append(encodePath(subpath));
568+
purl.append("#").append(encodePath(subpath, FRAGMENT));
495569
}
496570
}
497571
return purl.toString();
498572
}
499573

500-
private static boolean isUnreserved(int c) {
501-
return (isValidCharForKey(c) || c == '~');
574+
private static boolean isUnreserved(int c, BitSet safe) {
575+
if (c < 0 || c >= NBITS) {
576+
return false;
577+
}
578+
579+
return safe.get(c);
580+
502581
}
503582

504-
private static boolean shouldEncode(int c) {
505-
return !isUnreserved(c);
583+
private static boolean shouldEncode(int c, BitSet safe) {
584+
return !isUnreserved(c, safe);
506585
}
507586

508587
private static boolean isAlpha(int c) {
@@ -564,11 +643,11 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
564643
return IntStream.range(start, bytes.length).filter(i -> isPercent(bytes[i])).findFirst().orElse(-1);
565644
}
566645

567-
private static int indexOfUnsafeChar(final byte[] bytes, final int start) {
568-
return IntStream.range(start, bytes.length).filter(i -> shouldEncode(bytes[i])).findFirst().orElse(-1);
646+
private static int indexOfUnsafeChar(final byte[] bytes, final int start, BitSet safe) {
647+
return IntStream.range(start, bytes.length).filter(i -> shouldEncode(bytes[i], safe)).findFirst().orElse(-1);
569648
}
570649

571-
private static byte percentDecode(final byte[] bytes, final int start) {
650+
static byte percentDecode(final byte[] bytes, final int start) {
572651
if (start + 2 >= bytes.length) {
573652
throw new ValidationException("Incomplete percent encoding at offset " + start + " with value '" + new String(bytes, start, bytes.length - start, StandardCharsets.UTF_8) + "'");
574653
}
@@ -598,15 +677,15 @@ public static String percentDecode(final String source) {
598677
}
599678

600679
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
601-
602-
int off = 0;
603-
int idx = indexOfPercentChar(bytes, off);
680+
int idx = indexOfPercentChar(bytes, 0);
604681

605682
if (idx == -1) {
606683
return source;
607684
}
608685

686+
int off = idx;
609687
ByteBuffer buffer = ByteBuffer.wrap(bytes);
688+
buffer.position(off);
610689

611690
while (true) {
612691
int len = idx - off;
@@ -650,14 +729,18 @@ private static byte[] percentEncode(byte b) {
650729
}
651730

652731
public static String percentEncode(final String source) {
732+
return percentEncode(source, new BitSet(0));
733+
}
734+
735+
private static String percentEncode(final String source, final BitSet safe) {
653736
if (source.isEmpty()) {
654737
return source;
655738
}
656739

657740
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
658741

659742
int off = 0;
660-
int idx = indexOfUnsafeChar(bytes, off);
743+
int idx = indexOfUnsafeChar(bytes, off, safe);
661744

662745
if (idx == -1) {
663746
return source;
@@ -674,7 +757,7 @@ public static String percentEncode(final String source) {
674757
}
675758

676759
buffer.put(percentEncode(bytes[off++]));
677-
idx = indexOfUnsafeChar(bytes, off);
760+
idx = indexOfUnsafeChar(bytes, off, safe);
678761

679762
if (idx == -1) {
680763
int rem = bytes.length - off;
@@ -733,7 +816,6 @@ private void parse(final String purl) throws MalformedPackageURLException {
733816
final String rawQuery = uri.getRawQuery();
734817
if (rawQuery != null && !rawQuery.isEmpty()) {
735818
this.qualifiers = parseQualifiers(rawQuery);
736-
737819
}
738820
// this is the rest of the purl that needs to be parsed
739821
String remainder = uri.getRawPath();
@@ -835,8 +917,8 @@ private String[] parsePath(final String path, final boolean isSubpath) {
835917
.toArray(String[]::new);
836918
}
837919

838-
private String encodePath(final String path) {
839-
return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/"));
920+
private String encodePath(final String path, BitSet safe) {
921+
return Arrays.stream(path.split("/")).map(source -> percentEncode(source, safe)).collect(Collectors.joining("/"));
840922
}
841923

842924
/**

src/test/resources/test-suite-data.json

+4-4
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
{
8787
"description": "docker uses qualifiers and hash image id as versions",
8888
"purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
89-
"canonical_purl": "pkg:docker/customer/dockerimage@sha256%3A244fd47e07d1004f0aed9c?repository_url=gcr.io",
89+
"canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
9090
"type": "docker",
9191
"namespace": "customer",
9292
"name": "dockerimage",
@@ -110,7 +110,7 @@
110110
{
111111
"description": "maven often uses qualifiers",
112112
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&classifier=sources",
113-
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io%2Frelease",
113+
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io/release",
114114
"type": "maven",
115115
"namespace": "org.apache.xmlgraphics",
116116
"name": "batik-anim",
@@ -122,7 +122,7 @@
122122
{
123123
"description": "maven pom reference",
124124
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&extension=pom",
125-
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io%2Frelease",
125+
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io/release",
126126
"type": "maven",
127127
"namespace": "org.apache.xmlgraphics",
128128
"name": "batik-anim",
@@ -314,7 +314,7 @@
314314
{
315315
"description": "valid debian purl containing a plus in the name and version",
316316
"purl": "pkg:deb/debian/g++-10@10.2.1+6",
317-
"canonical_purl": "pkg:deb/debian/g%2B%2B-10@10.2.1%2B6",
317+
"canonical_purl": "pkg:deb/debian/g++-10@10.2.1+6",
318318
"type": "deb",
319319
"namespace": "debian",
320320
"name": "g++-10",

0 commit comments

Comments
 (0)