Skip to content

Commit 447ba2a

Browse files
committed
fix: don't encode ':' or '/' as part of the canonical representation
This makes the Java canonical representation match the majority of other implementations. Fixes package-url#122 Fixes package-url#92
1 parent 8925c06 commit 447ba2a

File tree

2 files changed

+99
-20
lines changed

2 files changed

+99
-20
lines changed

src/main/java/com/github/packageurl/PackageURL.java

+95-16
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.nio.ByteBuffer;
3030
import java.nio.charset.StandardCharsets;
3131
import java.util.Arrays;
32+
import java.util.BitSet;
3233
import java.util.Collections;
3334
import java.util.Map;
3435
import java.util.Objects;
@@ -59,6 +60,79 @@ public final class PackageURL implements Serializable {
5960

6061
private static final char PERCENT_CHAR = '%';
6162

63+
private static final int NBITS = 128;
64+
65+
private static final BitSet DIGIT = new BitSet(NBITS);
66+
static {
67+
for (int i = '0'; i <= '9'; i++) {
68+
DIGIT.set(i);
69+
}
70+
}
71+
72+
private static final BitSet LOWER = new BitSet(NBITS);
73+
static {
74+
for (int i = 'a'; i <= 'z'; i++) {
75+
LOWER.set(i);
76+
}
77+
}
78+
79+
private static final BitSet UPPER = new BitSet(NBITS);
80+
static {
81+
for (int i = 'A'; i <= 'Z'; i++) {
82+
UPPER.set(i);
83+
}
84+
}
85+
86+
private static final BitSet ALPHA = new BitSet(NBITS);
87+
static {
88+
ALPHA.or(LOWER);
89+
ALPHA.or(UPPER);
90+
}
91+
92+
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);
93+
static {
94+
ALPHA_DIGIT.or(ALPHA);
95+
ALPHA_DIGIT.or(DIGIT);
96+
}
97+
98+
private static final BitSet UNRESERVED = new BitSet(NBITS);
99+
static {
100+
UNRESERVED.or(ALPHA_DIGIT);
101+
UNRESERVED.set('-');
102+
UNRESERVED.set('.');
103+
UNRESERVED.set('_');
104+
UNRESERVED.set('~');
105+
}
106+
private static final BitSet SUB_DELIMS = new BitSet(NBITS);
107+
static {
108+
SUB_DELIMS.set('!');
109+
SUB_DELIMS.set('$');
110+
SUB_DELIMS.set('&');
111+
SUB_DELIMS.set('\'');
112+
SUB_DELIMS.set('(');
113+
SUB_DELIMS.set(')');
114+
SUB_DELIMS.set('*');
115+
SUB_DELIMS.set('+');
116+
SUB_DELIMS.set(',');
117+
SUB_DELIMS.set(';');
118+
SUB_DELIMS.set('=');
119+
120+
}
121+
private static final BitSet PCHAR = new BitSet(NBITS);
122+
static {
123+
PCHAR.or(UNRESERVED);
124+
PCHAR.or(SUB_DELIMS);
125+
PCHAR.set(':');
126+
// PCHAR.set('@'); Always encode '@' in the path due to version
127+
}
128+
private static final BitSet QUERY = new BitSet(NBITS);
129+
static {
130+
QUERY.or(PCHAR);
131+
QUERY.set('/');
132+
QUERY.set('?');
133+
}
134+
private static final BitSet FRAGMENT = QUERY;
135+
62136
/**
63137
* Constructs a new PackageURL object by parsing the specified string.
64138
*
@@ -472,37 +546,42 @@ private String canonicalize(boolean coordinatesOnly) {
472546
final StringBuilder purl = new StringBuilder();
473547
purl.append(SCHEME_PART).append(type).append("/");
474548
if (namespace != null) {
475-
purl.append(encodePath(namespace));
549+
purl.append(encodePath(namespace, PCHAR));
476550
purl.append("/");
477551
}
478-
purl.append(percentEncode(name));
552+
purl.append(percentEncode(name, PCHAR));
479553
if (version != null) {
480-
purl.append("@").append(percentEncode(version));
554+
purl.append("@").append(percentEncode(version, PCHAR));
481555
}
482556
if (! coordinatesOnly) {
483557
if (qualifiers != null) {
484558
purl.append("?");
485559
qualifiers.forEach((key, value) -> {
486560
purl.append(toLowerCase(key));
487561
purl.append("=");
488-
purl.append(percentEncode(value));
562+
purl.append(percentEncode(value, QUERY));
489563
purl.append("&");
490564
});
491565
purl.setLength(purl.length() - 1);
492566
}
493567
if (subpath != null) {
494-
purl.append("#").append(encodePath(subpath));
568+
purl.append("#").append(encodePath(subpath, FRAGMENT));
495569
}
496570
}
497571
return purl.toString();
498572
}
499573

500-
private static boolean isUnreserved(int c) {
501-
return (isValidCharForKey(c) || c == '~');
574+
private static boolean isUnreserved(int c, BitSet safe) {
575+
if (c < 0 || c >= NBITS) {
576+
return false;
577+
}
578+
579+
return safe.get(c);
580+
502581
}
503582

504-
private static boolean shouldEncode(int c) {
505-
return !isUnreserved(c);
583+
private static boolean shouldEncode(int c, BitSet safe) {
584+
return !isUnreserved(c, safe);
506585
}
507586

508587
private static boolean isAlpha(int c) {
@@ -564,8 +643,8 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
564643
return IntStream.range(start, bytes.length).filter(i -> isPercent(bytes[i])).findFirst().orElse(-1);
565644
}
566645

567-
private static int indexOfUnsafeChar(final byte[] bytes, final int start) {
568-
return IntStream.range(start, bytes.length).filter(i -> shouldEncode(bytes[i])).findFirst().orElse(-1);
646+
private static int indexOfUnsafeChar(final byte[] bytes, final int start, BitSet safe) {
647+
return IntStream.range(start, bytes.length).filter(i -> shouldEncode(bytes[i], safe)).findFirst().orElse(-1);
569648
}
570649

571650
private static byte percentDecode(final byte[] bytes, final int start) {
@@ -649,15 +728,15 @@ private static byte[] percentEncode(byte b) {
649728
return new byte[] {(byte) PERCENT_CHAR, b1, b2};
650729
}
651730

652-
public static String percentEncode(final String source) {
731+
public static String percentEncode(final String source, BitSet safe) {
653732
if (source.isEmpty()) {
654733
return source;
655734
}
656735

657736
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
658737

659738
int off = 0;
660-
int idx = indexOfUnsafeChar(bytes, off);
739+
int idx = indexOfUnsafeChar(bytes, off, safe);
661740

662741
if (idx == -1) {
663742
return source;
@@ -674,7 +753,7 @@ public static String percentEncode(final String source) {
674753
}
675754

676755
buffer.put(percentEncode(bytes[off++]));
677-
idx = indexOfUnsafeChar(bytes, off);
756+
idx = indexOfUnsafeChar(bytes, off, safe);
678757

679758
if (idx == -1) {
680759
int rem = bytes.length - off;
@@ -835,8 +914,8 @@ private String[] parsePath(final String path, final boolean isSubpath) {
835914
.toArray(String[]::new);
836915
}
837916

838-
private String encodePath(final String path) {
839-
return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/"));
917+
private String encodePath(final String path, BitSet safe) {
918+
return Arrays.stream(path.split("/")).map(source -> percentEncode(source, safe)).collect(Collectors.joining("/"));
840919
}
841920

842921
/**

src/test/resources/test-suite-data.json

+4-4
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
{
8787
"description": "docker uses qualifiers and hash image id as versions",
8888
"purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
89-
"canonical_purl": "pkg:docker/customer/dockerimage@sha256%3A244fd47e07d1004f0aed9c?repository_url=gcr.io",
89+
"canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
9090
"type": "docker",
9191
"namespace": "customer",
9292
"name": "dockerimage",
@@ -110,7 +110,7 @@
110110
{
111111
"description": "maven often uses qualifiers",
112112
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&classifier=sources",
113-
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io%2Frelease",
113+
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io/release",
114114
"type": "maven",
115115
"namespace": "org.apache.xmlgraphics",
116116
"name": "batik-anim",
@@ -122,7 +122,7 @@
122122
{
123123
"description": "maven pom reference",
124124
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&extension=pom",
125-
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io%2Frelease",
125+
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io/release",
126126
"type": "maven",
127127
"namespace": "org.apache.xmlgraphics",
128128
"name": "batik-anim",
@@ -314,7 +314,7 @@
314314
{
315315
"description": "valid debian purl containing a plus in the name and version",
316316
"purl": "pkg:deb/debian/g++-10@10.2.1+6",
317-
"canonical_purl": "pkg:deb/debian/g%2B%2B-10@10.2.1%2B6",
317+
"canonical_purl": "pkg:deb/debian/g++-10@10.2.1+6",
318318
"type": "deb",
319319
"namespace": "debian",
320320
"name": "g++-10",

0 commit comments

Comments
 (0)