Skip to content

Commit b6482d2

Browse files
committed
fix: don't encode ':' or '/' as part of the canonical representation
This makes the Java canonical representation match the majority of other implementations. Fixes package-url#122 Fixes package-url#92
1 parent fe90327 commit b6482d2

File tree

2 files changed

+121
-23
lines changed

2 files changed

+121
-23
lines changed

src/main/java/com/github/packageurl/PackageURL.java

+117-19
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import java.nio.ByteBuffer;
3030
import java.nio.charset.StandardCharsets;
3131
import java.util.Arrays;
32+
import java.util.BitSet;
3233
import java.util.Collections;
3334
import java.util.Map;
3435
import java.util.Objects;
@@ -60,6 +61,93 @@ public final class PackageURL implements Serializable {
6061

6162
private static final char PERCENT_CHAR = '%';
6263

64+
private static final int NBITS = 128;
65+
66+
private static final BitSet DIGIT = new BitSet(NBITS);
67+
68+
static {
69+
for (int i = '0'; i <= '9'; i++) {
70+
DIGIT.set(i);
71+
}
72+
}
73+
74+
private static final BitSet LOWER = new BitSet(NBITS);
75+
76+
static {
77+
for (int i = 'a'; i <= 'z'; i++) {
78+
LOWER.set(i);
79+
}
80+
}
81+
82+
private static final BitSet UPPER = new BitSet(NBITS);
83+
84+
static {
85+
for (int i = 'A'; i <= 'Z'; i++) {
86+
UPPER.set(i);
87+
}
88+
}
89+
90+
private static final BitSet ALPHA = new BitSet(NBITS);
91+
92+
static {
93+
ALPHA.or(LOWER);
94+
ALPHA.or(UPPER);
95+
}
96+
97+
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);
98+
99+
static {
100+
ALPHA_DIGIT.or(ALPHA);
101+
ALPHA_DIGIT.or(DIGIT);
102+
}
103+
104+
private static final BitSet UNRESERVED = new BitSet(NBITS);
105+
106+
static {
107+
UNRESERVED.or(ALPHA_DIGIT);
108+
UNRESERVED.set('-');
109+
UNRESERVED.set('.');
110+
UNRESERVED.set('_');
111+
UNRESERVED.set('~');
112+
}
113+
114+
private static final BitSet SUB_DELIMS = new BitSet(NBITS);
115+
116+
static {
117+
SUB_DELIMS.set('!');
118+
SUB_DELIMS.set('$');
119+
SUB_DELIMS.set('&');
120+
SUB_DELIMS.set('\'');
121+
SUB_DELIMS.set('(');
122+
SUB_DELIMS.set(')');
123+
SUB_DELIMS.set('*');
124+
SUB_DELIMS.set('+');
125+
SUB_DELIMS.set(',');
126+
SUB_DELIMS.set(';');
127+
SUB_DELIMS.set('=');
128+
}
129+
130+
private static final BitSet PCHAR = new BitSet(NBITS);
131+
132+
static {
133+
PCHAR.or(UNRESERVED);
134+
PCHAR.or(SUB_DELIMS);
135+
PCHAR.set(':');
136+
// PCHAR.set('@'); Always encode '@' in the path due to version
137+
}
138+
139+
private static final BitSet QUERY = new BitSet(NBITS);
140+
141+
static {
142+
QUERY.or(PCHAR);
143+
QUERY.set('/');
144+
// QUERY.set('?');
145+
QUERY.clear('&');
146+
QUERY.clear('=');
147+
}
148+
149+
private static final BitSet FRAGMENT = QUERY;
150+
63151
/**
64152
* Constructs a new PackageURL object by parsing the specified string.
65153
*
@@ -498,12 +586,12 @@ private String canonicalize(boolean coordinatesOnly) {
498586
final StringBuilder purl = new StringBuilder();
499587
purl.append(SCHEME_PART).append(type).append('/');
500588
if (namespace != null) {
501-
purl.append(encodePath(namespace));
589+
purl.append(encodePath(namespace, PCHAR));
502590
purl.append('/');
503591
}
504-
purl.append(percentEncode(name));
592+
purl.append(percentEncode(name, PCHAR));
505593
if (version != null) {
506-
purl.append('@').append(percentEncode(version));
594+
purl.append('@').append(percentEncode(version, PCHAR));
507595
}
508596

509597
if (!coordinatesOnly) {
@@ -517,23 +605,27 @@ private String canonicalize(boolean coordinatesOnly) {
517605
}
518606
purl.append(entry.getKey());
519607
purl.append('=');
520-
purl.append(percentEncode(entry.getValue()));
608+
purl.append(percentEncode(entry.getValue(), QUERY));
521609
separator = true;
522610
}
523611
}
524612
if (subpath != null) {
525-
purl.append('#').append(encodePath(subpath));
613+
purl.append('#').append(encodePath(subpath, FRAGMENT));
526614
}
527615
}
528616
return purl.toString();
529617
}
530618

531-
private static boolean isUnreserved(int c) {
532-
return (isValidCharForKey(c) || c == '~');
619+
private static boolean isUnreserved(int c, BitSet safe) {
620+
if (c < 0 || c >= NBITS) {
621+
return false;
622+
}
623+
624+
return safe.get(c);
533625
}
534626

535-
private static boolean shouldEncode(int c) {
536-
return !isUnreserved(c);
627+
private static boolean shouldEncode(int c, BitSet safe) {
628+
return !isUnreserved(c, safe);
537629
}
538630

539631
private static boolean isAlpha(int c) {
@@ -598,14 +690,14 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
598690
.orElse(-1);
599691
}
600692

601-
private static int indexOfUnsafeChar(final byte[] bytes, final int start) {
693+
private static int indexOfUnsafeChar(final byte[] bytes, final int start, BitSet safe) {
602694
return IntStream.range(start, bytes.length)
603-
.filter(i -> shouldEncode(bytes[i]))
695+
.filter(i -> shouldEncode(bytes[i], safe))
604696
.findFirst()
605697
.orElse(-1);
606698
}
607699

608-
private static byte percentDecode(final byte[] bytes, final int start) {
700+
static byte percentDecode(final byte[] bytes, final int start) {
609701
if (start + 2 >= bytes.length) {
610702
throw new ValidationException("Incomplete percent encoding at offset " + start + " with value '"
611703
+ new String(bytes, start, bytes.length - start, StandardCharsets.UTF_8) + "'");
@@ -638,15 +730,15 @@ public static String percentDecode(final String source) {
638730
}
639731

640732
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
641-
642-
int off = 0;
643-
int idx = indexOfPercentChar(bytes, off);
733+
int idx = indexOfPercentChar(bytes, 0);
644734

645735
if (idx == -1) {
646736
return source;
647737
}
648738

739+
int off = idx;
649740
ByteBuffer buffer = ByteBuffer.wrap(bytes);
741+
buffer.position(off);
650742

651743
while (true) {
652744
int len = idx - off;
@@ -690,14 +782,18 @@ private static byte[] percentEncode(byte b) {
690782
}
691783

692784
public static String percentEncode(final String source) {
785+
return percentEncode(source, UNRESERVED);
786+
}
787+
788+
private static String percentEncode(final String source, final BitSet safe) {
693789
if (source.isEmpty()) {
694790
return source;
695791
}
696792

697793
byte[] bytes = source.getBytes(StandardCharsets.UTF_8);
698794

699795
int off = 0;
700-
int idx = indexOfUnsafeChar(bytes, off);
796+
int idx = indexOfUnsafeChar(bytes, off, safe);
701797

702798
if (idx == -1) {
703799
return source;
@@ -714,7 +810,7 @@ public static String percentEncode(final String source) {
714810
}
715811

716812
buffer.put(percentEncode(bytes[off++]));
717-
idx = indexOfUnsafeChar(bytes, off);
813+
idx = indexOfUnsafeChar(bytes, off, safe);
718814

719815
if (idx == -1) {
720816
int rem = bytes.length - off;
@@ -883,8 +979,10 @@ private String[] parsePath(final String path, final boolean isSubpath) {
883979
.toArray(String[]::new);
884980
}
885981

886-
private String encodePath(final String path) {
887-
return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/"));
982+
private String encodePath(final String path, BitSet safe) {
983+
return Arrays.stream(path.split("/"))
984+
.map(source -> percentEncode(source, safe))
985+
.collect(Collectors.joining("/"));
888986
}
889987

890988
/**

src/test/resources/test-suite-data.json

+4-4
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@
8686
{
8787
"description": "docker uses qualifiers and hash image id as versions",
8888
"purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
89-
"canonical_purl": "pkg:docker/customer/dockerimage@sha256%3A244fd47e07d1004f0aed9c?repository_url=gcr.io",
89+
"canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
9090
"type": "docker",
9191
"namespace": "customer",
9292
"name": "dockerimage",
@@ -110,7 +110,7 @@
110110
{
111111
"description": "maven often uses qualifiers",
112112
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&classifier=sources",
113-
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io%2Frelease",
113+
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io/release",
114114
"type": "maven",
115115
"namespace": "org.apache.xmlgraphics",
116116
"name": "batik-anim",
@@ -122,7 +122,7 @@
122122
{
123123
"description": "maven pom reference",
124124
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&extension=pom",
125-
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io%2Frelease",
125+
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io/release",
126126
"type": "maven",
127127
"namespace": "org.apache.xmlgraphics",
128128
"name": "batik-anim",
@@ -314,7 +314,7 @@
314314
{
315315
"description": "valid debian purl containing a plus in the name and version",
316316
"purl": "pkg:deb/debian/g++-10@10.2.1+6",
317-
"canonical_purl": "pkg:deb/debian/g%2B%2B-10@10.2.1%2B6",
317+
"canonical_purl": "pkg:deb/debian/g++-10@10.2.1+6",
318318
"type": "deb",
319319
"namespace": "debian",
320320
"name": "g++-10",

0 commit comments

Comments
 (0)