Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 6aeea9b

Browse files
committed Mar 20, 2025
fix: don't encode ':' or '/' as part of the canonical representation
This makes the Java canonical representation match the majority of other implementations. Fixes #122. Fixes #92.
1 parent a38ccd7 commit 6aeea9b

File tree

3 files changed

+179
-24
lines changed

3 files changed

+179
-24
lines changed
 

‎src/main/java/com/github/packageurl/PackageURL.java

+135 -20
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import java.nio.ByteBuffer;
3030
import java.nio.charset.StandardCharsets;
3131
import java.util.Arrays;
32+
import java.util.BitSet;
3233
import java.util.Collections;
3334
import java.util.Map;
3435
import java.util.Objects;
@@ -60,6 +61,110 @@ public final class PackageURL implements Serializable {
6061

6162
private static final char PERCENT_CHAR = '%';
6263

64+
private static final int NBITS = 128;
65+
66+
private static final BitSet DIGIT = new BitSet(NBITS);
67+
68+
static {
69+
IntStream.rangeClosed('0', '9').forEach(DIGIT::set);
70+
}
71+
72+
private static final BitSet LOWER = new BitSet(NBITS);
73+
74+
static {
75+
IntStream.rangeClosed('a', 'z').forEach(LOWER::set);
76+
}
77+
78+
private static final BitSet UPPER = new BitSet(NBITS);
79+
80+
static {
81+
IntStream.rangeClosed('A', 'Z').forEach(UPPER::set);
82+
}
83+
84+
private static final BitSet ALPHA = new BitSet(NBITS);
85+
86+
static {
87+
ALPHA.or(LOWER);
88+
ALPHA.or(UPPER);
89+
}
90+
91+
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);
92+
93+
static {
94+
ALPHA_DIGIT.or(ALPHA);
95+
ALPHA_DIGIT.or(DIGIT);
96+
}
97+
98+
private static final BitSet UNRESERVED = new BitSet(NBITS);
99+
100+
static {
101+
UNRESERVED.or(ALPHA_DIGIT);
102+
UNRESERVED.set('-');
103+
UNRESERVED.set('.');
104+
UNRESERVED.set('_');
105+
UNRESERVED.set('~');
106+
}
107+
108+
private static final BitSet GEN_DELIMS = new BitSet(NBITS);
109+
110+
static {
111+
GEN_DELIMS.set(':');
112+
GEN_DELIMS.set('/');
113+
GEN_DELIMS.set('?');
114+
GEN_DELIMS.set('#');
115+
GEN_DELIMS.set('[');
116+
GEN_DELIMS.set(']');
117+
GEN_DELIMS.set('@');
118+
}
119+
120+
private static final BitSet SUB_DELIMS = new BitSet(NBITS);
121+
122+
static {
123+
SUB_DELIMS.set('!');
124+
SUB_DELIMS.set('$');
125+
SUB_DELIMS.set('&');
126+
SUB_DELIMS.set('\'');
127+
SUB_DELIMS.set('(');
128+
SUB_DELIMS.set(')');
129+
SUB_DELIMS.set('*');
130+
SUB_DELIMS.set('+');
131+
SUB_DELIMS.set(',');
132+
SUB_DELIMS.set(';');
133+
SUB_DELIMS.set('=');
134+
}
135+
136+
private static final BitSet PCHAR = new BitSet(NBITS);
137+
138+
static {
139+
PCHAR.or(UNRESERVED);
140+
PCHAR.or(SUB_DELIMS);
141+
PCHAR.set(':');
142+
PCHAR.clear('&'); // XXX: Why?
143+
}
144+
145+
private static final BitSet QUERY = new BitSet(NBITS);
146+
147+
static {
148+
QUERY.or(GEN_DELIMS);
149+
QUERY.or(PCHAR);
150+
QUERY.set('/');
151+
QUERY.set('?');
152+
QUERY.clear('#');
153+
QUERY.clear('&');
154+
QUERY.clear('=');
155+
}
156+
157+
private static final BitSet FRAGMENT = new BitSet(NBITS);
158+
159+
static {
160+
FRAGMENT.or(GEN_DELIMS);
161+
FRAGMENT.or(PCHAR);
162+
FRAGMENT.set('/');
163+
FRAGMENT.set('?');
164+
FRAGMENT.set('&');
165+
FRAGMENT.clear('#');
166+
}
167+
63168
/**
64169
* Constructs a new PackageURL object by parsing the specified string.
65170
*
@@ -82,7 +187,7 @@ public PackageURL(final String purl) throws MalformedPackageURLException {
82187
* @since 1.0.0
83188
*/
84189
public PackageURL(final String type, final String name) throws MalformedPackageURLException {
85-
this(type, null, name, null, null, null);
190+
this(type, null, name, null, (Map<String, String>) null, null);
86191
}
87192

88193
/**
@@ -406,7 +511,7 @@ private String validateName(final String value) throws MalformedPackageURLExcept
406511
}
407512
}
408513

409-
private @Nullable Map<String, String> validateQualifiers(final @Nullable Map<String, String> values)
514+
private static @Nullable Map<String, String> validateQualifiers(final @Nullable Map<String, String> values)
410515
throws MalformedPackageURLException {
411516
if (values == null || values.isEmpty()) {
412517
return null;
@@ -417,6 +522,7 @@ private String validateName(final String value) throws MalformedPackageURLExcept
417522
validateKey(key);
418523
validateValue(key, entry.getValue());
419524
}
525+
420526
return values;
421527
}
422528

@@ -498,12 +604,12 @@ private String canonicalize(boolean coordinatesOnly) {
498604
final StringBuilder purl = new StringBuilder();
499605
purl.append(SCHEME_PART).append(type).append('/');
500606
if (namespace != null) {
501-
purl.append(encodePath(namespace));
607+
purl.append(encodePath(namespace, PCHAR));
502608
purl.append('/');
503609
}
504-
purl.append(percentEncode(name));
610+
purl.append(percentEncode(name, PCHAR));
505611
if (version != null) {
506-
purl.append('@').append(percentEncode(version));
612+
purl.append('@').append(percentEncode(version, PCHAR));
507613
}
508614

509615
if (!coordinatesOnly) {
@@ -517,23 +623,27 @@ private String canonicalize(boolean coordinatesOnly) {
517623
}
518624
purl.append(entry.getKey());
519625
purl.append('=');
520-
purl.append(percentEncode(entry.getValue()));
626+
purl.append(percentEncode(entry.getValue(), QUERY));
521627
separator = true;
522628
}
523629
}
524630
if (subpath != null) {
525-
purl.append('#').append(encodePath(subpath));
631+
purl.append('#').append(encodePath(subpath, FRAGMENT));
526632
}
527633
}
528634
return purl.toString();
529635
}
530636

531-
private static boolean isUnreserved(int c) {
532-
return (isValidCharForKey(c) || c == '~');
637+
private static boolean isUnreserved(int c, BitSet safe) {
638+
if (c < 0 || c >= NBITS) {
639+
return false;
640+
}
641+
642+
return safe.get(c);
533643
}
534644

535-
private static boolean shouldEncode(int c) {
536-
return !isUnreserved(c);
645+
private static boolean shouldEncode(int c, BitSet safe) {
646+
return !isUnreserved(c, safe);
537647
}
538648

539649
private static boolean isAlpha(int c) {
@@ -596,14 +706,14 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
596706
.orElse(-1);
597707
}
598708

599-
private static int indexOfUnsafeChar(final byte[] bytes, final int start) {
709+
private static int indexOfUnsafeChar(final byte[] bytes, final int start, BitSet safe) {
600710
return IntStream.range(start, bytes.length)
601-
.filter(i -> shouldEncode(bytes[i]))
711+
.filter(i -> shouldEncode(bytes[i], safe))
602712
.findFirst()
603713
.orElse(-1);
604714
}
605715

606-
private static byte percentDecode(final byte[] bytes, final int start) {
716+
static byte percentDecode(final byte[] bytes, final int start) {
607717
if (start + 2 >= bytes.length) {
608718
throw new ValidationException("Incomplete percent encoding at offset " + start + " with value '"
609719
+ new String(bytes, start, bytes.length - start, StandardCharsets.UTF_8) + "'");
@@ -671,7 +781,11 @@ private static boolean isPercent(int c) {
671781
return (c == PERCENT_CHAR);
672782
}
673783

674-
private static String percentEncode(final String source) {
784+
static String percentEncode(final String source) {
785+
return percentEncode(source, UNRESERVED);
786+
}
787+
788+
private static String percentEncode(final String source, final BitSet safe) {
675789
if (source.isEmpty()) {
676790
return source;
677791
}
@@ -682,7 +796,7 @@ private static String percentEncode(final String source) {
682796
boolean changed = false;
683797

684798
for (byte b : bytes) {
685-
if (shouldEncode(b)) {
799+
if (shouldEncode(b, safe)) {
686800
changed = true;
687801
byte b1 = (byte) Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
688802
byte b2 = (byte) Character.toUpperCase(Character.forDigit(b & 0xF, 16));
@@ -818,8 +932,7 @@ private void verifyTypeConstraints(String type, @Nullable String namespace, @Nul
818932
}
819933
}
820934

821-
@SuppressWarnings("StringSplitter") // reason: surprising behavior is okay in this case
822-
private @Nullable Map<String, String> parseQualifiers(final String encodedString)
935+
static @Nullable Map<String, String> parseQualifiers(final String encodedString)
823936
throws MalformedPackageURLException {
824937
try {
825938
final TreeMap<String, String> results = Arrays.stream(encodedString.split("&"))
@@ -850,8 +963,10 @@ private String[] parsePath(final String path, final boolean isSubpath) {
850963
.toArray(String[]::new);
851964
}
852965

853-
private String encodePath(final String path) {
854-
return Arrays.stream(path.split("/")).map(PackageURL::percentEncode).collect(Collectors.joining("/"));
966+
private String encodePath(final String path, BitSet safe) {
967+
return Arrays.stream(path.split("/"))
968+
.map(source -> percentEncode(source, safe))
969+
.collect(Collectors.joining("/"));
855970
}
856971

857972
/**

‎src/test/java/com/github/packageurl/PackageURLTest.java

+40
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,12 @@
2828
import static org.junit.jupiter.api.Assertions.assertTrue;
2929

3030
import java.io.IOException;
31+
import java.net.URI;
32+
import java.net.URISyntaxException;
33+
import java.util.Arrays;
3134
import java.util.Locale;
35+
import java.util.Map;
36+
import java.util.stream.Collectors;
3237
import java.util.stream.Stream;
3338
import org.jspecify.annotations.Nullable;
3439
import org.junit.jupiter.api.AfterAll;
@@ -278,4 +283,39 @@ void npmCaseSensitive() throws Exception {
278283
assertEquals("Base64", base64Uppercase.getName());
279284
assertEquals("1.0.0", base64Uppercase.getVersion());
280285
}
286+
287+
@Test
288+
void uriEncode() throws URISyntaxException, MalformedPackageURLException {
289+
String genDelims = "?#[]@"; // /
290+
String subDelims = "!$&'()*+,;=";
291+
String pchar = "/" + genDelims + subDelims + ":";
292+
String query = "key=" + pchar.replace("=", "%3D").replace("&", "%26") + "/?";
293+
String fragment = pchar + "/?";
294+
String scheme = "pkg";
295+
String type = "generic";
296+
String subpath = fragment.replaceFirst("^/+", "");
297+
URI uri = new URI(scheme, type, pchar, query, subpath);
298+
PackageURL purl = new PackageURL(uri.toASCIIString());
299+
Map<String, String> qualifiers = Arrays.stream(query.split("&"))
300+
.map(kv -> kv.split("="))
301+
.filter(kvArray -> kvArray.length == 2)
302+
.collect(Collectors.toMap(kv -> kv[0], kv -> kv[1]));
303+
PackageURL purl2 = PackageURLBuilder.aPackageURL()
304+
.withType(type)
305+
.withNamespace("")
306+
.withName(genDelims.replace("@", ""))
307+
.withVersion(subDelims + ":")
308+
.withQualifiers(qualifiers)
309+
.withSubpath(subpath)
310+
.build();
311+
assertEquals(purl, purl2);
312+
assertEquals(
313+
uri.getQuery(),
314+
purl.getQualifiers().entrySet().stream()
315+
.map(Map.Entry::toString)
316+
.collect(Collectors.joining("&")));
317+
assertEquals(uri.getFragment(), purl.getSubpath());
318+
assertEquals(uri.getPath(), "/" + purl.getName() + '@' + purl.getVersion());
319+
assertEquals(uri.toASCIIString().replace("pkg://", "pkg:").replaceFirst("&", "%26"), purl.toString());
320+
}
281321
}

‎src/test/resources/test-suite-data.json

+4 -4
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@
8686
{
8787
"description": "docker uses qualifiers and hash image id as versions",
8888
"purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
89-
"canonical_purl": "pkg:docker/customer/dockerimage@sha256%3A244fd47e07d1004f0aed9c?repository_url=gcr.io",
89+
"canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
9090
"type": "docker",
9191
"namespace": "customer",
9292
"name": "dockerimage",
@@ -110,7 +110,7 @@
110110
{
111111
"description": "maven often uses qualifiers",
112112
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&classifier=sources",
113-
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io%2Frelease",
113+
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?classifier=sources&repository_url=repo.spring.io/release",
114114
"type": "maven",
115115
"namespace": "org.apache.xmlgraphics",
116116
"name": "batik-anim",
@@ -122,7 +122,7 @@
122122
{
123123
"description": "maven pom reference",
124124
"purl": "pkg:Maven/org.apache.xmlgraphics/batik-anim@1.9.1?repositorY_url=repo.spring.io/release&extension=pom",
125-
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io%2Frelease",
125+
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/batik-anim@1.9.1?extension=pom&repository_url=repo.spring.io/release",
126126
"type": "maven",
127127
"namespace": "org.apache.xmlgraphics",
128128
"name": "batik-anim",
@@ -314,7 +314,7 @@
314314
{
315315
"description": "valid debian purl containing a plus in the name and version",
316316
"purl": "pkg:deb/debian/g++-10@10.2.1+6",
317-
"canonical_purl": "pkg:deb/debian/g%2B%2B-10@10.2.1%2B6",
317+
"canonical_purl": "pkg:deb/debian/g++-10@10.2.1+6",
318318
"type": "deb",
319319
"namespace": "debian",
320320
"name": "g++-10",

0 commit comments

Comments
 (0)
Please sign in to comment.