Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: don't encode ':' or '/' as part of the canonical representation #161

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions src/main/java/com/github/packageurl/PackageURL.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@
*/
package com.github.packageurl;

import static com.github.packageurl.internal.StringUtil.FRAGMENTCHAR;
import static com.github.packageurl.internal.StringUtil.PCHAR;
import static com.github.packageurl.internal.StringUtil.QUERYCHAR;
import static java.util.Objects.requireNonNull;

import com.github.packageurl.internal.StringUtil;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Collections;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -451,6 +455,7 @@ private static String validateName(final String type, final String value) throws
validateKey(key);
validateValue(key, entry.getValue());
}

return values;
}

Expand Down Expand Up @@ -531,12 +536,12 @@ private String canonicalize(boolean coordinatesOnly) {
final StringBuilder purl = new StringBuilder();
purl.append(SCHEME_PART).append(type).append('/');
if (namespace != null) {
purl.append(encodePath(namespace));
purl.append(encodePath(namespace, PCHAR));
purl.append('/');
}
purl.append(StringUtil.percentEncode(name));
purl.append(StringUtil.percentEncode(name, PCHAR));
if (version != null) {
purl.append('@').append(StringUtil.percentEncode(version));
purl.append('@').append(StringUtil.percentEncode(version, PCHAR));
}

if (!coordinatesOnly) {
Expand All @@ -550,12 +555,12 @@ private String canonicalize(boolean coordinatesOnly) {
}
purl.append(entry.getKey());
purl.append('=');
purl.append(StringUtil.percentEncode(entry.getValue()));
purl.append(StringUtil.percentEncode(entry.getValue(), QUERYCHAR));
separator = true;
}
}
if (subpath != null) {
purl.append('#').append(encodePath(subpath));
purl.append('#').append(encodePath(subpath, FRAGMENTCHAR));
}
}
return purl.toString();
Expand Down Expand Up @@ -584,7 +589,7 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac
}

try {
final TreeMap<String, String> results = qualifiers.entrySet().stream()
final Map<String, String> results = qualifiers.entrySet().stream()
.filter(entry -> !isEmpty(entry.getValue()))
.collect(
TreeMap::new,
Expand All @@ -596,8 +601,7 @@ private static void verifyTypeConstraints(String type, @Nullable String namespac
}
}

@SuppressWarnings("StringSplitter") // reason: surprising behavior is okay in this case
private static @Nullable Map<String, String> parseQualifiers(final String encodedString)
static @Nullable Map<String, String> parseQualifiers(final String encodedString)
throws MalformedPackageURLException {
try {
final TreeMap<String, String> results = Arrays.stream(encodedString.split("&"))
Expand Down Expand Up @@ -628,8 +632,10 @@ private static String[] parsePath(final String path, final boolean isSubpath) {
.toArray(String[]::new);
}

private static String encodePath(final String path) {
return Arrays.stream(path.split("/")).map(StringUtil::percentEncode).collect(Collectors.joining("/"));
private static String encodePath(final String path, BitSet unreservedChars) {
return Arrays.stream(path.split("/"))
.map(segment -> StringUtil.percentEncode(segment, unreservedChars))
.collect(Collectors.joining("/"));
}

/**
Expand Down
139 changes: 115 additions & 24 deletions src/main/java/com/github/packageurl/internal/StringUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

import com.github.packageurl.ValidationException;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import java.util.stream.IntStream;
import org.jspecify.annotations.NonNull;

/**
Expand All @@ -33,25 +35,110 @@
* @since 2.0.0
*/
public final class StringUtil {

private static final byte PERCENT_CHAR = '%';

private static final boolean[] UNRESERVED_CHARS = new boolean[128];
private static final int NBITS = 128;

private static final BitSet DIGIT = new BitSet(NBITS);

static {
for (char c = '0'; c <= '9'; c++) {
UNRESERVED_CHARS[c] = true;
}
for (char c = 'A'; c <= 'Z'; c++) {
UNRESERVED_CHARS[c] = true;
}
for (char c = 'a'; c <= 'z'; c++) {
UNRESERVED_CHARS[c] = true;
}
UNRESERVED_CHARS['-'] = true;
UNRESERVED_CHARS['.'] = true;
UNRESERVED_CHARS['_'] = true;
UNRESERVED_CHARS['~'] = true;
IntStream.rangeClosed('0', '9').forEach(DIGIT::set);
}

private static final BitSet LOWER = new BitSet(NBITS);

static {
IntStream.rangeClosed('a', 'z').forEach(LOWER::set);
}

private static final BitSet UPPER = new BitSet(NBITS);

static {
IntStream.rangeClosed('A', 'Z').forEach(UPPER::set);
}

private static final BitSet ALPHA = new BitSet(NBITS);

static {
ALPHA.or(LOWER);
ALPHA.or(UPPER);
}

private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);

static {
ALPHA_DIGIT.or(ALPHA);
ALPHA_DIGIT.or(DIGIT);
}

private static final BitSet UNRESERVED = new BitSet(NBITS);

static {
UNRESERVED.or(ALPHA_DIGIT);
UNRESERVED.set('-');
UNRESERVED.set('.');
UNRESERVED.set('_');
UNRESERVED.set('~');
}

private static final BitSet GEN_DELIMS = new BitSet(NBITS);

static {
GEN_DELIMS.set(':');
GEN_DELIMS.set('/');
GEN_DELIMS.set('?');
GEN_DELIMS.set('#');
GEN_DELIMS.set('[');
GEN_DELIMS.set(']');
GEN_DELIMS.set('@');
}

private static final BitSet SUB_DELIMS = new BitSet(NBITS);

static {
SUB_DELIMS.set('!');
SUB_DELIMS.set('$');
SUB_DELIMS.set('&');
SUB_DELIMS.set('\'');
SUB_DELIMS.set('(');
SUB_DELIMS.set(')');
SUB_DELIMS.set('*');
SUB_DELIMS.set('+');
SUB_DELIMS.set(',');
SUB_DELIMS.set(';');
SUB_DELIMS.set('=');
}

public static final BitSet PCHAR = new BitSet(NBITS);

static {
PCHAR.or(UNRESERVED);
PCHAR.or(SUB_DELIMS);
PCHAR.set(':');
PCHAR.clear('&'); // XXX: Why?
}

public static final BitSet QUERYCHAR = new BitSet(NBITS);

static {
QUERYCHAR.or(GEN_DELIMS);
QUERYCHAR.or(PCHAR);
QUERYCHAR.set('/');
QUERYCHAR.set('?');
QUERYCHAR.clear('#');
QUERYCHAR.clear('&');
QUERYCHAR.clear('=');
}

public static final BitSet FRAGMENTCHAR = new BitSet(NBITS);

static {
FRAGMENTCHAR.or(GEN_DELIMS);
FRAGMENTCHAR.or(PCHAR);
FRAGMENTCHAR.set('/');
FRAGMENTCHAR.set('?');
FRAGMENTCHAR.set('&');
FRAGMENTCHAR.clear('#');
}

private StringUtil() {
Expand Down Expand Up @@ -121,8 +208,8 @@ private StringUtil() {
*
* @since 2.0.0
*/
public static @NonNull String percentEncode(@NonNull final String source) {
if (!shouldEncode(source)) {
public static @NonNull String percentEncode(@NonNull final String source, BitSet unreservedChars) {
if (!shouldEncode(source, unreservedChars)) {
return source;
}

Expand All @@ -131,7 +218,7 @@ private StringUtil() {

int writePos = 0;
for (byte b : src) {
if (shouldEncode(toUnsignedInt(b))) {
if (shouldEncode(toUnsignedInt(b), unreservedChars)) {
dest[writePos++] = PERCENT_CHAR;
dest[writePos++] = toHexDigit(b >> 4);
dest[writePos++] = toHexDigit(b);
Expand Down Expand Up @@ -191,20 +278,24 @@ private static byte toHexDigit(int b) {
* </p>
* @param c non-negative integer.
*/
private static boolean isUnreserved(int c) {
return c < 128 && UNRESERVED_CHARS[c];
private static boolean isUnreserved(int c, BitSet unreservedChars) {
if (c < 0 || c >= unreservedChars.length()) {
return false;
}

return unreservedChars.get(c);
}

/**
* @param c non-negative integer
*/
private static boolean shouldEncode(int c) {
return !isUnreserved(c);
private static boolean shouldEncode(int c, BitSet unreservedChars) {
return !isUnreserved(c, unreservedChars);
}

private static boolean shouldEncode(String s) {
private static boolean shouldEncode(String s, BitSet unreservedChars) {
for (int i = 0, length = s.length(); i < length; i++) {
if (shouldEncode(s.charAt(i))) {
if (shouldEncode(s.charAt(i), unreservedChars)) {
return true;
}
}
Expand Down
40 changes: 40 additions & 0 deletions src/test/java/com/github/packageurl/PackageURLTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Locale;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.jspecify.annotations.Nullable;
import org.junit.jupiter.api.AfterAll;
Expand Down Expand Up @@ -272,4 +277,39 @@ void npmCaseSensitive() throws Exception {
assertEquals("Base64", base64Uppercase.getName());
assertEquals("1.0.0", base64Uppercase.getVersion());
}

@Test
void uriEncode() throws URISyntaxException, MalformedPackageURLException {
String genDelims = "?#[]@"; // /
String subDelims = "!$&'()*+,;=";
String pchar = "/" + genDelims + subDelims + ":";
String query = "key=" + pchar.replace("=", "%3D").replace("&", "%26") + "/?";
String fragment = pchar + "/?";
String scheme = "pkg";
String type = "generic";
String subpath = fragment.replaceFirst("^/+", "");
URI uri = new URI(scheme, type, pchar, query, subpath);
PackageURL purl = new PackageURL(uri.toASCIIString());
Map<String, String> qualifiers = Arrays.stream(query.split("&"))
.map(kv -> kv.split("="))
.filter(kvArray -> kvArray.length == 2)
.collect(Collectors.toMap(kv -> kv[0], kv -> kv[1]));
PackageURL purl2 = PackageURLBuilder.aPackageURL()
.withType(type)
.withNamespace("")
.withName(genDelims.replace("@", ""))
.withVersion(subDelims + ":")
.withQualifiers(qualifiers)
.withSubpath(subpath)
.build();
assertEquals(purl, purl2);
assertEquals(
uri.getQuery(),
purl.getQualifiers().entrySet().stream()
.map(Map.Entry::toString)
.collect(Collectors.joining("&")));
assertEquals(uri.getFragment(), purl.getSubpath());
assertEquals(uri.getPath(), "/" + purl.getName() + '@' + purl.getVersion());
assertEquals(uri.toASCIIString().replace("pkg://", "pkg:").replaceFirst("&", "%26"), purl.toString());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
*/
package com.github.packageurl.internal;

import static com.github.packageurl.internal.StringUtil.PCHAR;

import java.nio.charset.StandardCharsets;
import java.util.Locale;
import java.util.Random;
Expand Down Expand Up @@ -92,7 +94,7 @@ private String[] createDecodedData() {
private static String[] encodeData(String[] decodedData) {
String[] encodedData = new String[decodedData.length];
for (int i = 0; i < encodedData.length; i++) {
encodedData[i] = StringUtil.percentEncode(decodedData[i]);
encodedData[i] = StringUtil.percentEncode(decodedData[i], PCHAR);
if (!StringUtil.percentDecode(encodedData[i]).equals(decodedData[i])) {
throw new RuntimeException(
"Invalid implementation of `percentEncode` and `percentDecode`.\nOriginal data: "
Expand Down Expand Up @@ -139,7 +141,7 @@ public void percentDecode(final Blackhole blackhole) {
@Benchmark
public void percentEncode(final Blackhole blackhole) {
for (int i = 0; i < DATA_COUNT; i++) {
blackhole.consume(StringUtil.percentEncode(decodedData[i]));
blackhole.consume(StringUtil.percentEncode(decodedData[i], PCHAR));
}
}
}
8 changes: 4 additions & 4 deletions src/test/resources/test-suite-data.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
{
"description": "docker uses qualifiers and hash image id as versions",
"purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
"canonical_purl": "pkg:docker/customer/dockerimage@sha256%3A244fd47e07d1004f0aed9c?repository_url=gcr.io",
"canonical_purl": "pkg:docker/customer/dockerimage@sha256:244fd47e07d1004f0aed9c?repository_url=gcr.io",
"type": "docker",
"namespace": "customer",
"name": "dockerimage",
Expand All @@ -110,7 +110,7 @@
{
"description": "maven often uses qualifiers",
"purl": "pkg:Maven/org.apache.xmlgraphics/[email protected]?repositorY_url=repo.spring.io/release&classifier=sources",
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/[email protected]?classifier=sources&repository_url=repo.spring.io%2Frelease",
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/[email protected]?classifier=sources&repository_url=repo.spring.io/release",
"type": "maven",
"namespace": "org.apache.xmlgraphics",
"name": "batik-anim",
Expand All @@ -122,7 +122,7 @@
{
"description": "maven pom reference",
"purl": "pkg:Maven/org.apache.xmlgraphics/[email protected]?repositorY_url=repo.spring.io/release&extension=pom",
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/[email protected]?extension=pom&repository_url=repo.spring.io%2Frelease",
"canonical_purl": "pkg:maven/org.apache.xmlgraphics/[email protected]?extension=pom&repository_url=repo.spring.io/release",
"type": "maven",
"namespace": "org.apache.xmlgraphics",
"name": "batik-anim",
Expand Down Expand Up @@ -314,7 +314,7 @@
{
"description": "valid debian purl containing a plus in the name and version",
"purl": "pkg:deb/debian/[email protected]+6",
"canonical_purl": "pkg:deb/debian/g%2B%2B[email protected]%2B6",
"canonical_purl": "pkg:deb/debian/g++[email protected]+6",
"type": "deb",
"namespace": "debian",
"name": "g++-10",
Expand Down