29
29
import java .nio .ByteBuffer ;
30
30
import java .nio .charset .StandardCharsets ;
31
31
import java .util .Arrays ;
32
+ import java .util .BitSet ;
32
33
import java .util .Collections ;
33
34
import java .util .Map ;
34
35
import java .util .Objects ;
@@ -59,6 +60,79 @@ public final class PackageURL implements Serializable {
59
60
60
61
private static final char PERCENT_CHAR = '%' ;
61
62
63
/** Capacity of each character-class BitSet in this class; only values 0..127 can ever be marked as safe. */
+ private static final int NBITS = 128 ;
64
+
65
/** ASCII decimal digits {@code '0'..'9'}. */
private static final BitSet DIGIT = new BitSet(NBITS);

/** ASCII lowercase letters {@code 'a'..'z'}. */
private static final BitSet LOWER = new BitSet(NBITS);

/** ASCII uppercase letters {@code 'A'..'Z'}. */
private static final BitSet UPPER = new BitSet(NBITS);

static {
    // BitSet.set(from, to) treats the upper bound as exclusive, hence the "+ 1".
    DIGIT.set('0', '9' + 1);
    LOWER.set('a', 'z' + 1);
    UPPER.set('A', 'Z' + 1);
}
85
+
86
/** ASCII letters: the union of {@code LOWER} and {@code UPPER}. */
private static final BitSet ALPHA = new BitSet(NBITS);

/** ASCII letters and digits: the union of {@code ALPHA} and {@code DIGIT}. */
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);

static {
    ALPHA.or(UPPER);
    ALPHA.or(LOWER);

    ALPHA_DIGIT.or(DIGIT);
    ALPHA_DIGIT.or(ALPHA);
}
97
+
98
/** Letters, digits and the four marks {@code - . _ ~}. */
private static final BitSet UNRESERVED = new BitSet(NBITS);
static {
    UNRESERVED.or(ALPHA_DIGIT);
    for (final char mark : "-._~".toCharArray()) {
        UNRESERVED.set(mark);
    }
}
106
/** The eleven sub-delimiter characters {@code ! $ & ' ( ) * + , ; =}. */
private static final BitSet SUB_DELIMS = new BitSet(NBITS);
static {
    for (final char delim : "!$&'()*+,;=".toCharArray()) {
        SUB_DELIMS.set(delim);
    }
}
121
/**
 * Characters written without escaping in path-like components:
 * {@code UNRESERVED}, {@code SUB_DELIMS} and {@code ':'}.
 */
private static final BitSet PCHAR = new BitSet(NBITS);
static {
    PCHAR.set(':');
    PCHAR.or(SUB_DELIMS);
    PCHAR.or(UNRESERVED);
    // '@' is deliberately left out: it is always encoded in the path because of the version.
}
128
/**
 * Characters written without escaping in a subpath (the fragment part):
 * everything in {@code PCHAR} plus {@code '/'} and {@code '?'}.
 * This is its own BitSet rather than an alias, so that changing one
 * character class can never silently change another.
 */
private static final BitSet FRAGMENT = new BitSet(NBITS);
static {
    FRAGMENT.or(PCHAR);
    FRAGMENT.set('/');
    FRAGMENT.set('?');
}

/**
 * Characters written without escaping in a qualifier value: the same set as
 * {@code FRAGMENT} except for {@code '&'}.
 */
private static final BitSet QUERY = new BitSet(NBITS);
static {
    QUERY.or(FRAGMENT);
    // '&' is the separator appended between qualifiers, so a literal '&' inside a
    // value must be escaped or the value would be read back as two qualifiers.
    QUERY.clear('&');
}
135
+
62
136
/**
63
137
* Constructs a new PackageURL object by parsing the specified string.
64
138
*
@@ -472,37 +546,42 @@ private String canonicalize(boolean coordinatesOnly) {
472
546
final StringBuilder purl = new StringBuilder ();
473
547
purl .append (SCHEME_PART ).append (type ).append ("/" );
474
548
if (namespace != null ) {
475
- purl .append (encodePath (namespace ));
549
+ purl .append (encodePath (namespace , PCHAR ));
476
550
purl .append ("/" );
477
551
}
478
- purl .append (percentEncode (name ));
552
+ purl .append (percentEncode (name , PCHAR ));
479
553
if (version != null ) {
480
- purl .append ("@" ).append (percentEncode (version ));
554
+ purl .append ("@" ).append (percentEncode (version , PCHAR ));
481
555
}
482
556
if (! coordinatesOnly ) {
483
557
if (qualifiers != null ) {
484
558
purl .append ("?" );
485
559
qualifiers .forEach ((key , value ) -> {
486
560
purl .append (toLowerCase (key ));
487
561
purl .append ("=" );
488
- purl .append (percentEncode (value ));
562
+ purl .append (percentEncode (value , QUERY ));
489
563
purl .append ("&" );
490
564
});
491
565
purl .setLength (purl .length () - 1 );
492
566
}
493
567
if (subpath != null ) {
494
- purl .append ("#" ).append (encodePath (subpath ));
568
+ purl .append ("#" ).append (encodePath (subpath , FRAGMENT ));
495
569
}
496
570
}
497
571
return purl .toString ();
498
572
}
499
573
500
- private static boolean isUnreserved (int c ) {
501
- return (isValidCharForKey (c ) || c == '~' );
574
+ private static boolean isUnreserved (int c , BitSet safe ) {
575
+ if (c < 0 || c >= NBITS ) {
576
+ return false ;
577
+ }
578
+
579
+ return safe .get (c );
580
+
502
581
}
503
582
504
- private static boolean shouldEncode (int c ) {
505
- return !isUnreserved (c );
583
+ private static boolean shouldEncode (int c , BitSet safe ) {
584
+ return !isUnreserved (c , safe );
506
585
}
507
586
508
587
private static boolean isAlpha (int c ) {
@@ -564,11 +643,11 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
564
643
return IntStream .range (start , bytes .length ).filter (i -> isPercent (bytes [i ])).findFirst ().orElse (-1 );
565
644
}
566
645
567
- private static int indexOfUnsafeChar (final byte [] bytes , final int start ) {
568
- return IntStream .range (start , bytes .length ).filter (i -> shouldEncode (bytes [i ])).findFirst ().orElse (-1 );
646
+ private static int indexOfUnsafeChar (final byte [] bytes , final int start , BitSet safe ) {
647
+ return IntStream .range (start , bytes .length ).filter (i -> shouldEncode (bytes [i ], safe )).findFirst ().orElse (-1 );
569
648
}
570
649
571
- private static byte percentDecode (final byte [] bytes , final int start ) {
650
+ static byte percentDecode (final byte [] bytes , final int start ) {
572
651
if (start + 2 >= bytes .length ) {
573
652
throw new ValidationException ("Incomplete percent encoding at offset " + start + " with value '" + new String (bytes , start , bytes .length - start , StandardCharsets .UTF_8 ) + "'" );
574
653
}
@@ -598,15 +677,15 @@ public static String percentDecode(final String source) {
598
677
}
599
678
600
679
byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
601
-
602
- int off = 0 ;
603
- int idx = indexOfPercentChar (bytes , off );
680
+ int idx = indexOfPercentChar (bytes , 0 );
604
681
605
682
if (idx == -1 ) {
606
683
return source ;
607
684
}
608
685
686
+ int off = idx ;
609
687
ByteBuffer buffer = ByteBuffer .wrap (bytes );
688
+ buffer .position (off );
610
689
611
690
while (true ) {
612
691
int len = idx - off ;
@@ -650,14 +729,18 @@ private static byte[] percentEncode(byte b) {
650
729
}
651
730
652
731
public static String percentEncode (final String source ) {
732
+ return percentEncode (source , new BitSet (0 ));
733
+ }
734
+
735
+ private static String percentEncode (final String source , final BitSet safe ) {
653
736
if (source .isEmpty ()) {
654
737
return source ;
655
738
}
656
739
657
740
byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
658
741
659
742
int off = 0 ;
660
- int idx = indexOfUnsafeChar (bytes , off );
743
+ int idx = indexOfUnsafeChar (bytes , off , safe );
661
744
662
745
if (idx == -1 ) {
663
746
return source ;
@@ -674,7 +757,7 @@ public static String percentEncode(final String source) {
674
757
}
675
758
676
759
buffer .put (percentEncode (bytes [off ++]));
677
- idx = indexOfUnsafeChar (bytes , off );
760
+ idx = indexOfUnsafeChar (bytes , off , safe );
678
761
679
762
if (idx == -1 ) {
680
763
int rem = bytes .length - off ;
@@ -733,7 +816,6 @@ private void parse(final String purl) throws MalformedPackageURLException {
733
816
final String rawQuery = uri .getRawQuery ();
734
817
if (rawQuery != null && !rawQuery .isEmpty ()) {
735
818
this .qualifiers = parseQualifiers (rawQuery );
736
-
737
819
}
738
820
// this is the rest of the purl that needs to be parsed
739
821
String remainder = uri .getRawPath ();
@@ -835,8 +917,8 @@ private String[] parsePath(final String path, final boolean isSubpath) {
835
917
.toArray (String []::new );
836
918
}
837
919
838
- private String encodePath (final String path ) {
839
- return Arrays .stream (path .split ("/" )).map (PackageURL :: percentEncode ).collect (Collectors .joining ("/" ));
920
+ private String encodePath (final String path , BitSet safe ) {
921
+ return Arrays .stream (path .split ("/" )).map (source -> percentEncode ( source , safe ) ).collect (Collectors .joining ("/" ));
840
922
}
841
923
842
924
/**
0 commit comments