29
29
import java .nio .ByteBuffer ;
30
30
import java .nio .charset .StandardCharsets ;
31
31
import java .util .Arrays ;
32
+ import java .util .BitSet ;
32
33
import java .util .Collections ;
33
34
import java .util .Map ;
34
35
import java .util .Objects ;
@@ -60,6 +61,93 @@ public final class PackageURL implements Serializable {
60
61
61
62
private static final char PERCENT_CHAR = '%' ;
62
63
64
+ private static final int NBITS = 128 ;
65
+
66
+ private static final BitSet DIGIT = new BitSet (NBITS );
67
+
68
+ static {
69
+ for (int i = '0' ; i <= '9' ; i ++) {
70
+ DIGIT .set (i );
71
+ }
72
+ }
73
+
74
+ private static final BitSet LOWER = new BitSet (NBITS );
75
+
76
+ static {
77
+ for (int i = 'a' ; i <= 'z' ; i ++) {
78
+ LOWER .set (i );
79
+ }
80
+ }
81
+
82
+ private static final BitSet UPPER = new BitSet (NBITS );
83
+
84
+ static {
85
+ for (int i = 'A' ; i <= 'Z' ; i ++) {
86
+ UPPER .set (i );
87
+ }
88
+ }
89
+
90
+ private static final BitSet ALPHA = new BitSet (NBITS );
91
+
92
+ static {
93
+ ALPHA .or (LOWER );
94
+ ALPHA .or (UPPER );
95
+ }
96
+
97
+ private static final BitSet ALPHA_DIGIT = new BitSet (NBITS );
98
+
99
+ static {
100
+ ALPHA_DIGIT .or (ALPHA );
101
+ ALPHA_DIGIT .or (DIGIT );
102
+ }
103
+
104
+ private static final BitSet UNRESERVED = new BitSet (NBITS );
105
+
106
+ static {
107
+ UNRESERVED .or (ALPHA_DIGIT );
108
+ UNRESERVED .set ('-' );
109
+ UNRESERVED .set ('.' );
110
+ UNRESERVED .set ('_' );
111
+ UNRESERVED .set ('~' );
112
+ }
113
+
114
+ private static final BitSet SUB_DELIMS = new BitSet (NBITS );
115
+
116
+ static {
117
+ SUB_DELIMS .set ('!' );
118
+ SUB_DELIMS .set ('$' );
119
+ SUB_DELIMS .set ('&' );
120
+ SUB_DELIMS .set ('\'' );
121
+ SUB_DELIMS .set ('(' );
122
+ SUB_DELIMS .set (')' );
123
+ SUB_DELIMS .set ('*' );
124
+ SUB_DELIMS .set ('+' );
125
+ SUB_DELIMS .set (',' );
126
+ SUB_DELIMS .set (';' );
127
+ SUB_DELIMS .set ('=' );
128
+ }
129
+
130
+ private static final BitSet PCHAR = new BitSet (NBITS );
131
+
132
+ static {
133
+ PCHAR .or (UNRESERVED );
134
+ PCHAR .or (SUB_DELIMS );
135
+ PCHAR .set (':' );
136
+ // PCHAR.set('@'); Always encode '@' in the path due to version
137
+ }
138
+
139
+ private static final BitSet QUERY = new BitSet (NBITS );
140
+
141
+ static {
142
+ QUERY .or (PCHAR );
143
+ QUERY .set ('/' );
144
+ // QUERY.set('?');
145
+ QUERY .clear ('&' );
146
+ QUERY .clear ('=' );
147
+ }
148
+
149
+ private static final BitSet FRAGMENT = QUERY ;
150
+
63
151
/**
64
152
* Constructs a new PackageURL object by parsing the specified string.
65
153
*
@@ -498,12 +586,12 @@ private String canonicalize(boolean coordinatesOnly) {
498
586
final StringBuilder purl = new StringBuilder ();
499
587
purl .append (SCHEME_PART ).append (type ).append ('/' );
500
588
if (namespace != null ) {
501
- purl .append (encodePath (namespace ));
589
+ purl .append (encodePath (namespace , PCHAR ));
502
590
purl .append ('/' );
503
591
}
504
- purl .append (percentEncode (name ));
592
+ purl .append (percentEncode (name , PCHAR ));
505
593
if (version != null ) {
506
- purl .append ('@' ).append (percentEncode (version ));
594
+ purl .append ('@' ).append (percentEncode (version , PCHAR ));
507
595
}
508
596
509
597
if (!coordinatesOnly ) {
@@ -517,23 +605,27 @@ private String canonicalize(boolean coordinatesOnly) {
517
605
}
518
606
purl .append (entry .getKey ());
519
607
purl .append ('=' );
520
- purl .append (percentEncode (entry .getValue ()));
608
+ purl .append (percentEncode (entry .getValue (), QUERY ));
521
609
separator = true ;
522
610
}
523
611
}
524
612
if (subpath != null ) {
525
- purl .append ('#' ).append (encodePath (subpath ));
613
+ purl .append ('#' ).append (encodePath (subpath , FRAGMENT ));
526
614
}
527
615
}
528
616
return purl .toString ();
529
617
}
530
618
531
- private static boolean isUnreserved (int c ) {
532
- return (isValidCharForKey (c ) || c == '~' );
619
+ private static boolean isUnreserved (int c , BitSet safe ) {
620
+ if (c < 0 || c >= NBITS ) {
621
+ return false ;
622
+ }
623
+
624
+ return safe .get (c );
533
625
}
534
626
535
- private static boolean shouldEncode (int c ) {
536
- return !isUnreserved (c );
627
+ private static boolean shouldEncode (int c , BitSet safe ) {
628
+ return !isUnreserved (c , safe );
537
629
}
538
630
539
631
private static boolean isAlpha (int c ) {
@@ -598,14 +690,14 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
598
690
.orElse (-1 );
599
691
}
600
692
601
- private static int indexOfUnsafeChar (final byte [] bytes , final int start ) {
693
+ private static int indexOfUnsafeChar (final byte [] bytes , final int start , BitSet safe ) {
602
694
return IntStream .range (start , bytes .length )
603
- .filter (i -> shouldEncode (bytes [i ]))
695
+ .filter (i -> shouldEncode (bytes [i ], safe ))
604
696
.findFirst ()
605
697
.orElse (-1 );
606
698
}
607
699
608
- private static byte percentDecode (final byte [] bytes , final int start ) {
700
+ static byte percentDecode (final byte [] bytes , final int start ) {
609
701
if (start + 2 >= bytes .length ) {
610
702
throw new ValidationException ("Incomplete percent encoding at offset " + start + " with value '"
611
703
+ new String (bytes , start , bytes .length - start , StandardCharsets .UTF_8 ) + "'" );
@@ -638,15 +730,15 @@ public static String percentDecode(final String source) {
638
730
}
639
731
640
732
byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
641
-
642
- int off = 0 ;
643
- int idx = indexOfPercentChar (bytes , off );
733
+ int idx = indexOfPercentChar (bytes , 0 );
644
734
645
735
if (idx == -1 ) {
646
736
return source ;
647
737
}
648
738
739
+ int off = idx ;
649
740
ByteBuffer buffer = ByteBuffer .wrap (bytes );
741
+ buffer .position (off );
650
742
651
743
while (true ) {
652
744
int len = idx - off ;
@@ -690,14 +782,18 @@ private static byte[] percentEncode(byte b) {
690
782
}
691
783
692
784
public static String percentEncode (final String source ) {
785
+ return percentEncode (source , UNRESERVED );
786
+ }
787
+
788
+ private static String percentEncode (final String source , final BitSet safe ) {
693
789
if (source .isEmpty ()) {
694
790
return source ;
695
791
}
696
792
697
793
byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
698
794
699
795
int off = 0 ;
700
- int idx = indexOfUnsafeChar (bytes , off );
796
+ int idx = indexOfUnsafeChar (bytes , off , safe );
701
797
702
798
if (idx == -1 ) {
703
799
return source ;
@@ -714,7 +810,7 @@ public static String percentEncode(final String source) {
714
810
}
715
811
716
812
buffer .put (percentEncode (bytes [off ++]));
717
- idx = indexOfUnsafeChar (bytes , off );
813
+ idx = indexOfUnsafeChar (bytes , off , safe );
718
814
719
815
if (idx == -1 ) {
720
816
int rem = bytes .length - off ;
@@ -883,8 +979,10 @@ private String[] parsePath(final String path, final boolean isSubpath) {
883
979
.toArray (String []::new );
884
980
}
885
981
886
- private String encodePath (final String path ) {
887
- return Arrays .stream (path .split ("/" )).map (PackageURL ::percentEncode ).collect (Collectors .joining ("/" ));
982
+ private String encodePath (final String path , BitSet safe ) {
983
+ return Arrays .stream (path .split ("/" ))
984
+ .map (source -> percentEncode (source , safe ))
985
+ .collect (Collectors .joining ("/" ));
888
986
}
889
987
890
988
/**
0 commit comments