29
29
import java .nio .ByteBuffer ;
30
30
import java .nio .charset .StandardCharsets ;
31
31
import java .util .Arrays ;
32
+ import java .util .BitSet ;
32
33
import java .util .Collections ;
33
34
import java .util .Map ;
34
35
import java .util .Objects ;
@@ -59,6 +60,79 @@ public final class PackageURL implements Serializable {
59
60
60
61
private static final char PERCENT_CHAR = '%' ;
61
62
63
/** Initial size, in bits, of every character-class set declared below. */
private static final int NBITS = 128;

/** The ASCII decimal digits {@code '0'} through {@code '9'}. */
private static final BitSet DIGIT = new BitSet(NBITS);

static {
    // BitSet.set(from, to) excludes the upper bound, hence the + 1.
    DIGIT.set('0', '9' + 1);
}
71
+
72
/** The ASCII lower-case letters {@code 'a'} through {@code 'z'}. */
private static final BitSet LOWER = new BitSet(NBITS);

static {
    // BitSet.set(from, to) excludes the upper bound, hence the + 1.
    LOWER.set('a', 'z' + 1);
}
78
+
79
/** The ASCII upper-case letters {@code 'A'} through {@code 'Z'}. */
private static final BitSet UPPER = new BitSet(NBITS);

static {
    // BitSet.set(from, to) excludes the upper bound, hence the + 1.
    UPPER.set('A', 'Z' + 1);
}
85
+
86
/** All ASCII letters: the union of {@code UPPER} and {@code LOWER}. */
private static final BitSet ALPHA = new BitSet(NBITS);

static {
    ALPHA.or(UPPER);
    ALPHA.or(LOWER);
}
91
+
92
/** ASCII letters and decimal digits: the union of {@code DIGIT} and {@code ALPHA}. */
private static final BitSet ALPHA_DIGIT = new BitSet(NBITS);

static {
    ALPHA_DIGIT.or(DIGIT);
    ALPHA_DIGIT.or(ALPHA);
}
97
+
98
/**
 * Letters, digits and the four marks {@code - . _ ~}; the same character set as the
 * "unreserved" production of RFC 3986.
 */
private static final BitSet UNRESERVED = new BitSet(NBITS);

static {
    for (final char mark : new char[] {'-', '.', '_', '~'}) {
        UNRESERVED.set(mark);
    }
    UNRESERVED.or(ALPHA_DIGIT);
}
106
/**
 * The eleven characters {@code ! $ & ' ( ) * + , ; =}; the same character set as the
 * "sub-delims" production of RFC 3986.
 */
private static final BitSet SUB_DELIMS = new BitSet(NBITS);

static {
    final String subDelims = "!$&'()*+,;=";
    for (int i = 0; i < subDelims.length(); i++) {
        SUB_DELIMS.set(subDelims.charAt(i));
    }
}
121
/**
 * Characters written verbatim in a path component: {@code UNRESERVED}, {@code SUB_DELIMS}
 * and {@code ':'}.
 */
private static final BitSet PCHAR = new BitSet(NBITS);

static {
    PCHAR.set(':');
    PCHAR.or(SUB_DELIMS);
    PCHAR.or(UNRESERVED);
    // '@' is deliberately left out: it introduces the version, so a literal '@' in the
    // path must always be percent-encoded.
}
128
/**
 * Characters written verbatim in a qualifier value: {@code PCHAR} plus {@code '/'} and
 * {@code '?'}, minus {@code '&'} and {@code '='}.
 */
private static final BitSet QUERY = new BitSet(NBITS);

static {
    QUERY.or(PCHAR);
    QUERY.set('/');
    QUERY.set('?');
    // PCHAR brings in the sub-delims '&' and '='. The canonical form joins qualifiers
    // with '&' and separates key from value with '=', so a literal occurrence inside a
    // value has to be percent-encoded or the qualifier string can no longer be split
    // back into the same pairs.
    QUERY.clear('&');
    QUERY.clear('=');
}

/**
 * Characters written verbatim in a subpath segment: {@code PCHAR} plus {@code '/'} and
 * {@code '?'}. Held in its own set rather than aliasing {@code QUERY}, so that a change
 * to one table cannot silently alter the other.
 */
private static final BitSet FRAGMENT = new BitSet(NBITS);

static {
    FRAGMENT.or(PCHAR);
    FRAGMENT.set('/');
    FRAGMENT.set('?');
}
135
+
62
136
/**
63
137
* Constructs a new PackageURL object by parsing the specified string.
64
138
*
@@ -472,37 +546,42 @@ private String canonicalize(boolean coordinatesOnly) {
472
546
final StringBuilder purl = new StringBuilder ();
473
547
purl .append (SCHEME_PART ).append (type ).append ("/" );
474
548
if (namespace != null ) {
475
- purl .append (encodePath (namespace ));
549
+ purl .append (encodePath (namespace , PCHAR ));
476
550
purl .append ("/" );
477
551
}
478
- purl .append (percentEncode (name ));
552
+ purl .append (percentEncode (name , PCHAR ));
479
553
if (version != null ) {
480
- purl .append ("@" ).append (percentEncode (version ));
554
+ purl .append ("@" ).append (percentEncode (version , PCHAR ));
481
555
}
482
556
if (! coordinatesOnly ) {
483
557
if (qualifiers != null ) {
484
558
purl .append ("?" );
485
559
qualifiers .forEach ((key , value ) -> {
486
560
purl .append (toLowerCase (key ));
487
561
purl .append ("=" );
488
- purl .append (percentEncode (value ));
562
+ purl .append (percentEncode (value , QUERY ));
489
563
purl .append ("&" );
490
564
});
491
565
purl .setLength (purl .length () - 1 );
492
566
}
493
567
if (subpath != null ) {
494
- purl .append ("#" ).append (encodePath (subpath ));
568
+ purl .append ("#" ).append (encodePath (subpath , FRAGMENT ));
495
569
}
496
570
}
497
571
return purl .toString ();
498
572
}
499
573
500
- private static boolean isUnreserved (int c ) {
501
- return (isValidCharForKey (c ) || c == '~' );
574
+ private static boolean isUnreserved (int c , BitSet safe ) {
575
+ if (c < 0 || c >= NBITS ) {
576
+ return false ;
577
+ }
578
+
579
+ return safe .get (c );
580
+
502
581
}
503
582
504
- private static boolean shouldEncode (int c ) {
505
- return !isUnreserved (c );
583
+ private static boolean shouldEncode (int c , BitSet safe ) {
584
+ return !isUnreserved (c , safe );
506
585
}
507
586
508
587
private static boolean isAlpha (int c ) {
@@ -564,8 +643,8 @@ private static int indexOfPercentChar(final byte[] bytes, final int start) {
564
643
return IntStream .range (start , bytes .length ).filter (i -> isPercent (bytes [i ])).findFirst ().orElse (-1 );
565
644
}
566
645
567
- private static int indexOfUnsafeChar (final byte [] bytes , final int start ) {
568
- return IntStream .range (start , bytes .length ).filter (i -> shouldEncode (bytes [i ])).findFirst ().orElse (-1 );
646
+ private static int indexOfUnsafeChar (final byte [] bytes , final int start , BitSet safe ) {
647
+ return IntStream .range (start , bytes .length ).filter (i -> shouldEncode (bytes [i ], safe )).findFirst ().orElse (-1 );
569
648
}
570
649
571
650
private static byte percentDecode (final byte [] bytes , final int start ) {
@@ -649,15 +728,15 @@ private static byte[] percentEncode(byte b) {
649
728
return new byte [] {(byte ) PERCENT_CHAR , b1 , b2 };
650
729
}
651
730
652
- public static String percentEncode (final String source ) {
731
+ public static String percentEncode (final String source , BitSet safe ) {
653
732
if (source .isEmpty ()) {
654
733
return source ;
655
734
}
656
735
657
736
byte [] bytes = source .getBytes (StandardCharsets .UTF_8 );
658
737
659
738
int off = 0 ;
660
- int idx = indexOfUnsafeChar (bytes , off );
739
+ int idx = indexOfUnsafeChar (bytes , off , safe );
661
740
662
741
if (idx == -1 ) {
663
742
return source ;
@@ -674,7 +753,7 @@ public static String percentEncode(final String source) {
674
753
}
675
754
676
755
buffer .put (percentEncode (bytes [off ++]));
677
- idx = indexOfUnsafeChar (bytes , off );
756
+ idx = indexOfUnsafeChar (bytes , off , safe );
678
757
679
758
if (idx == -1 ) {
680
759
int rem = bytes .length - off ;
@@ -835,8 +914,8 @@ private String[] parsePath(final String path, final boolean isSubpath) {
835
914
.toArray (String []::new );
836
915
}
837
916
838
- private String encodePath (final String path ) {
839
- return Arrays .stream (path .split ("/" )).map (PackageURL :: percentEncode ).collect (Collectors .joining ("/" ));
917
+ private String encodePath (final String path , BitSet safe ) {
918
+ return Arrays .stream (path .split ("/" )).map (source -> percentEncode ( source , safe ) ).collect (Collectors .joining ("/" ));
840
919
}
841
920
842
921
/**
0 commit comments