@@ -225,13 +225,19 @@ static final class ThreadProcessor implements Runnable {
225
225
*/
226
226
private final long numbersAddress ;
227
227
228
+ /**
229
+ * The main memory address at the beginning of the name length table slots.
230
+ */
231
+ private final long lengthsAddress ;
232
+
228
233
/**
229
234
* The SIMD vectors associated with each slot in the hash table. The
230
235
* content of a given slot in a hash table is a lookup into this array.
231
236
* The intent of having this array as an extra lookup is to allow N
232
237
* vectors per slot while having fixed size slots.
233
238
*/
234
239
private ByteVector [] vectors = new ByteVector [200000 ];
240
+ private String [] cityNames = new String [NUM_SLOTS ];
235
241
236
242
/**
237
243
* The next available index in the vectors array.
@@ -261,6 +267,7 @@ public ThreadProcessor(long start, long end, int bufferSize) {
261
267
unsafe .putShort (slotsAddress + i * 2 , (short ) 0 );
262
268
}
263
269
numbersAddress = unsafe .allocateMemory (NUM_SLOTS * 16 );
270
+ lengthsAddress = unsafe .allocateMemory (NUM_SLOTS );
264
271
}
265
272
266
273
public final void run () {
@@ -479,9 +486,22 @@ private final void record(byte[] buffer, int nameStart, int nameEnd, int measure
479
486
*/
480
487
var vectorOffset = unsafe .getShort (slotsAddress + slotIndex * 2 );
481
488
while (vectorOffset != 0 ) {
482
- if (slotEquals (buffer , nameStart , vectorOffset , numVectors , MASKS [remainder ], slotIndex )) {
483
- updateSlot (slotIndex , measurement );
484
- break ;
489
+
490
+ /**
491
+ * Check that the set of vectors in the slot matches the city name.
492
+ */
493
+ if (slotEquals (buffer , nameStart , vectorOffset , numVectors , remainder , slotIndex )) {
494
+
495
+ /**
496
+ * Check the length of the slot name and city name match. This
497
+ * check is needed because the vector equality check can give
498
+ * false positives if one city name starts with another.
499
+ */
500
+ byte slotNameLength = unsafe .getByte (lengthsAddress + slotIndex );
501
+ if (slotNameLength == nameLength ) {
502
+ updateSlot (slotIndex , measurement );
503
+ break ;
504
+ }
485
505
}
486
506
487
507
/**
@@ -497,20 +517,36 @@ private final void record(byte[] buffer, int nameStart, int nameEnd, int measure
497
517
* city name and measurement.
498
518
*/
499
519
if (vectorOffset == 0 ) {
520
+ /**
521
+ * Record the city name length for this slot.
522
+ */
523
+ unsafe .putByte (lengthsAddress + slotIndex , (byte ) nameLength );
524
+
525
+ /**
526
+ * Record the index at which this slot's set of vectors starts.
527
+ */
500
528
unsafe .putShort (slotsAddress + slotIndex * 2 , nextVectorIndex );
529
+
530
+ /**
531
+ * Records the vectors for the city name.
532
+ */
501
533
for (int v = 0 ; v < numVectors ; v ++) {
502
534
vectors [nextVectorIndex ] = ByteVector .fromArray (SPECIES , buffer , nameStart + v * SPECIES .length ());
503
535
nextVectorIndex ++;
504
536
}
505
537
506
538
cityVectorLookup .put (new String (buffer , nameStart , nameLength ), slotIndex );
507
539
508
- // Min, max, count, sum
540
+ /**
541
+ * Min, max, count, sum
542
+ */
509
543
var numbersIndex = getNumbersIndex (slotIndex );
510
544
unsafe .putInt (numbersIndex , measurement );
511
545
unsafe .putInt (numbersIndex + 4 , measurement );
512
546
unsafe .putInt (numbersIndex + 8 , 1 );
513
547
unsafe .putInt (numbersIndex + 12 , measurement );
548
+
549
+ cityNames [slotIndex ] = new String (buffer , nameStart , nameLength );
514
550
}
515
551
}
516
552
@@ -587,13 +623,21 @@ private final void updateSlot(int slotIndex, int measurement) {
587
623
* length, so the last vector in the slot will be a partial vector. The
588
624
* masks are used to ignore the unused bytes in the last vector.
589
625
*/
590
- private final boolean slotEquals (byte [] buffer , int nameStart , int vectorOffset , int numVectors , VectorMask < Byte > lastVectorMask , int slotIndex ) {
626
+ private final boolean slotEquals (byte [] buffer , int nameStart , int vectorOffset , int numVectors , int remainder , int slotIndex ) {
591
627
for (int v = 0 ; v < numVectors ; v ++) {
592
628
var nameVector = ByteVector .fromArray (SPECIES , buffer , nameStart + v * SPECIES .length ());
593
629
var slotVector = vectors [vectorOffset + v ];
594
630
if (v == numVectors - 1 ) {
595
- if (!slotVector .compare (VectorOperators .EQ , nameVector , lastVectorMask ).equals (lastVectorMask )) {
596
- return false ;
631
+ if (remainder == 0 ) {
632
+ if (!slotVector .eq (nameVector ).allTrue ()) {
633
+ return false ;
634
+ }
635
+ }
636
+ else {
637
+ var mask = MASKS [remainder - 1 ];
638
+ if (!slotVector .compare (VectorOperators .EQ , nameVector , mask ).equals (mask )) {
639
+ return false ;
640
+ }
597
641
}
598
642
break ;
599
643
}
@@ -644,14 +688,14 @@ public void collectResults(TreeMap<String, CityResult> results) {
644
688
* the last vector of the source line and a given slot in the hash table.
645
689
*/
646
690
private static final VectorMask <Byte >[] generateMasks (VectorSpecies <Byte > species ) {
647
- VectorMask <Byte >[] masks = new VectorMask [species .length () + 1 ];
648
- for ( int i = 0 ; i < species . length () + 1 ; i ++) {
649
- boolean [] maskBooleans = new boolean [ species . length ()] ;
650
- for ( int j = 0 ; j < species . length (); j ++) {
651
- maskBooleans [ j ] = j < i ;
652
- }
653
- masks [i ] = VectorMask .fromArray (species , maskBooleans , 0 );
654
- }
691
+ VectorMask <Byte >[] masks = new VectorMask [species .length () - 1 ];
692
+ masks [ 0 ] = VectorMask . fromArray ( species , new boolean []{ true , false , false , false , false , false , false , false }, 0 );
693
+ masks [ 1 ] = VectorMask . fromArray ( species , new boolean []{ true , true , false , false , false , false , false , false }, 0 ) ;
694
+ masks [ 2 ] = VectorMask . fromArray ( species , new boolean []{ true , true , true , false , false , false , false , false }, 0 );
695
+ masks [ 3 ] = VectorMask . fromArray ( species , new boolean []{ true , true , true , true , false , false , false , false }, 0 ) ;
696
+ masks [ 4 ] = VectorMask . fromArray ( species , new boolean []{ true , true , true , true , true , false , false , false }, 0 );
697
+ masks [5 ] = VectorMask .fromArray (species , new boolean []{ true , true , true , true , true , true , false , false } , 0 );
698
+ masks [ 6 ] = VectorMask . fromArray ( species , new boolean []{ true , true , true , true , true , true , true , false }, 0 );
655
699
return masks ;
656
700
}
657
701
0 commit comments