Skip to content

Commit 6b86ee3

Browse files
committed
Compute max compressed length for SkippableLongCODEC
1 parent 32e374a commit 6b86ee3

File tree

8 files changed

+82
-5
lines changed

8 files changed

+82
-5
lines changed

src/main/java/me/lemire/longcompression/LongBinaryPacking.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@
2323
* @author Benoit Lacelle
2424
*/
2525
public final class LongBinaryPacking implements LongCODEC, SkippableLongCODEC {
26-
final static int BLOCK_SIZE = 64;
27-
26+
public final static int BLOCK_SIZE = 64;
27+
private static final int MAX_BIT_WIDTH = Long.SIZE;
28+
2829
@Override
2930
public void compress(long[] in, IntWrapper inpos, int inlength,
3031
long[] out, IntWrapper outpos) {
@@ -136,6 +137,15 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength,
136137
inpos.set(tmpinpos);
137138
}
138139

140+
/**
 * Upper bound, in longs, on the headless-compressed size of the whole blocks
 * contained in {@code inlength} input values.
 * <p>
 * Only complete blocks of {@code BLOCK_SIZE} values are counted; the trailing
 * {@code inlength % BLOCK_SIZE} values are neither sized nor reported through
 * {@code compressedPositions}.
 * </p>
 *
 * @param compressedPositions advanced by the number of input values covered
 *                            by whole blocks ({@code blockCount * BLOCK_SIZE})
 * @param inlength            number of input longs offered for compression
 * @return header longs plus worst-case payload longs for the whole blocks
 */
@Override
141+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
142+
// Integer division: a partial trailing block is not counted.
int blockCount = inlength / BLOCK_SIZE;
143+
// One long per full group of Long.BYTES blocks, plus one long per leftover block.
// NOTE(review): this is deliberately not a ceiling division; presumably headlessCompress
// packs eight one-byte bit widths per header long and gives each leftover block its own
// header long -- confirm against headlessCompress.
int headersSizeInLongs = blockCount / Long.BYTES + (blockCount % Long.BYTES);
144+
// Worst case: every value needs MAX_BIT_WIDTH bits. BLOCK_SIZE values * MAX_BIT_WIDTH bits
// / Long.SIZE bits per long equals MAX_BIT_WIDTH longs only because BLOCK_SIZE == Long.SIZE.
int blocksSizeInLongs = blockCount * MAX_BIT_WIDTH;
145+
compressedPositions.add(blockCount * BLOCK_SIZE);
146+
return headersSizeInLongs + blocksSizeInLongs;
147+
}
148+
139149
@Override
140150
public String toString() {
141151
return this.getClass().getSimpleName();

src/main/java/me/lemire/longcompression/LongCompressor.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ public LongCompressor() {
4040
* @throws UncompressibleInputException if the data is too poorly compressible
4141
*/
4242
public long[] compress(long[] input) {
43-
long[] compressed = new long[input.length + input.length / 100 + 1024];
43+
int maxCompressedLength = codec.maxHeadlessCompressedLength(new IntWrapper(0), input.length);
44+
long[] compressed = new long[maxCompressedLength + 1]; // +1 to store the length of the input
4445
// Store at index=0 the length of the input, hence enabling .headlessCompress
4546
compressed[0] = input.length;
4647
IntWrapper outpos = new IntWrapper(1);

src/main/java/me/lemire/longcompression/LongJustCopy.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength,
4343

4444
}
4545

46+
/**
 * Reports {@code inlength} as the maximum headless-compressed size in longs,
 * and marks all {@code inlength} input values as consumed.
 *
 * @param compressedPositions advanced by {@code inlength}
 * @param inlength            number of input longs offered for compression
 * @return {@code inlength}
 */
@Override
47+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
48+
compressedPositions.add(inlength);
49+
return inlength;
50+
}
51+
4652
@Override
4753
public void compress(long[] in, IntWrapper inpos, int inlength,
4854
long[] out, IntWrapper outpos) {

src/main/java/me/lemire/longcompression/LongVariableByte.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* @author Benoit Lacelle
2323
*/
2424
public class LongVariableByte implements LongCODEC, ByteLongCODEC, SkippableLongCODEC {
25+
private static final int MAX_BYTES_PER_INT = 10;
2526

2627
private static byte extract7bits(int i, long val) {
2728
return (byte) ((val >>> (7 * i)) & ((1 << 7) - 1));
@@ -326,6 +327,14 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[]
326327
inpos.set(p + (s!=0 ? 1 : 0));
327328
}
328329

330+
/**
 * Upper bound, in longs, on the headless-compressed size of {@code inlength}
 * input values, assuming each value takes {@code MAX_BYTES_PER_INT} bytes.
 *
 * @param compressedPositions advanced by {@code inlength}
 * @param inlength            number of input longs offered for compression
 * @return the worst-case byte count rounded up to a whole number of longs
 */
@Override
331+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
332+
// NOTE(review): int multiplication; overflows once inlength exceeds
// Integer.MAX_VALUE / MAX_BYTES_PER_INT. Consider computing in long.
// NOTE(review): despite its name, MAX_BYTES_PER_INT presumably bounds one encoded long
// (64 bits at 7 payload bits per byte -> 10 bytes) -- confirm and consider renaming.
int maxLengthInBytes = inlength * MAX_BYTES_PER_INT;
333+
// Ceiling division: bytes are packed into longs, a partially filled long still counts.
int maxLengthInLongs = (maxLengthInBytes + Long.BYTES - 1) / Long.BYTES;
334+
compressedPositions.add(inlength);
335+
return maxLengthInLongs;
336+
}
337+
329338
/**
330339
* Creates a new buffer of the requested size.
331340
*

src/main/java/me/lemire/longcompression/SkippableLongCODEC.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,21 @@ public void headlessCompress(long[] in, IntWrapper inpos, int inlength, long[] o
6767
public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[] out,
6868
IntWrapper outpos, int num);
6969

70+
/**
71+
* Compute the maximum number of longs that might be required to store
72+
* the compressed form of a given input array segment, without headers.
73+
* <p>
74+
* This is useful to pre-allocate the output buffer before calling
75+
* {@link #headlessCompress(long[], IntWrapper, int, long[], IntWrapper)}.
76+
* </p>
77+
*
78+
* @param compressedPositions
79+
* since not all schemes compress every input long, implementations
80+
* add to this wrapper the number of input longs they will actually compress
* (the wrapper is incremented, not overwritten).
81+
* This is useful when composing multiple schemes.
82+
* @param inlength
83+
* number of longs to be compressed
84+
* @return the maximum number of longs needed in the output array
85+
*/
86+
int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength);
7087
}

src/main/java/me/lemire/longcompression/SkippableLongComposition.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ public void headlessUncompress(long[] in, IntWrapper inpos, int inlength, long[]
6262
F2.headlessUncompress(in, inpos, inlength, out, outpos, num);
6363
}
6464

65+
/**
 * Upper bound, in longs, for the composed scheme: the bound reported by
 * {@code F1}, one extra long, and the bound reported by {@code F2} for the
 * input values that {@code F1} did not account for.
 *
 * @param compressedPositions advanced first by {@code F1}, then by {@code F2}
 * @param inlength            number of input longs offered for compression
 * @return the sum of both bounds plus one
 */
@Override
66+
public int maxHeadlessCompressedLength(IntWrapper compressedPositions, int inlength) {
67+
// Remember the starting position so F1's contribution can be measured as a delta.
int init = compressedPositions.get();
68+
int maxLength = F1.maxHeadlessCompressedLength(compressedPositions, inlength);
69+
maxLength += 1; // Add +1 for the potential F2 header. Question: is this header actually needed in the headless version?
70+
// Whatever F1 did not report as compressed is left for F2.
inlength -= compressedPositions.get() - init;
71+
maxLength += F2.maxHeadlessCompressedLength(compressedPositions, inlength);
72+
return maxLength;
73+
}
74+
6575
@Override
6676
public String toString() {
6777
return F1.toString() + "+" + F2.toString();

src/test/java/me/lemire/longcompression/LongTestUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ protected static long[] uncompress(ByteLongCODEC codec, byte[] data, int len) {
111111
}
112112

113113
protected static long[] compressHeadless(SkippableLongCODEC codec, long[] data) {
114-
long[] outBuf = new long[data.length * 4];
114+
long[] outBuf = new long[codec.maxHeadlessCompressedLength(new IntWrapper(0), data.length)];
115115
IntWrapper inPos = new IntWrapper();
116116
IntWrapper outPos = new IntWrapper();
117117
codec.headlessCompress(data, inPos, data.length, outBuf, outPos);

src/test/java/me/lemire/longcompression/SkippableLongBasicTest.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import me.lemire.integercompression.TestUtils;
1616
import me.lemire.integercompression.VariableByte;
1717

18+
import static org.junit.Assert.assertTrue;
1819

1920
/**
2021
* Just some basic sanity tests.
@@ -42,10 +43,11 @@ public void consistentTest() {
4243
for (SkippableLongCODEC c : codecs) {
4344
System.out.println("[SkippeableBasicTest.consistentTest] codec = "
4445
+ c);
45-
long[] outBuf = new long[N + 1024];
4646
for (int n = 0; n <= N; ++n) {
4747
IntWrapper inPos = new IntWrapper();
4848
IntWrapper outPos = new IntWrapper();
49+
long[] outBuf = new long[c.maxHeadlessCompressedLength(new IntWrapper(0), n)];
50+
4951
c.headlessCompress(data, inPos, n, outBuf, outPos);
5052

5153
IntWrapper inPoso = new IntWrapper();
@@ -142,5 +144,27 @@ public void varyingLengthTest2() {
142144
}
143145
}
144146

147+
/**
 * Runs the max-length check against each codec for every input length from 0
 * up to (but excluding) the given limit. The limits for the binary-packing
 * cases are multiples of {@code LongBinaryPacking.BLOCK_SIZE}; the composition
 * adds 10 more so that lengths which are not block multiples are covered too.
 */
@Test
148+
public void testMaxHeadlessCompressedLength() {
149+
testMaxHeadlessCompressedLength(new LongJustCopy(), 128);
150+
testMaxHeadlessCompressedLength(new LongBinaryPacking(), 16 * LongBinaryPacking.BLOCK_SIZE);
151+
testMaxHeadlessCompressedLength(new LongVariableByte(), 128);
152+
testMaxHeadlessCompressedLength(new SkippableLongComposition(new LongBinaryPacking(), new LongVariableByte()), 16 * LongBinaryPacking.BLOCK_SIZE + 10);
153+
}
154+
155+
/**
 * For each input length in {@code [0, inlengthTo)}, allocates an output buffer of
 * exactly {@code maxHeadlessCompressedLength} longs, compresses an all-ones input
 * into it, and asserts that the estimate exceeds the written length by at most one.
 *
 * @param codec      codec under test
 * @param inlengthTo exclusive upper bound of the input lengths tried
 */
private static void testMaxHeadlessCompressedLength(SkippableLongCODEC codec, int inlengthTo) {
156+
for (int inlength = 0; inlength < inlengthTo; ++inlength) {
157+
long[] input = new long[inlength];
158+
// -1L has every bit set; presumably the least compressible value for these codecs.
Arrays.fill(input, -1L);
145159

160+
int maxOutputLength = codec.maxHeadlessCompressedLength(new IntWrapper(), inlength);
161+
long[] output = new long[maxOutputLength];
162+
IntWrapper outPos = new IntWrapper();
163+
164+
codec.headlessCompress(input, new IntWrapper(), inlength, output, outPos);
165+
// If we reach this point, no exception was thrown, which means the calculated output length was sufficient.
166+
167+
// Tightness check: the estimate may over-allocate by at most one long.
assertTrue(maxOutputLength <= outPos.get() + 1); // +1 because SkippableLongComposition always adds one extra integer for the potential header
168+
}
169+
}
146170
}

0 commit comments

Comments
 (0)