From 282322a5b04f216ffc2178dbc8ded5e0cf8bb145 Mon Sep 17 00:00:00 2001 From: Chris Norman Date: Tue, 14 Jan 2025 17:35:38 -0500 Subject: [PATCH] Conform to updated interop test structure. --- .../cram/compression/fqzcomp/FQZModels.java | 2 +- .../cram/compression/fqzcomp/FQZParams.java | 1 - .../cram/compression/fqzcomp/FQZState.java | 2 +- .../nametokenisation/TokenStreams.java | 57 +----------- .../cram/compression/range/RangeDecode.java | 1 - .../samtools/cram/CRAMInteropTestUtils.java | 91 ++++++++++--------- .../samtools/cram/FQZCompInteropTest.java | 8 +- .../cram/NameTokenizationInteropTest.java | 82 ++++++----------- .../htsjdk/samtools/cram/RANSInteropTest.java | 73 +++++++-------- .../samtools/cram/RangeInteropTest.java | 56 ++++-------- 10 files changed, 135 insertions(+), 238 deletions(-) diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZModels.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZModels.java index aa5338fba5..696971888a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZModels.java +++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZModels.java @@ -17,7 +17,7 @@ public FQZModels(final FQZParams fqzParams) { } length = new ByteModel[4]; for (int i = 0; i < 4; i++) { - length[i] = new ByteModel(256); //TODO: Magic 256 ? + length[i] = new ByteModel(256); } reverse = new ByteModel(2); duplicate = new ByteModel(2); diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParams.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParams.java index 68a4dac327..713f28fb8a 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParams.java +++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParams.java @@ -5,7 +5,6 @@ import java.util.List; public class FQZParams { - //TODO: share this with the decoder/encoder private static final int NUMBER_OF_SYMBOLS = 256; final FQZGlobalFlags fqzFlags; diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java index 08474a64d6..4012aa797f 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java +++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java @@ -38,7 +38,7 @@ public class FQZState { public void setSelector(int selector) { this.selector = selector; } //TODO: it is super confusing that this is called selectorTable, since that is also a thing in FQZParams as well, - // but there it is an array - is this the index ? + // but there it is an array public int getSelectorTable() { return selectorTable; } public void setSelectorTable(int selectorTable) { this.selectorTable = selectorTable; } diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java index e463181fd0..3989a6abe8 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java +++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java @@ -95,67 +95,14 @@ public TokenStreams(final ByteBuffer inputByteBuffer, final int useArith, final final ByteBuffer uncompressedTokenStream; if (useArith != 0) { final RangeDecode rangeDecode = new RangeDecode(); - uncompressedTokenStream = rangeDecode.uncompress(ByteBuffer.wrap(compressedTokenStream)); + uncompressedTokenStream = rangeDecode.uncompress(CompressionUtils.wrap(compressedTokenStream)); } else { final RANSNx16Decode ransDecode = new RANSNx16Decode(); - uncompressedTokenStream = ransDecode.uncompress(ByteBuffer.wrap(compressedTokenStream)); + uncompressedTokenStream = ransDecode.uncompress(CompressionUtils.wrap(compressedTokenStream)); } getStreamsForPos(tokenPosition)[tokenType] = uncompressedTokenStream; } } - //displayTokenStreamSizes(); - } - - private void displayTokenStreamSizes() { - for (int t = 0; t < TOTAL_TOKEN_TYPES; t++) { - System.out.print(String.format("%s ", typeToString(t))); - } - System.out.println(); - for (int pos = 0; pos < tokenStreams.length; pos++) { - System.out.print(String.format("Pos %2d: ", pos)); - for (int typ = 0; typ < tokenStreams[pos].length; typ++) { - final ByteBuffer bf = tokenStreams[pos][typ]; - if (bf == null) { - System.out.print(String.format("%8s", "null")); - } else { - System.out.print(String.format("%8d", bf.limit())); - } - } - System.out.println(); - } - } - - private String typeToString(int i) { - switch (i) { - case TOKEN_TYPE: - return "TOKEN_TYPE"; - case TOKEN_STRING: - return "TOKEN_STRING"; - case TOKEN_CHAR: - return "TOKEN_CHAR"; - case TOKEN_DIGITS0: - return "TOKEN_DIGITS0"; - case TOKEN_DZLEN: - return "TOKEN_DZLEN"; - case TOKEN_DUP: - return "TOKEN_DUP"; - case TOKEN_DIFF: - return "TOKEN_DIFF"; - case TOKEN_DIGITS: - return "TOKEN_DIGITS"; - case TOKEN_DELTA: - return "TOKEN_DELTA"; - case TOKEN_DELTA0: - return "TOKEN_DELTA0"; - case TOKEN_MATCH: - return "TOKEN_MATCH"; - case TOKEN_END: - return "TOKEN_END"; - case TOKEN_NOP: - return "NOP"; - default: - throw new CRAMException("Invalid name tokenizer tokenType: " + i); - } } public ByteBuffer[] getStreamsForPos(final int pos) { diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java index a00af4ab94..046a062821 100644 --- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java +++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeDecode.java @@ -107,7 +107,6 @@ private ByteBuffer uncompress(final ByteBuffer inBuffer, final int outSize) { } outBuffer.rewind(); return outBuffer; - } private void uncompressOrder0( diff --git a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java index 1496df8558..11e2c790a2 100644 --- a/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java +++ b/src/test/java/htsjdk/samtools/cram/CRAMInteropTestUtils.java @@ -9,18 +9,60 @@ import java.util.List; /** - * Interop test data originates in a separate repository, currently at https://github.com/samtools/htslib, in htscodecs, - * but we keep a copy in htsjdk so we can use it for round trip tests in CI without needing to clone a second repo. + * Interop test data originates in a separate repository, currently at hts-specs/test, so we keep a copy in + * htsjdk so we can use it for round trip tests in CI without needing to clone a second repo. */ public class CRAMInteropTestUtils { - public static final String INTEROP_TEST_FILES_PATH = "src/test/resources/htsjdk/samtools/cram/htslib_interop/"; + public static final String INTEROP_TEST_FILES_PATH = "src/test/resources/htsjdk/hts-specs/test/cram/codecs"; + public static final String GZIP_PATH = "gzip/"; + public static final String GZIP_SUFFIX = ".gz"; /** * @return the name and location of the local interop test data as specified by the * variable INTEROP_TEST_FILES_PATH */ - public static Path getCRAM31_Htslib_InteropTestDataLocation() { - return Paths.get(INTEROP_TEST_FILES_PATH + "cram31/"); + public static Path getCRAMInteropTestDataLocation() { + return Paths.get(INTEROP_TEST_FILES_PATH); + } + + // return a list of all encoded test data files in the interop/ directory + protected static List getCRAMInteropCompressedPaths(final String compressedDir) throws IOException { + final List paths = new ArrayList<>(); + Files.newDirectoryStream( + CRAMInteropTestUtils.getCRAMInteropTestDataLocation().resolve(compressedDir), + path -> Files.isRegularFile(path)) + .forEach(path -> paths.add(path)); + return paths; + } + + // Given a compressed test file path, return the corresponding uncompressed file path + protected static final Path getUnCompressedPathForCompressedPath(final Path compressedInteropPath) { + final String uncompressedFileName = getUncompressedFileName(compressedInteropPath); + return compressedInteropPath.getParent().getParent() + .resolve(CRAMInteropTestUtils.GZIP_PATH + uncompressedFileName); + } + + private static final String getUncompressedFileName(final Path compressedPath) { + // Returns original filename from compressed file name + final String fileName = compressedPath.getFileName().toString(); + final int lastDotIndex = fileName.lastIndexOf("."); + if (lastDotIndex >= 0) { + final String compressedFileName = fileName.substring(0, lastDotIndex); + return compressedFileName + CRAMInteropTestUtils.GZIP_SUFFIX; + } else { + throw new CRAMException("The name of the compressed file should contain a period followed by a number that" + + "indicates the order of compression. Actual compressed file name = "+ fileName); + } + } + + // return a list of all raw test files in the hts-specs interop test directory (these files are raw in + // the sense that they are not compressed by any CRAM codec, although they ARE gzip compressed for inclusion + // in the repo) + protected static final List getRawCRAMInteropTestFiles() throws IOException { + final List paths = new ArrayList<>(); + Files.newDirectoryStream(CRAMInteropTestUtils.getCRAMInteropTestDataLocation().resolve(GZIP_PATH)) + .forEach(path -> paths.add(path)); + return paths; } // the input files have embedded newlines that the test remove before round-tripping... @@ -45,43 +87,4 @@ protected static final byte[] filterEmbeddedNewlines(final byte[] rawBytes) thro } } - // return a list of all encoded test data files in the htscodecs/tests/dat/ directory - protected static List getInteropCompressedFilePaths(final String compressedDir) throws IOException { - final List paths = new ArrayList<>(); - Files.newDirectoryStream( - CRAMInteropTestUtils.getCRAM31_Htslib_InteropTestDataLocation().resolve("dat/"+compressedDir), - path -> Files.isRegularFile(path)) - .forEach(path -> paths.add(path)); - return paths; - } - - // Given a compressed test file path, return the corresponding uncompressed file path - protected static final Path getUnCompressedFilePath(final Path compressedInteropPath) { - final String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString()); - // Example compressedInteropPath: ../dat/r4x8/q4.1 => unCompressedFilePath: ../dat/q4 - return compressedInteropPath.getParent().getParent().resolve(uncompressedFileName); - } - - private static final String getUncompressedFileName(final String compressedFileName) { - // Returns original filename from compressed file name - final int lastDotIndex = compressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0) { - return compressedFileName.substring(0, lastDotIndex); - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates the order of compression. Actual compressed file name = "+ compressedFileName); - } - } - - // return a list of all raw test files in the htscodecs/tests/dat directory - protected static final List getInteropRawTestFiles() throws IOException { - final List paths = new ArrayList<>(); - Files.newDirectoryStream( - CRAMInteropTestUtils.getCRAM31_Htslib_InteropTestDataLocation().resolve("dat"), - path -> (Files.isRegularFile(path)) && !Files.isHidden(path)) - .forEach(path -> paths.add(path)); - return paths; - } - } \ No newline at end of file diff --git a/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java b/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java index 884b51dbed..343d1a0e2c 100644 --- a/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java @@ -18,6 +18,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import java.util.zip.GZIPInputStream; public class FQZCompInteropTest extends HtsjdkTest { @@ -31,10 +32,10 @@ public Object[][] getDecodeOnlyTestCases() throws IOException { // compressed testfile path, uncompressed testfile path, // FQZComp decoder final List testCases = new ArrayList<>(); - for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_FQZCOMP_DIR)) { + for (Path path : CRAMInteropTestUtils.getCRAMInteropCompressedPaths(COMPRESSED_FQZCOMP_DIR)) { Object[] objects = new Object[]{ path, - CRAMInteropTestUtils.getUnCompressedFilePath(path), + CRAMInteropTestUtils.getUnCompressedPathForCompressedPath(path), new FQZCompDecode() }; testCases.add(objects); @@ -49,7 +50,8 @@ public void testDecodeOnly( final Path compressedFilePath, final Path uncompressedInteropPath, final FQZCompDecode fqzcompDecode) throws IOException { - try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); + try (final InputStream uncompressedInteropStream = + new GZIPInputStream(Files.newInputStream(uncompressedInteropPath)); final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { // preprocess the uncompressed data (to match what the htscodecs-library test harness does) diff --git a/src/test/java/htsjdk/samtools/cram/NameTokenizationInteropTest.java b/src/test/java/htsjdk/samtools/cram/NameTokenizationInteropTest.java index d1dac98405..315cd53f40 100644 --- a/src/test/java/htsjdk/samtools/cram/NameTokenizationInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/NameTokenizationInteropTest.java @@ -15,26 +15,28 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.List; +import java.util.zip.GZIPInputStream; -// Test the roundtrip and decompression of name tokenization encoded data using the htslib cram interop stream +// Test the roundtrip and decompression of name tokenization encoded data using the hts-specs cram interop stream // data for the name tokenization codec. public class NameTokenizationInteropTest extends HtsjdkTest { - public static final String COMPRESSED_TOK_DIR = "tok3"; + public static final String COMPRESSED_TOK_DIR = "tok3/"; - // the htslib cram interop tests streams use this separator in the raw (uncompressed) streams to separate + // the hts-specs cram interop tests streams use this separator in the raw (uncompressed) streams to separate // the read names that are passed into/out of the name tokenization codec, but htsjdk uses '\0' because - // the downstream htsjdk cram code assumes that value; so in the interop tests we need to replace the htslib + // the downstream htsjdk cram code assumes that value; so in the interop tests we need to replace the hts-specs // separator with the corresponding name tokenization separator used by htsjdk - public static final byte HTSLIB_NAME_SEPARATOR = '\n'; + public static final byte HTS_SPECS_NAME_SEPARATOR = '\n'; @DataProvider(name = "allNameTokInteropTests") public Object[][] getAllNameTokenizationInteropTests() throws IOException { // raw (unCompressed) path, useArith final List testCases = new ArrayList<>(); - for (final Path preCompressedInteropPath : getPreCompressedInteropNameTokTestPaths()) { + for (final Path preCompressedInteropPath : CRAMInteropTestUtils.getCRAMInteropCompressedPaths(COMPRESSED_TOK_DIR)) { for (boolean useArith: new boolean[]{true, false}) { testCases.add(new Object[] { - unCompressedPathFromPreCompressedPath(preCompressedInteropPath), + //unCompressedPathFromPreCompressedPath(preCompressedInteropPath), + CRAMInteropTestUtils.getUnCompressedPathForCompressedPath(preCompressedInteropPath), useArith }); } @@ -48,9 +50,9 @@ public void testNameTokRoundTrip( final Path unCompressedInteropPath, final boolean useArith) throws IOException { - try (final InputStream unCompressedInteropStream = Files.newInputStream(unCompressedInteropPath)) { - // convert the uncompressed data from htslib to the unCompressed format used to pass data in/out of the htsjdk name tok codec - final ByteBuffer unCompressedInteropBytes = convertHTSLIBToHTSJDKStreamFormat( + try (final InputStream unCompressedInteropStream = new GZIPInputStream(Files.newInputStream(unCompressedInteropPath))) { + // convert the uncompressed data from hts-specs to the unCompressed format used to pass data in/out of the htsjdk name tok codec + final ByteBuffer unCompressedInteropBytes = convertHTSSpecsToHTSJDKStreamFormat( ByteBuffer.wrap(IOUtils.toByteArray(unCompressedInteropStream)), NameTokenisationDecode.NAME_SEPARATOR ); @@ -78,79 +80,51 @@ public void testNameTokRoundTrip( @DataProvider(name = "uncompressNameTokInteropTests") public Object[][] getUncompressNameTokInteropTests() throws IOException { - // compressed path (htslib interop preCompressed file), raw (unCompressed) path, useArith (used for round tripping only) + // compressed path (hts-specs interop preCompressed file), raw (unCompressed) path, useArith (used for round tripping only) final List testCases = new ArrayList<>(); - for (final Path preCompressedInteropPath : getPreCompressedInteropNameTokTestPaths()) { + for (final Path preCompressedInteropPath : CRAMInteropTestUtils.getCRAMInteropCompressedPaths(COMPRESSED_TOK_DIR)) { testCases.add(new Object[] { preCompressedInteropPath, - unCompressedPathFromPreCompressedPath(preCompressedInteropPath) + //unCompressedPathFromPreCompressedPath(preCompressedInteropPath) + CRAMInteropTestUtils.getUnCompressedPathForCompressedPath(preCompressedInteropPath) }); } return testCases.toArray(new Object[][]{}); } @Test (dataProvider = "uncompressNameTokInteropTests", - description = "Uncompress the htslib compressed file using htsjdk and compare it with the uncompressed file.") + description = "Uncompress the hts-specs compressed file using htsjdk and compare it with the uncompressed file.") public void testNameTokUnCompress( final Path preCompressedInteropPath, final Path unCompressedInteropPath) throws IOException { try (final InputStream preCompressedInteropStream = Files.newInputStream(preCompressedInteropPath); - final InputStream unCompressedInteropStream = Files.newInputStream(unCompressedInteropPath)) { + final InputStream unCompressedInteropStream = new GZIPInputStream(Files.newInputStream(unCompressedInteropPath))) { final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); - // convert the uncompressed data from htslib to the unCompressed format used to pass data in/out of the htsjdk name tok codec - final ByteBuffer uncompressedInteropBytes = convertHTSLIBToHTSJDKStreamFormat( + // convert the uncompressed data from hts-specs to the unCompressed format used to pass data in/out of the htsjdk name tok codec + final ByteBuffer uncompressedInteropBytes = convertHTSSpecsToHTSJDKStreamFormat( ByteBuffer.wrap(IOUtils.toByteArray(unCompressedInteropStream)), NameTokenisationDecode.NAME_SEPARATOR ); - // Use htsjdk to uncompress the precompressed file from htscodecs repo + // Use htsjdk to uncompress the precompressed file from hts-specs repo final NameTokenisationDecode nameTokenisationDecode = new NameTokenisationDecode(); final ByteBuffer uncompressedHtsjdkBytes = ByteBuffer.wrap( nameTokenisationDecode.uncompress(preCompressedInteropBytes, NameTokenisationDecode.NAME_SEPARATOR) ); - // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo + // Compare the htsjdk uncompressed bytes with the original input file from hts-specs repo Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes); } } - // return a list of all NameTokenization encoded test data files in the htscodecs/tests/names/tok3 directory - private List getPreCompressedInteropNameTokTestPaths() throws IOException { - final List paths = new ArrayList<>(); - Files.newDirectoryStream( - CRAMInteropTestUtils.getCRAM31_Htslib_InteropTestDataLocation().resolve("names/" + COMPRESSED_TOK_DIR), - path -> Files.isRegularFile(path)) - .forEach(path -> paths.add(path)); - return paths; - } - - // Given a compressed test file path, return the corresponding uncompressed file path - private static Path unCompressedPathFromPreCompressedPath(final Path preCompressedInteropPath) { - final String uncompressedFileName = getUnCompressedFileNameFromCompressedFileName(preCompressedInteropPath.getFileName().toString()); - // Example compressedInteropPath: ../names/tok3/01.names.1 => unCompressedFilePath: ../names/01.names - return preCompressedInteropPath.getParent().getParent().resolve(uncompressedFileName); - } - - // Return the name of the unCompressed interop test filethat corresponds to a preCompressed interop file - private static String getUnCompressedFileNameFromCompressedFileName(final String preCompressedFileName) { - int lastDotIndex = preCompressedFileName.lastIndexOf("."); - if (lastDotIndex >= 0) { - return preCompressedFileName.substring(0, lastDotIndex); - } else { - throw new CRAMException("The format of the compressed File Name is not as expected. " + - "The name of the compressed file should contain a period followed by a number that" + - "indicates type of compression. Actual compressed file name = "+ preCompressedFileName); - } - } - - // translate an htslib interop stream into the stream format used by the htsjdk name tokenization codec - private ByteBuffer convertHTSLIBToHTSJDKStreamFormat(final ByteBuffer htslibBuffer, final byte newSeparator) { - final ByteBuffer translatedBuffer = ByteBuffer.allocate(htslibBuffer.limit()); - for (int i = 0; i < htslibBuffer.limit(); i++) { - if (htslibBuffer.get(i) == HTSLIB_NAME_SEPARATOR) { + // translate an hts-specs interop stream into the stream format used by the htsjdk name tokenization codec + private ByteBuffer convertHTSSpecsToHTSJDKStreamFormat(final ByteBuffer htsSpecsBuffer, final byte newSeparator) { + final ByteBuffer translatedBuffer = ByteBuffer.allocate(htsSpecsBuffer.limit()); + for (int i = 0; i < htsSpecsBuffer.limit(); i++) { + if (htsSpecsBuffer.get(i) == HTS_SPECS_NAME_SEPARATOR) { translatedBuffer.put(i, newSeparator); } else { - translatedBuffer.put(i, htslibBuffer.get(i)); + translatedBuffer.put(i, htsSpecsBuffer.get(i)); } } return translatedBuffer; diff --git a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java index bf4e9439e4..37f459f46b 100644 --- a/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RANSInteropTest.java @@ -27,14 +27,15 @@ import java.util.Arrays; import java.util.List; import java.util.stream.Stream; +import java.util.zip.GZIPInputStream; /** * RANSInteropTest tests if the htsjdk RANS4x8 and RANSNx16 implementations are interoperable * with the htslib implementations. The test files for Interop tests are from the htslib repo. */ public class RANSInteropTest extends HtsjdkTest { - public static final String COMPRESSED_RANS4X8_DIR = "r4x8"; - public static final String COMPRESSED_RANSNX16_DIR = "r4x16"; + public static final String COMPRESSED_RANS4X8_DIR = "rans4x8"; + public static final String COMPRESSED_RANSNX16_DIR = "ransNx16"; // The cross product of all interop test files with all the various parameter combinations currently results in // about 80 test cases in total. Serially spinning up so many rans encoders/decoders can result in memory pressure @@ -50,17 +51,15 @@ public class RANSInteropTest extends HtsjdkTest { private RANSNx16Encode ransNx16Encoder = new RANSNx16Encode(); private RANSNx16Decode ransNx16Decoder = new RANSNx16Decode(); - // enumerates the different flag combinations public Object[][] get4x8RoundTripTestCases() throws IOException { - - // params: - // uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // uncompressed testfile path, RANS encoder, RANS decoder, RANS params final List rans4x8ParamsOrderList = Arrays.asList( RANSParams.ORDER.ZERO, RANSParams.ORDER.ONE); final List testCases = new ArrayList<>(); - CRAMInteropTestUtils.getInteropRawTestFiles() + // opportunistically use ALL the raw test files in the interop directory, including ones intended to test + // the other codecs; from the rans perspective they're just a stream of bits + CRAMInteropTestUtils.getRawCRAMInteropTestFiles() .forEach(path -> rans4x8ParamsOrderList.stream().map(rans4x8ParamsOrder -> new Object[]{ path, @@ -73,10 +72,7 @@ public Object[][] get4x8RoundTripTestCases() throws IOException { // enumerates the different flag combinations public Object[][] getNx16RoundTripTestCases() throws IOException { - - // params: - // uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // uncompressed testfile path, RANS encoder, RANS decoder, RANS params final List ransNx16ParamsFormatFlagList = Arrays.asList( 0x00, RANSNx16Params.ORDER_FLAG_MASK, @@ -93,7 +89,9 @@ public Object[][] getNx16RoundTripTestCases() throws IOException { RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK, RANSNx16Params.RLE_FLAG_MASK | RANSNx16Params.PACK_FLAG_MASK | RANSNx16Params.ORDER_FLAG_MASK); final List testCases = new ArrayList<>(); - CRAMInteropTestUtils.getInteropRawTestFiles() + // opportunistically gets ALL the raw test files in the interop directory, including ones intended to test + // the other codecs; from the rans perspective they're just a stream of bits + CRAMInteropTestUtils.getRawCRAMInteropTestFiles() .forEach(path -> ransNx16ParamsFormatFlagList.stream().map(ransNx16ParamsFormatFlag -> new Object[]{ path, @@ -104,17 +102,13 @@ public Object[][] getNx16RoundTripTestCases() throws IOException { return testCases.toArray(new Object[][]{}); } - // uses the available compressed interop test files public Object[][] get4x8DecodeOnlyTestCases() throws IOException { - - // params: - // compressed testfile path, uncompressed testfile path, - // RANS decoder + // compressed testfile path, uncompressed testfile path, RANS decoder final List testCases = new ArrayList<>(); - for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANS4X8_DIR)) { + for (Path path : CRAMInteropTestUtils.getCRAMInteropCompressedPaths(COMPRESSED_RANS4X8_DIR)) { Object[] objects = new Object[]{ path, - CRAMInteropTestUtils.getUnCompressedFilePath(path), + CRAMInteropTestUtils.getUnCompressedPathForCompressedPath(path), rans4x8Decoder }; testCases.add(objects); @@ -122,17 +116,13 @@ public Object[][] get4x8DecodeOnlyTestCases() throws IOException { return testCases.toArray(new Object[][]{}); } - // uses the available compressed interop test files public Object[][] getNx16DecodeOnlyTestCases() throws IOException { - - // params: - // compressed testfile path, uncompressed testfile path, - // RANS decoder + // compressed testfile path, uncompressed testfile path, RANS decoder final List testCases = new ArrayList<>(); - for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANSNX16_DIR)) { + for (Path path : CRAMInteropTestUtils.getCRAMInteropCompressedPaths(COMPRESSED_RANSNX16_DIR)) { Object[] objects = new Object[]{ path, - CRAMInteropTestUtils.getUnCompressedFilePath(path), + CRAMInteropTestUtils.getUnCompressedPathForCompressedPath(path), ransNx16Decoder }; testCases.add(objects); @@ -142,20 +132,14 @@ public Object[][] getNx16DecodeOnlyTestCases() throws IOException { @DataProvider(name = "roundTripTestCases") public Object[][] getRoundTripTestCases() throws IOException { - - // params: - // uncompressed testfile path, - // RANS encoder, RANS decoder, RANS params + // uncompressed testfile path, RANS encoder, RANS decoder, RANS params return Stream.concat(Arrays.stream(get4x8RoundTripTestCases()), Arrays.stream(getNx16RoundTripTestCases())) .toArray(Object[][]::new); } @DataProvider(name = "decodeOnlyTestCases") public Object[][] getDecodeOnlyTestCases() throws IOException { - - // params: - // compressed testfile path, uncompressed testfile path, - // RANS decoder + // compressed testfile path, uncompressed testfile path, RANS decoder return Stream.concat(Arrays.stream(get4x8DecodeOnlyTestCases()), Arrays.stream(getNx16DecodeOnlyTestCases())) .toArray(Object[][]::new); } @@ -173,7 +157,10 @@ public void testRANSRoundTrip( // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = CompressionUtils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = + CompressionUtils.wrap( + CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream)) + ); // Stripe Flag is not implemented in RANSNx16 Encoder. // The encoder throws CRAMException if Stripe Flag is used. @@ -194,17 +181,19 @@ public void testDecodeOnly( final Path compressedFilePath, final Path uncompressedInteropPath, final RANSDecode ransDecode) throws IOException { - try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); - final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) - ) { - + try (final InputStream uncompressedInteropStream = + new GZIPInputStream(Files.newInputStream(uncompressedInteropPath)); + final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath)) { // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through RANS and compare the // results - final ByteBuffer uncompressedInteropBytes = CompressionUtils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = + uncompressedInteropPath.toString().endsWith("u32" + CRAMInteropTestUtils.GZIP_SUFFIX) ? + ByteBuffer.wrap(IOUtils.toByteArray(uncompressedInteropStream)) : + ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); final ByteBuffer preCompressedInteropBytes = CompressionUtils.wrap(IOUtils.toByteArray(preCompressedInteropStream)); - // Use htsjdk to uncompress the precompressed file from htscodecs repo + // Use htsjdk to uncompress the precompressed file from hts-spec repo final ByteBuffer uncompressedHtsjdkBytes = ransDecode.uncompress(preCompressedInteropBytes); // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo diff --git a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java index 36bd28a9a0..967271091b 100644 --- a/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java +++ b/src/test/java/htsjdk/samtools/cram/RangeInteropTest.java @@ -19,20 +19,15 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.zip.GZIPInputStream; -//TODO: interop test failures: -// u32.4, u32.65, u32.1, u32.9 -// public class RangeInteropTest extends HtsjdkTest { - public static final String COMPRESSED_RANGE_DIR = "arith"; + public static final String COMPRESSED_RANGE_DIR = "range"; // enumerates the different flag combinations @DataProvider(name = "roundTripTestCases") public Object[][] getRoundTripTestCases() throws IOException { - - // params: - // uncompressed testfile path, - // Range encoder, Range decoder, Range params + // uncompressed testfile path, Range params final List rangeParamsFormatFlagList = Arrays.asList( 0x00, RangeParams.ORDER_FLAG_MASK, @@ -47,32 +42,24 @@ public Object[][] getRoundTripTestCases() throws IOException { RangeParams.EXT_FLAG_MASK, RangeParams.EXT_FLAG_MASK | RangeParams.PACK_FLAG_MASK); final List testCases = new ArrayList<>(); - CRAMInteropTestUtils.getInteropRawTestFiles() + //note that for the roundtrip tests, we're retrieving ALL of the raw test files in the interop directory, + // *including* the ones intended for the other codecs such as name tok, etc., but the range codec should + // be tolerant of any stream of data + CRAMInteropTestUtils.getRawCRAMInteropTestFiles() .forEach(path -> rangeParamsFormatFlagList.stream().map(rangeParamsFormatFlag -> new Object[]{ path, - new RangeEncode(), - new RangeDecode(), new RangeParams(rangeParamsFormatFlag) }).forEach(testCases::add)); return testCases.toArray(new Object[][]{}); } - // uses the available compressed interop test files @DataProvider(name = "decodeOnlyTestCases") public Object[][] getDecodeOnlyTestCases() throws IOException { - - // params: // compressed testfile path, uncompressed testfile path, - // Range decoder final List testCases = new ArrayList<>(); - for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_RANGE_DIR)) { - Object[] objects = new Object[]{ - path, - CRAMInteropTestUtils.getUnCompressedFilePath(path), - new RangeDecode() - }; - testCases.add(objects); + for (Path path : CRAMInteropTestUtils.getCRAMInteropCompressedPaths(COMPRESSED_RANGE_DIR)) { + testCases.add(new Object[] { path, CRAMInteropTestUtils.getUnCompressedPathForCompressedPath(path) }); } return testCases.toArray(new Object[][]{}); } @@ -80,18 +67,16 @@ public Object[][] getDecodeOnlyTestCases() throws IOException { @Test ( dataProvider = "roundTripTestCases", description = "Roundtrip using htsjdk Range Codec. Compare the output with the original file" ) - public void testRangeRoundTrip( - final Path uncompressedFilePath, - final RangeEncode rangeEncode, - final RangeDecode rangeDecode, - final RangeParams params) throws IOException { - try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedFilePath)) { - + public void testRangeRoundTrip(final Path uncompressedFilePath, final RangeParams params) throws IOException { + try (final InputStream uncompressedInteropStream = new GZIPInputStream(Files.newInputStream(uncompressedFilePath))) { // preprocess the uncompressed data (to match what the htscodecs-library test harness does) // by filtering out the embedded newlines, and then round trip through Range codec and compare the // results - final ByteBuffer uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final ByteBuffer uncompressedInteropBytes = + ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); + final RangeDecode rangeDecode = new RangeDecode(); + final RangeEncode rangeEncode = new RangeEncode(); if (params.isStripe()) { Assert.assertThrows(CRAMException.class, () -> rangeEncode.compress(uncompressedInteropBytes, params)); } else { @@ -105,11 +90,9 @@ public void testRangeRoundTrip( @Test ( dataProvider = "decodeOnlyTestCases", description = "Uncompress the existing compressed file using htsjdk Range codec and compare it with the original file.") - public void testDecodeOnly( - final Path compressedFilePath, - final Path uncompressedInteropPath, - final RangeDecode rangeDecode) throws IOException { - try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath); + public void testDecodeOnly(final Path compressedFilePath, final Path uncompressedInteropPath) throws IOException { + try (final InputStream uncompressedInteropStream = + new GZIPInputStream(Files.newInputStream(uncompressedInteropPath)); final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath) ) { // preprocess the uncompressed data (to match what the htscodecs-library test harness does) @@ -117,7 +100,7 @@ public void testDecodeOnly( // and compare the results final ByteBuffer uncompressedInteropBytes; - if (uncompressedInteropPath.toString().endsWith("dat/u32")) { + if (uncompressedInteropPath.toString().endsWith("u32" + CRAMInteropTestUtils.GZIP_SUFFIX)) { uncompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(uncompressedInteropStream)); } else { uncompressedInteropBytes = ByteBuffer.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream))); @@ -125,6 +108,7 @@ public void testDecodeOnly( final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream)); // Use htsjdk to uncompress the precompressed file from htscodecs repo + final RangeDecode rangeDecode = new RangeDecode(); final ByteBuffer uncompressedHtsjdkBytes = rangeDecode.uncompress(preCompressedInteropBytes); // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo