From b73b922e18ad9395ccd2c681ed7cf47ac87ede60 Mon Sep 17 00:00:00 2001
From: yash-puligundla <yasasvini.puligundla@ga4gh.org>
Date: Wed, 7 Feb 2024 13:55:16 -0500
Subject: [PATCH] Add FQZComp Decoder

---
 .../compression/fqzcomp/FQZCompDecode.java    | 301 ++++++++++++++++++
 .../compression/fqzcomp/FQZGlobalFlags.java   |  28 ++
 .../cram/compression/fqzcomp/FQZModel.java    |  56 ++++
 .../cram/compression/fqzcomp/FQZParam.java    | 205 ++++++++++++
 .../cram/compression/fqzcomp/FQZState.java    |  88 +++++
 .../NameTokenisationDecode.java               | 164 ++++++++++
 .../NameTokenisationEncode.java               | 287 +++++++++++++++++
 .../nametokenisation/TokenStreams.java        | 125 ++++++++
 .../nametokenisation/tokens/EncodeToken.java  |  38 +++
 .../cram/compression/range/RangeCoder.java    |   4 +-
 .../samtools/cram/FQZCompInteropTest.java     |  80 +++++
 .../cram/NameTokenizationInteropTest.java     | 134 ++++++++
 .../NameTokenisationTest.java                 | 100 ++++++
 13 files changed, 1608 insertions(+), 2 deletions(-)
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompDecode.java
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZGlobalFlags.java
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZModel.java
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParam.java
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java
 create mode 100644 src/main/java/htsjdk/samtools/cram/compression/nametokenisation/tokens/EncodeToken.java
 create mode 100644 src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java
 create mode 100644 src/test/java/htsjdk/samtools/cram/NameTokenizationInteropTest.java
 create mode 100644 src/test/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationTest.java

diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompDecode.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompDecode.java
new file mode 100644
index 0000000000..5933a7bcaf
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZCompDecode.java
@@ -0,0 +1,301 @@
+package htsjdk.samtools.cram.compression.fqzcomp;
+
+import htsjdk.samtools.cram.CRAMException;
+import htsjdk.samtools.cram.compression.CompressionUtils;
+import htsjdk.samtools.cram.compression.range.ByteModel;
+import htsjdk.samtools.cram.compression.range.RangeCoder;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+public class FQZCompDecode {
+    private static final int NUMBER_OF_SYMBOLS = 256;
+
+    public static ByteBuffer uncompress( final ByteBuffer inBuffer) { // Decodes one FQZComp stream into quality bytes, each shifted up by 33
+        final int bufferLength = CompressionUtils.readUint7(inBuffer); // number of bytes in the decoded output
+        final int version = inBuffer.get() & 0xFF;
+        if (version != 5) {
+            throw new CRAMException("Invalid FQZComp format version number: " + version);
+        }
+        final FQZGlobalFlags globalFlags = new FQZGlobalFlags(inBuffer.get() & 0xFF);
+        final int numParamBlock = globalFlags.isMultiParam() ? (inBuffer.get() & 0xFF) : 1; // unsigned read: a count above 127 must not go negative
+        int maxSelector = (numParamBlock > 1) ? (numParamBlock - 1) : 0;
+        final int[] selectorTable = new int[NUMBER_OF_SYMBOLS];
+        if (globalFlags.hasSelectorTable()) {
+            maxSelector = inBuffer.get() & 0xFF;
+            readArray(inBuffer, selectorTable, NUMBER_OF_SYMBOLS);
+        } else {
+            for (int i = 0; i < numParamBlock; i++) { // identity mapping for the selectors in use
+                selectorTable[i] = i;
+            }
+            for (int i = numParamBlock; i < NUMBER_OF_SYMBOLS; i++) { // every remaining selector maps to the last parameter block
+                selectorTable[i] = numParamBlock - 1;
+            }
+        }
+        final List<FQZParam> fqzParamList = new ArrayList<FQZParam>(numParamBlock);
+        int maxSymbols = 0; // maximum number of distinct Quality values across all param sets
+        for (int p=0; p < numParamBlock; p++){
+            fqzParamList.add(p,decodeFQZSingleParam(inBuffer));
+            if(maxSymbols < fqzParamList.get(p).getMaxSymbols()){
+                maxSymbols = fqzParamList.get(p).getMaxSymbols();
+            }
+        }
+
+        // main decode loop
+        int i = 0;
+        final FQZState fqzState = new FQZState();
+        final RangeCoder rangeCoder = new RangeCoder();
+        rangeCoder.rangeDecodeStart(inBuffer);
+        final FQZModel model = fqzCreateModels(maxSymbols, maxSelector);
+        final List<Integer> QualityLengths = new ArrayList<>();
+        FQZParam params = null;
+        int last = 0;
+        final int[] rev = globalFlags.doReverse() ? new int[bufferLength] : null; // one reverse flag per record; previously always null, so any reversed stream hit a NullPointerException
+        final ByteBuffer outBuffer = CompressionUtils.allocateByteBuffer(bufferLength);
+        while (i<bufferLength){
+            if (fqzState.getBases()==0) {
+                decodeFQZNewRecord(
+                        inBuffer,
+                        rangeCoder,
+                        model,
+                        fqzState,
+                        maxSelector,
+                        globalFlags.doReverse(),
+                        selectorTable,
+                        fqzParamList,
+                        rev);
+                QualityLengths.add(fqzState.getRecordLength()); // recorded before the duplicate shortcut so the reverse and +33 passes below still see this record
+                // NOTE(review): decodeFQZNewRecord has already consumed one duplicate symbol; confirm against the spec that this second decode and its == 0 test are intended
+                if (fqzState.getIsDuplicate() && model.getDuplicate().modelDecode(inBuffer, rangeCoder) == 0) {
+                    for (int x = 0; x < fqzState.getRecordLength(); x++) {
+                        outBuffer.put(i + x, outBuffer.get(i + x - fqzState.getRecordLength()));
+                    }
+                    i += fqzState.getRecordLength();
+                    fqzState.setBases(0);
+                    continue; // the whole record was copied from the previous one: start the next record instead of decoding a stray quality symbol
+                }
+                params = fqzParamList.get(fqzState.getSelectorTable());
+                last = params.getContext();
+            }
+            final int quality = model.getQuality()[last].modelDecode(inBuffer, rangeCoder);
+            outBuffer.put(i++, (byte) params.getQualityMap()[quality]);
+            last = fqzUpdateContext(params, fqzState, quality);
+        }
+        if (globalFlags.doReverse()){
+            reverseQualities(outBuffer,bufferLength,rev,QualityLengths);
+        }
+        int outBufferIndex = 0;
+        for (int recordLength:QualityLengths) {
+            for (int recordIndex = 0; recordIndex < recordLength; recordIndex++) {
+                outBuffer.put(outBufferIndex, (byte)((outBuffer.get(outBufferIndex)& 0xFF) + 33)); // Shift character codes by 33
+                outBufferIndex += 1;
+            }
+        }
+        outBuffer.rewind();
+        return outBuffer;
+    }
+
+    public static void readArray(final ByteBuffer inBuffer, final int[] table, final int size) { // Fills table[0..size) from a two-level run-length encoding read from inBuffer
+        int j = 0; // index into rle: next free slot in the first pass, next entry to consume in the second
+        int z = 0; // number of table entries accounted for so far
+        int last = -1; // previous run byte; a run byte equal to it is followed by a repeat count
+
+        // Remove first level of run-length encoding
+        final int[] rle = new int[1024]; // runs
+        while (z < size) {
+            final int run = inBuffer.get() & 0xFF;
+            rle[j++] = run;
+            z += run;
+
+            if (run == last) {
+                int copy = inBuffer.get() & 0xFF; // how many more times the same run byte repeats
+                z += run * copy;
+                while (copy-- > 0)
+                    rle[j++] = run;
+            }
+            last = run;
+        }
+
+        // Now expand runs in rle to table, noting 255 is max run
+        int i = 0; // value written to the table; increases by one after each run
+        j = 0;
+        z = 0;
+        int part;
+        while (z < size) {
+            int run_len = 0;
+            do { // a part of 255 means the run continues in the next rle entry
+                part = rle[j++];
+                run_len += part;
+            } while (part == 255);
+
+            while (run_len-- > 0)
+                table[z++] = i;
+            i++;
+        }
+    }
+
+    public static FQZModel fqzCreateModels(final int maxSymbols, final int maxSelector){ // Builds the ByteModel set used by the decode loop
+        final ByteModel[] qualityModels = new ByteModel[1 << 16]; // one model per 16-bit context value
+        for (int context = 0; context < qualityModels.length; context++) {
+            qualityModels[context] = new ByteModel(maxSymbols + 1); // maxSymbols is the largest value, so the alphabet has one more entry
+        }
+        // One model for each of the four bytes of a record length
+        final ByteModel[] lengthModels = {new ByteModel(NUMBER_OF_SYMBOLS), new ByteModel(NUMBER_OF_SYMBOLS), new ByteModel(NUMBER_OF_SYMBOLS), new ByteModel(NUMBER_OF_SYMBOLS)};
+        final FQZModel fqzModel = new FQZModel();
+        fqzModel.setQuality(qualityModels);
+        fqzModel.setLength(lengthModels);
+        fqzModel.setReverse(new ByteModel(2));
+        fqzModel.setDuplicate(new ByteModel(2));
+        if (maxSelector > 0) { // the selector model is only created when more than one selector value is possible
+            fqzModel.setSelector(new ByteModel(maxSelector + 1));
+        }
+        return fqzModel;
+    }
+
+    // Reads one record header (selector, length, reverse flag, duplicate flag) into state and resets the per-record contexts. Returns nothing: the duplicate flag is exposed through state.getIsDuplicate()
+    public static void decodeFQZNewRecord(
+            final ByteBuffer inBuffer,
+            final RangeCoder rangeCoder,
+            final FQZModel model,
+            final FQZState state,
+            final int maxSelector,
+            final boolean doReverse,
+            final int[] selectorTable,
+            final List<FQZParam> fqzParamList,
+            final int[] rev){ // rev must be non-null when doReverse is true; it receives one entry per record
+
+        // Parameter selector
+        if (maxSelector > 0) {
+            state.setSelector(model.getSelector().modelDecode(inBuffer, rangeCoder));
+        } else {
+            state.setSelector(0);
+        }
+        state.setSelectorTable(selectorTable[state.getSelector()]); // index of the parameter block to use for this record
+        final FQZParam params = fqzParamList.get(state.getSelectorTable());
+
+        // Record length: decoded from the stream, or reused when a fixed length has already been seen
+        int len;
+        if (params.getFixedLen() >= 0) {
+            // Not fixed or fixed but first record
+            len = model.getLength()[0].modelDecode(inBuffer, rangeCoder); // four bytes, least significant first
+            len |= model.getLength()[1].modelDecode(inBuffer, rangeCoder) << 8;
+            len |= model.getLength()[2].modelDecode(inBuffer, rangeCoder) << 16;
+            len |= model.getLength()[3].modelDecode(inBuffer, rangeCoder) << 24;
+            if (params.getFixedLen() > 0) {
+                params.setFixedLen(-len); // remember the fixed length negated so later records skip the decode
+            }
+        } else {
+            len = -params.getFixedLen();
+        }
+        state.setRecordLength(len);
+        if (doReverse) {
+            rev[state.getRecordNumber()] = model.getReverse().modelDecode(inBuffer, rangeCoder);
+        }
+        state.setIsDuplicate(false);
+        if (params.isDoDedup()) {
+            if (model.getDuplicate().modelDecode(inBuffer, rangeCoder) != 0) {
+                state.setIsDuplicate(true);
+            }
+        }
+        state.setBases(len);  // number of remaining bytes in this record
+        state.setDelta(0);
+        state.setQualityContext(0);
+        state.setPreviousQuality(0);
+        state.setRecordNumber(state.getRecordNumber() + 1);
+    }
+
+    public static int fqzUpdateContext(final FQZParam params,
+                                       final FQZState state,
+                                       final int quality){
+
+        // Folds the quality just decoded into state and returns the 16-bit context for the next symbol
+        final int qualityContext = (state.getQualityContext() << params.getQualityContextShift()) + params.getQualityContextTable()[quality];
+        state.setQualityContext(qualityContext);
+        final int qualityMask = (1 << params.getQualityContextBits()) - 1;
+        int context = params.getContext() + ((qualityContext & qualityMask) << params.getQualityContextLocation());
+        if (params.isDoPos()) { // position sub-context: bases left in the record, clamped to 1023
+            context += params.getPositionContextTable()[Math.min(state.getBases(), 1023)] << params.getPositionContextLocation();
+        }
+        if (params.isDoDelta()) { // delta sub-context: count of quality changes so far in the record, clamped to 255
+            context += params.getDeltaContextTable()[Math.min(state.getDelta(), 255)] << params.getDeltaContextLocation();
+            state.setDelta(state.getDelta() + (state.getPreviousQuality() == quality ? 0 : 1));
+            state.setPreviousQuality(quality);
+        }
+        if (params.isDoSel()) { context += state.getSelector() << params.getSelectorContextLocation(); }
+        state.setBases(state.getBases() - 1);
+        return context & 0xffff;
+    }
+
+    public static FQZParam decodeFQZSingleParam(ByteBuffer inBuffer) { // Parses one parameter block from inBuffer, in stream order
+        final FQZParam param = new FQZParam();
+        param.setContext((inBuffer.get() & 0xFF) | ((inBuffer.get() & 0xFF) << 8)); // 16-bit little-endian starting context
+        param.setParameterFlags(inBuffer.get() & 0xFF); // also derives the do*/fixedLen fields from the flag bits
+        param.setMaxSymbols(inBuffer.get() & 0xFF);
+        final int x = inBuffer.get() & 0xFF; // high nibble: quality context bits, low nibble: quality context shift
+        param.setQualityContextBits(x >> 4);
+        param.setQualityContextShift(x & 0x0F);
+        final int y = inBuffer.get() & 0xFF; // high nibble: quality context location, low nibble: selector context location
+        param.setQualityContextLocation(y >> 4);
+        param.setSelectorContextLocation(y & 0x0F);
+        final int z = inBuffer.get() & 0xFF; // high nibble: position context location, low nibble: delta context location
+        param.setPositionContextLocation(z >> 4);
+        param.setDeltaContextLocation(z & 0x0F);
+
+        // Read Quality Map. Example: "unbin" Illumina Qualities
+        param.setQualityMap(new int[NUMBER_OF_SYMBOLS]);
+        if (param.isDoQmap()) {
+            for (int i = 0; i < param.getMaxSymbols(); i++) { // one stored byte per symbol; entries from maxSymbols upwards stay 0
+                param.getQualityMap()[i] = inBuffer.get() & 0xFF;
+            }
+        } else {
+            for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) { // no map stored: identity mapping
+                param.getQualityMap()[i] = i;
+            }
+        }
+
+        // Read tables
+        param.setQualityContextTable(new int[1024]); // only the first NUMBER_OF_SYMBOLS entries are filled below
+        if (param.getQualityContextBits() > 0 && param.isDoQtab()) {
+            readArray(inBuffer, param.getQualityContextTable(), NUMBER_OF_SYMBOLS);
+        } else {
+            for (int i = 0; i < NUMBER_OF_SYMBOLS; i++) {
+                param.getQualityContextTable()[i] = i;  // NOP
+            }
+        }
+        param.setPositionContextTable(new int[1024]); // left all zero unless the position flag is set
+        if (param.isDoPos()) {
+            readArray(inBuffer, param.getPositionContextTable(), 1024);
+        }
+        param.setDeltaContextTable(new int[NUMBER_OF_SYMBOLS]); // left all zero unless the delta flag is set
+        if (param.isDoDelta()) {
+            readArray(inBuffer, param.getDeltaContextTable(), NUMBER_OF_SYMBOLS);
+        }
+        return param;
+    }
+
+    public static void reverseQualities(
+            final ByteBuffer outBuffer,
+            final int bufferLength,
+            final int[] rev,
+            final List<Integer> QualityLengths
+            ){
+        // Reverses, in place, every record of outBuffer whose entry in rev is 1; record n spans QualityLengths.get(n) bytes
+        int start = 0; // offset of the current record within outBuffer
+        for (int record = 0; start < bufferLength; record++) {
+            final boolean reversed = rev[record] == 1;
+            final int length = QualityLengths.get(record);
+            if (reversed) {
+                // Swap symmetric pairs, working inwards from both ends of the record
+                for (int lo = start, hi = start + length - 1; lo < hi; lo++, hi--) {
+                    final byte swapped = outBuffer.get(lo);
+                    outBuffer.put(lo, outBuffer.get(hi));
+                    outBuffer.put(hi, swapped);
+                }
+            }
+            // Advance to the first byte of the next record
+            start += length;
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZGlobalFlags.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZGlobalFlags.java
new file mode 100644
index 0000000000..937b7eed62
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZGlobalFlags.java
@@ -0,0 +1,28 @@
+package htsjdk.samtools.cram.compression.fqzcomp;
+
+public class FQZGlobalFlags {
+    public static final int MULTI_PARAM_FLAG_MASK = 0x01;
+    public static final int SELECTOR_TABLE_FLAG_MASK = 0x02;
+    public static final int DO_REVERSE_FLAG_MASK = 0x04;
+
+    private final int globalFlags; // raw flag bits as passed to the constructor
+
+    public FQZGlobalFlags(final int globalFlags) {
+        this.globalFlags = globalFlags;
+    }
+
+    // true when the MULTI_PARAM bit is set, i.e. more than one parameter block is present
+    public boolean isMultiParam(){
+        return isSet(MULTI_PARAM_FLAG_MASK);
+    }
+
+    // true when the SELECTOR_TABLE bit is set, i.e. the parameter selector is mapped through a selector table
+    public boolean hasSelectorTable(){
+        return isSet(SELECTOR_TABLE_FLAG_MASK);
+    }
+
+    public boolean doReverse(){ // true when the DO_REVERSE bit is set
+        return isSet(DO_REVERSE_FLAG_MASK);
+    }
+    private boolean isSet(final int mask) { return (globalFlags & mask) != 0; }
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZModel.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZModel.java
new file mode 100644
index 0000000000..047c387a2a
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZModel.java
@@ -0,0 +1,56 @@
+package htsjdk.samtools.cram.compression.fqzcomp;
+
+import htsjdk.samtools.cram.compression.range.ByteModel;
+
+public class FQZModel { // Mutable holder for the ByteModel instances used by FQZCompDecode
+
+    private ByteModel[] quality; // Primary models for quality values, indexed by the current context
+    private ByteModel[] length; // Read length models with the context 0-3 being successive byte numbers (little endian order)
+    private ByteModel reverse; // Indicates which strings to reverse
+    private ByteModel duplicate; // Indicates if this whole string is a duplicate of the last one
+    private ByteModel selector; // Parameter selector model; stays null unless it is explicitly set
+
+    public FQZModel() {
+    }
+
+    public ByteModel[] getQuality() {
+
+        return quality;
+    }
+
+    public void setQuality(ByteModel[] quality) {
+        this.quality = quality;
+    }
+
+    public ByteModel[] getLength() {
+        return length;
+    }
+
+    public void setLength(ByteModel[] length) {
+        this.length = length;
+    }
+
+    public ByteModel getReverse() {
+        return reverse;
+    }
+
+    public void setReverse(ByteModel reverse) {
+        this.reverse = reverse;
+    }
+
+    public ByteModel getDuplicate() {
+        return duplicate;
+    }
+
+    public void setDuplicate(ByteModel duplicate) {
+        this.duplicate = duplicate;
+    }
+
+    public ByteModel getSelector() {
+        return selector;
+    }
+
+    public void setSelector(ByteModel selector) {
+        this.selector = selector;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParam.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParam.java
new file mode 100644
index 0000000000..eaf9b9d08c
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZParam.java
@@ -0,0 +1,205 @@
+package htsjdk.samtools.cram.compression.fqzcomp;
+
+public class FQZParam { // Mutable holder for one FQZComp parameter block
+    private int context; // Starting context value for each record
+    private int parameterFlags; // Per-parameter block bit-flags
+    // TODO: rename to follow the names used in the spec. The fields below are derived from parameterFlags in setParameterFlags()
+    private boolean doDedup;
+    private int fixedLen; // 0: length is read for every record; > 0: fixed length not yet read; < 0: negated fixed length once a caller has stored it
+    private boolean doSel;
+    private boolean doQmap;
+    private boolean doPos;
+    private boolean doDelta;
+    private boolean doQtab;
+
+    private int maxSymbols; // Total number of distinct quality values
+    private int qualityContextBits; // Total number of bits for Quality context
+    private int qualityContextShift; // Left bit shift per successive quality in quality context
+    private int qualityContextLocation; // Bit position of quality context
+    private int selectorContextLocation; // Bit position of selector context
+    private int positionContextLocation; // Bit position of position context
+    private int deltaContextLocation; // Bit position of delta context
+    private int[] qualityMap; // Map for unbinning quality values.
+    private int[] qualityContextTable; // Quality context lookup table
+    private int[] positionContextTable; // Position context lookup table
+    private int[] deltaContextTable; // Delta context lookup table
+
+    private static final int DEDUP_FLAG_MASK = 0x02; // sets doDedup
+    private static final int FIXED_LEN_FLAG_MASK = 0x04; // sets fixedLen (stored as the masked value, not a boolean)
+    private static final int SEL_FLAG_MASK = 0x08; // sets doSel
+    private static final int QMAP_FLAG_MASK = 0x10; // sets doQmap
+    private static final int PTAB_FLAG_MASK = 0x20; // sets doPos
+    private static final int DTAB_FLAG_MASK = 0x40; // sets doDelta
+    private static final int QTAB_FLAG_MASK = 0x80; // sets doQtab
+
+    public FQZParam() {
+    }
+
+    public int getContext() {
+        return context;
+    }
+
+    public int getParameterFlags() {
+        return parameterFlags;
+    }
+
+    public boolean isDoDedup() {
+        return doDedup;
+    }
+
+    public int getFixedLen() {
+        return fixedLen;
+    }
+
+    public boolean isDoSel() {
+        return doSel;
+    }
+
+    public boolean isDoQmap() {
+        return doQmap;
+    }
+
+    public boolean isDoPos() {
+        return doPos;
+    }
+
+    public boolean isDoDelta() {
+        return doDelta;
+    }
+
+    public boolean isDoQtab() {
+        return doQtab;
+    }
+
+    public int getMaxSymbols() {
+        return maxSymbols;
+    }
+
+    public int getQualityContextBits() {
+        return qualityContextBits;
+    }
+
+    public int getQualityContextShift() {
+        return qualityContextShift;
+    }
+
+    public int getQualityContextLocation() {
+        return qualityContextLocation;
+    }
+
+    public int getSelectorContextLocation() {
+        return selectorContextLocation;
+    }
+
+    public int getPositionContextLocation() {
+        return positionContextLocation;
+    }
+
+    public int getDeltaContextLocation() {
+        return deltaContextLocation;
+    }
+
+    public int[] getQualityMap() {
+        return qualityMap;
+    }
+
+    public int[] getQualityContextTable() {
+        return qualityContextTable;
+    }
+
+    public int[] getPositionContextTable() {
+        return positionContextTable;
+    }
+
+    public int[] getDeltaContextTable() {
+        return deltaContextTable;
+    }
+
+    public void setContext(int context) {
+        this.context = context;
+    }
+
+    public void setParameterFlags(int parameterFlags) { // stores the raw flags and refreshes every field derived from them
+        this.parameterFlags = parameterFlags;
+        setDoDedup((parameterFlags & DEDUP_FLAG_MASK) != 0);
+        setFixedLen(parameterFlags & FIXED_LEN_FLAG_MASK); // 0 or 4, deliberately not a boolean: see the fixedLen field
+        setDoSel((parameterFlags & SEL_FLAG_MASK) != 0);
+        setDoQmap((parameterFlags & QMAP_FLAG_MASK) != 0);
+        setDoPos((parameterFlags & PTAB_FLAG_MASK) != 0);
+        setDoDelta((parameterFlags & DTAB_FLAG_MASK) != 0);
+        setDoQtab((parameterFlags & QTAB_FLAG_MASK) != 0);
+    }
+
+    public void setDoDedup(boolean doDedup) {
+        this.doDedup = doDedup;
+    }
+
+    public void setFixedLen(int fixedLen) {
+        this.fixedLen = fixedLen;
+    }
+
+    public void setDoSel(boolean doSel) {
+        this.doSel = doSel;
+    }
+
+    public void setDoQmap(boolean doQmap) {
+        this.doQmap = doQmap;
+    }
+
+    public void setDoPos(boolean doPos) {
+        this.doPos = doPos;
+    }
+
+    public void setDoDelta(boolean doDelta) {
+        this.doDelta = doDelta;
+    }
+
+    public void setDoQtab(boolean doQtab) {
+        this.doQtab = doQtab;
+    }
+
+    public void setMaxSymbols(int maxSymbols) {
+        this.maxSymbols = maxSymbols;
+    }
+
+    public void setQualityContextBits(int qualityContextBits) {
+        this.qualityContextBits = qualityContextBits;
+    }
+
+    public void setQualityContextShift(int qualityContextShift) {
+        this.qualityContextShift = qualityContextShift;
+    }
+
+    public void setQualityContextLocation(int qualityContextLocation) {
+        this.qualityContextLocation = qualityContextLocation;
+    }
+
+    public void setSelectorContextLocation(int selectorContextLocation) {
+        this.selectorContextLocation = selectorContextLocation;
+    }
+
+    public void setPositionContextLocation(int positionContextLocation) {
+        this.positionContextLocation = positionContextLocation;
+    }
+
+    public void setDeltaContextLocation(int deltaContextLocation) {
+        this.deltaContextLocation = deltaContextLocation;
+    }
+
+    public void setQualityMap(int[] qualityMap) {
+        this.qualityMap = qualityMap;
+    }
+
+    public void setQualityContextTable(int[] qualityContextTable) {
+        this.qualityContextTable = qualityContextTable;
+    }
+
+    public void setPositionContextTable(int[] positionContextTable) {
+        this.positionContextTable = positionContextTable;
+    }
+
+    public void setDeltaContextTable(int[] deltaContextTable) {
+        this.deltaContextTable = deltaContextTable;
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java
new file mode 100644
index 0000000000..3a4981029c
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/fqzcomp/FQZState.java
@@ -0,0 +1,88 @@
+package htsjdk.samtools.cram.compression.fqzcomp;
+
+public class FQZState { // Mutable per-stream decoder state; all fields start at 0 / false
+    private int qualityContext;    // Qual-only sub-context
+    private int previousQuality;   // Previous quality value
+    private int delta;   // Running count of positions where the quality differed from previousQuality
+    private int bases;       // Number of bases left in current record
+    private int selector;       // Current parameter selector value (0 if unused)
+    private int selectorTable;       // Selector table entry for the current selector (the spec's "stab")
+    private int recordLength;     // Length of current string
+    private boolean isDuplicate;   // This string is a duplicate of last
+    private int recordNumber;     // Record number
+
+    public FQZState() {
+    }
+
+    public int getQualityContext() {
+        return qualityContext;
+    }
+
+    public void setQualityContext(int qualityContext) {
+        this.qualityContext = qualityContext;
+    }
+
+    public int getPreviousQuality() {
+        return previousQuality;
+    }
+
+    public void setPreviousQuality(int previousQuality) {
+        this.previousQuality = previousQuality;
+    }
+
+    public int getDelta() {
+        return delta;
+    }
+
+    public void setDelta(int delta) {
+        this.delta = delta;
+    }
+
+    public int getBases() {
+        return bases;
+    }
+
+    public void setBases(int bases) {
+        this.bases = bases;
+    }
+
+    public int getSelector() {
+        return selector;
+    }
+
+    public void setSelector(int selector) {
+        this.selector = selector;
+    }
+
+    public int getSelectorTable() {
+        return selectorTable;
+    }
+
+    public void setSelectorTable(int selectorTable) {
+        this.selectorTable = selectorTable;
+    }
+
+    public int getRecordLength() {
+        return recordLength;
+    }
+
+    public void setRecordLength(int recordLength) {
+        this.recordLength = recordLength;
+    }
+
+    public boolean getIsDuplicate() {
+        return isDuplicate;
+    }
+
+    public void setIsDuplicate(boolean isDuplicate) {
+        this.isDuplicate = isDuplicate;
+    }
+
+    public int getRecordNumber() {
+        return recordNumber;
+    }
+
+    public void setRecordNumber(int recordNumber) {
+        this.recordNumber = recordNumber;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java
new file mode 100644
index 0000000000..61d935aad1
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationDecode.java
@@ -0,0 +1,164 @@
+package htsjdk.samtools.cram.compression.nametokenisation;
+
+import htsjdk.samtools.cram.CRAMException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.StringJoiner;
+
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_TYPE;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_STRING;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_CHAR;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_DIGITS0;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_DZLEN;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_DUP;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_DIGITS;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_DELTA;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_DELTA0;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_MATCH;
+import static htsjdk.samtools.cram.compression.nametokenisation.TokenStreams.TOKEN_END;
+
+public class NameTokenisationDecode {
+
+    public static String uncompress(final ByteBuffer inBuffer) { // convenience overload: decoded names are joined with "\n"
+        return uncompress(inBuffer, "\n");
+    }
+
+    public static String uncompress(
+            final ByteBuffer inBuffer,
+            final String separator) {
+        // Header, little endian: declared uncompressed length (int), number of names (int), useArith byte
+        inBuffer.order(ByteOrder.LITTLE_ENDIAN);
+        final int expectedLength = inBuffer.getInt();
+        final int nameCount = inBuffer.getInt();
+        final int arithFlag = inBuffer.get() & 0xFF;
+        final TokenStreams streams = new TokenStreams(inBuffer, arithFlag, nameCount);
+        // One initially empty token list per name, so that later names can refer back to earlier ones
+        final List<List<String>> tokensPerName = new ArrayList<>(nameCount);
+        for (int n = 0; n < nameCount; n++) {
+            tokensPerName.add(new ArrayList<>());
+        }
+        final StringJoiner joinedNames = new StringJoiner(separator);
+        for (int n = 0; n < nameCount; n++) {
+            joinedNames.add(decodeSingleName(streams, tokensPerName, n));
+        }
+        final String names = joinedNames.toString();
+        // Append a trailing separator only when the declared length is exactly one separator longer than the joined names
+        return (expectedLength == names.length() + separator.length()) ? names + separator : names;
+    }
+
+    private static String decodeSingleName(
+            final TokenStreams tokenStreams,
+            final List<List<String>> tokensList,
+            final int currentNameIndex) {
+        // Decodes the name at currentNameIndex, storing its tokens in tokensList.get(currentNameIndex), and returns the name
+        // The information about whether a name is a duplicate or not
+        // is obtained from the list of tokens at tokenStreams[0,0]
+        byte nameType = tokenStreams.getTokenStreamByteBuffer(0,TOKEN_TYPE).get();
+        final ByteBuffer distBuffer = tokenStreams.getTokenStreamByteBuffer(0,nameType).order(ByteOrder.LITTLE_ENDIAN);
+        final int dist = distBuffer.getInt() & 0xFFFFFFFF; // how many names back the reference name is
+        final int prevNameIndex = currentNameIndex - dist;
+        if (nameType == TOKEN_DUP){
+            tokensList.set(currentNameIndex, tokensList.get(prevNameIndex)); // replace the placeholder list; add(index, ...) inserted instead and grew tokensList by one per duplicate
+            return String.join("", tokensList.get(currentNameIndex));
+        }
+        int tokenPosition = 1; // At position 0, we get nameType information
+        byte type;
+        StringBuilder decodedNameBuilder = new StringBuilder();
+        do {
+            type = tokenStreams.getTokenStreamByteBuffer(tokenPosition, TOKEN_TYPE).get();
+            String currentToken = "";
+            switch(type){
+                case TOKEN_CHAR:
+                    final char currentTokenChar = (char) tokenStreams.getTokenStreamByteBuffer(tokenPosition, TOKEN_CHAR).get();
+                    currentToken = String.valueOf(currentTokenChar);
+                    break;
+                case TOKEN_STRING:
+                    currentToken = readString(tokenStreams.getTokenStreamByteBuffer(tokenPosition, TOKEN_STRING));
+                    break;
+                case TOKEN_DIGITS:
+                    currentToken = getDigitsToken(tokenStreams, tokenPosition, TOKEN_DIGITS);
+                    break;
+                case TOKEN_DIGITS0:
+                    final String digits0Token = getDigitsToken(tokenStreams, tokenPosition, TOKEN_DIGITS0);
+                    final int lenDigits0Token = tokenStreams.getTokenStreamByteBuffer(tokenPosition, TOKEN_DZLEN).get() & 0xFF;
+                    currentToken = leftPadNumber(digits0Token, lenDigits0Token);
+                    break;
+                case TOKEN_DELTA:
+                    currentToken = getDeltaToken(tokenStreams, tokenPosition, tokensList, prevNameIndex, TOKEN_DELTA);
+                    break;
+                case TOKEN_DELTA0:
+                    final String delta0Token = getDeltaToken(tokenStreams, tokenPosition, tokensList, prevNameIndex, TOKEN_DELTA0);
+                    final int lenDelta0Token = tokensList.get(prevNameIndex).get(tokenPosition-1).length(); // pad to the width of the same token in the reference name
+                    currentToken = leftPadNumber(delta0Token, lenDelta0Token);
+                    break;
+                case TOKEN_MATCH:
+                    currentToken = tokensList.get(prevNameIndex).get(tokenPosition-1); // identical to the same token in the reference name
+                    break;
+                default: // TOKEN_END (and any type not handled above) contributes an empty token
+                    break;
+            }
+            tokensList.get(currentNameIndex).add(tokenPosition-1,currentToken);
+            decodedNameBuilder.append(currentToken);
+            tokenPosition++;
+        } while (type!= TOKEN_END);
+        return decodedNameBuilder.toString();
+    }
+
+    private static String getDeltaToken(
+            final TokenStreams tokenStreams,
+            final int tokenPosition,
+            final List<List<String>> tokensList,
+            final int prevNameIndex,
+            final byte tokenType) {
+        // Rebuilds a numeric token as (token at the same position in the prior name) + (unsigned delta byte from the stream).
+        if (!(tokenType == TOKEN_DELTA || tokenType == TOKEN_DELTA0)){
+            throw new CRAMException(String.format("Invalid tokenType : %s. tokenType must be either TOKEN_DELTA or TOKEN_DELTA0", tokenType));
+        }
+        // Parse as long: DIGITS tokens are rendered from unsigned 32-bit values, which Integer.parseInt rejects above 2^31-1.
+        long prevToken;
+        try {
+            prevToken = Long.parseLong(tokensList.get(prevNameIndex).get(tokenPosition -1));
+        } catch (final NumberFormatException e) {
+            throw new CRAMException(String.format("The token in the prior name must be of type %s",
+                    (tokenType == TOKEN_DELTA) ? "DIGITS or DELTA" : "DIGITS0 or DELTA0"), e);
+        }
+        final int deltaTokenValue = tokenStreams.getTokenStreamByteBuffer(tokenPosition,tokenType).get() & 0xFF;
+        return Long.toString(prevToken + deltaTokenValue); // long addition, so the sum cannot wrap around
+    }
+
+    private static String getDigitsToken(
+            final TokenStreams tokenStreams,
+            final int tokenPosition,
+            final byte tokenType ) {
+        // Reads the next little-endian 32-bit value for this position and renders it as an unsigned decimal string.
+        final boolean isDigitsType = tokenType == TOKEN_DIGITS || tokenType == TOKEN_DIGITS0;
+        if (!isDigitsType) {
+            throw new CRAMException(String.format("Invalid tokenType : %s. tokenType must be either TOKEN_DIGITS or TOKEN_DIGITS0", tokenType));
+        }
+        final int rawValue = tokenStreams.getTokenStreamByteBuffer(tokenPosition, tokenType).order(ByteOrder.LITTLE_ENDIAN).getInt();
+        return Long.toString(rawValue & 0xFFFFFFFFL);
+    }
+
+    private static String readString(final ByteBuffer inputBuffer) {
+        // spec: bytes are taken one at a time from the value byte stream and appended
+        // to the result until a zero byte is read; the zero itself is consumed but not returned.
+        final StringBuilder text = new StringBuilder();
+        for (byte next = inputBuffer.get(); next != 0; next = inputBuffer.get()) {
+            // NOTE(review): the cast sign-extends, so a byte >= 0x80 becomes U+FF80..U+FFFF;
+            // presumably names are ASCII-only - confirm
+            text.append((char) next);
+        }
+        return text.toString();
+    }
+
+    private static String leftPadNumber(String value, final int len) {
+        final StringBuilder padded = new StringBuilder(value); // working copy that receives the leading zeros
+        while (padded.length() < len) {
+            padded.insert(0, '0'); // widen to at least len characters; longer input is left as it is
+        }
+        return padded.toString();
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
new file mode 100644
index 0000000000..4a07f8422f
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationEncode.java
@@ -0,0 +1,287 @@
+package htsjdk.samtools.cram.compression.nametokenisation;
+
+import htsjdk.samtools.cram.compression.CompressionUtils;
+import htsjdk.samtools.cram.compression.nametokenisation.tokens.EncodeToken;
+import htsjdk.samtools.cram.compression.range.RangeEncode;
+import htsjdk.samtools.cram.compression.range.RangeParams;
+import htsjdk.samtools.cram.compression.rans.RANSEncode;
+import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Encode;
+import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Params;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class NameTokenisationEncode {
+
+    private int maxToken;
+    private int maxLength;
+
+    public ByteBuffer compress(final ByteBuffer inBuffer){
+        return compress(inBuffer, 0); // useArith == 0: token streams are coded with rANS Nx16 rather than the range coder
+    }
+
+    public ByteBuffer compress(final ByteBuffer inBuffer, final int useArith){
+        maxToken = 0;
+        maxLength = 0;
+        List<String> names = new ArrayList<>();
+        int lastPosition = inBuffer.position();
+
+        // convert buffer to list of names: split on '\n' (the last name may be unterminated) and trim each one
+        while(inBuffer.hasRemaining()){
+            byte currentByte = inBuffer.get();
+            if ((currentByte) == '\n' || inBuffer.position()==inBuffer.limit()){
+                int length = inBuffer.position() - lastPosition;
+                byte[] bytes = new byte[length];
+                inBuffer.position(lastPosition);
+                inBuffer.get(bytes, 0, length);
+                names.add(new String(bytes, StandardCharsets.UTF_8).trim());
+                lastPosition = inBuffer.position();
+            }
+        }
+
+        final int numNames = names.size();
+        // guess max size -> str.length*2 + 10000 (from htscodecs javascript code)
+        ByteBuffer outBuffer = allocateOutputBuffer((inBuffer.limit()*2)+10000);
+        outBuffer.putInt(inBuffer.limit()); // header field 1: limit of the input buffer, in bytes
+        outBuffer.putInt(numNames); // header field 2: number of names
+        outBuffer.put((byte)useArith); // header field 3: which entropy coder the token streams use
+
+        // Instead of List<List<String>> for tokensList like we did in Decoder, we use List<List<EncodeToken>>
+        // as we also need to store the TOKEN_TYPE, relative value when compared to prev name's token
+        // along with the token value.
+        List<List<EncodeToken>> tokensList = new ArrayList<>(numNames);
+        HashMap<String, Integer> nameIndexMap = new HashMap<>();
+        int[] tokenFrequencies = new int[256];
+        for(int nameIndex = 0; nameIndex < numNames; nameIndex++) {
+            tokeniseName(tokensList, nameIndexMap, tokenFrequencies, names.get(nameIndex), nameIndex);
+        }
+        for (int tokenPosition = 0; tokenPosition < maxToken; tokenPosition++) { // one set of streams per token position
+            List<ByteBuffer> tokenStream = new ArrayList<>(TokenStreams.TOTAL_TOKEN_TYPES);
+            for (int i = 0; i < TokenStreams.TOTAL_TOKEN_TYPES; i++) {
+                tokenStream.add(ByteBuffer.allocate(numNames* maxLength).order(ByteOrder.LITTLE_ENDIAN));
+            }
+            fillByteStreams(tokenStream,tokensList,tokenPosition,numNames);
+            serializeByteStreams(tokenStream,useArith,outBuffer);
+        }
+
+        // sets limit to current position and position to '0'
+        outBuffer.flip();
+        return outBuffer;
+    }
+
+    private void tokeniseName(final List<List<EncodeToken>> tokensList,
+                              HashMap<String, Integer> nameIndexMap,
+                              int[] tokenFrequencies,
+                              final String name,
+                              final int currentNameIndex) {
+        int currMaxLength = 0; // largest (relative value length + 3) among this name's tokens
+
+        // tokens are only ever compared against the immediately preceding name
+        final int prevNameIndex = currentNameIndex - 1;
+        tokensList.add(new ArrayList<>()); // appended here, read back below through currentNameIndex
+        if (nameIndexMap.containsKey(name)) {
+            // TODO: Add Test to cover this code (DUP token: value is the distance back to the name's latest occurrence)
+            tokensList.get(currentNameIndex).add(new EncodeToken(String.valueOf(currentNameIndex - nameIndexMap.get(name)), String.valueOf(currentNameIndex - nameIndexMap.get(name)),TokenStreams.TOKEN_DUP));
+        } else { // name not seen before: DIFF token, value 0 for the very first name and 1 for every later one
+            tokensList.get(currentNameIndex).add(new EncodeToken(String.valueOf(currentNameIndex == 0 ? 0 : 1),String.valueOf(currentNameIndex == 0 ? 0 : 1),TokenStreams.TOKEN_DIFF));
+        }
+        // Record this index as the latest occurrence of the name, then split the name into its raw tokens `tok`
+        nameIndexMap.put(name, currentNameIndex);
+        String regex = "([a-zA-Z0-9]{1,9})|([^a-zA-Z0-9]+)"; // runs of 1-9 alphanumerics, or runs of non-alphanumerics
+        Pattern pattern = Pattern.compile(regex);
+        Matcher matcher = pattern.matcher(name);
+        List<String> tok = new ArrayList<>();
+        while (matcher.find()) {
+            tok.add(matcher.group());
+        }
+        for (int i = 0; i < tok.size(); i++) {
+            // In the list of tokens, all the tokens are offset by 1
+            // because at position "0", we have a token that provides info if the name is a DIFF or DUP
+            // token 0 = DIFF vs DUP
+            int tokenIndex = i + 1;
+            byte type = TokenStreams.TOKEN_STRING; // default when none of the checks below apply
+            String str = tok.get(i); // absolute value of the token
+            String val = tok.get(i); // relative value of the token (comparing to prevname's token at the same token position)
+            if (tok.get(i).matches("^0+[0-9]*$")) { // all digits with at least one leading zero
+                type = TokenStreams.TOKEN_DIGITS0;
+            } else if (tok.get(i).matches("^[0-9]+$")) { // all digits, no leading zero
+                type = TokenStreams.TOKEN_DIGITS;
+            } else if (tok.get(i).length() == 1) {
+                type = TokenStreams.TOKEN_CHAR;
+            }
+
+            // compare the current token with token from the previous name at the current token's index
+            // if there exists a previous name and a token at the corresponding index of the previous name
+            if (prevNameIndex >=0 && tokensList.get(prevNameIndex).size() > tokenIndex) {
+                EncodeToken prevToken = tokensList.get(prevNameIndex).get(tokenIndex);
+                if (prevToken.getActualTokenValue().equals(tok.get(i))) {
+                    type = TokenStreams.TOKEN_MATCH; // identical text: nothing but the type needs storing
+                    val = "";
+                } else if (type==TokenStreams.TOKEN_DIGITS
+                        && (prevToken.getTokenType() == TokenStreams.TOKEN_DIGITS || prevToken.getTokenType() == TokenStreams.TOKEN_DELTA)) {
+                    int v = Integer.parseInt(val);
+                    int s = Integer.parseInt(prevToken.getActualTokenValue());
+                    int d = v - s;
+                    tokenFrequencies[tokenIndex]++; // NOTE(review): compress allocates 256 entries - confirm tokenIndex stays below 256
+                    if (d >= 0 && d < 256 && tokenFrequencies[tokenIndex] > currentNameIndex / 2) { // delta must fit one unsigned byte
+                        type = TokenStreams.TOKEN_DELTA;
+                        val = String.valueOf(d);
+                    }
+                } else if (type==TokenStreams.TOKEN_DIGITS0 && prevToken.getActualTokenValue().length() == val.length()
+                        && (prevToken.getTokenType() == TokenStreams.TOKEN_DIGITS0 || prevToken.getTokenType() == TokenStreams.TOKEN_DELTA0)) {
+                    int d = Integer.parseInt(val) - Integer.parseInt(prevToken.getActualTokenValue());
+                    tokenFrequencies[tokenIndex]++; // same counter, and same 256-entry bound, as the DIGITS branch above
+                    if (d >= 0 && d < 256 && tokenFrequencies[tokenIndex] > currentNameIndex / 2) {
+                        type = TokenStreams.TOKEN_DELTA0;
+                        val = String.valueOf(d);
+                    }
+                }
+            }
+            tokensList.get(currentNameIndex).add(new EncodeToken(str, val, type));
+
+            if (currMaxLength < val.length() + 3) {
+                // TODO: check this? Why isn't the uint32 case handled?
+                // +3 for integers; 5 -> (Uint32)5 (from htscodecs javascript code)
+                currMaxLength = val.length() + 3;
+            }
+        }
+
+        tokensList.get(currentNameIndex).add(new EncodeToken("","",TokenStreams.TOKEN_END)); // every name ends with an END token
+        final int currMaxToken = tokensList.get(currentNameIndex).size();
+        if (maxToken < currMaxToken)
+            maxToken = currMaxToken;
+        if (maxLength < currMaxLength)
+            maxLength = currMaxLength;
+    }
+
+    public void fillByteStreams(
+            final List<ByteBuffer> tokenStream,
+            final List<List<EncodeToken>> tokensList,
+            final int tokenPosition,
+            final int numNames) {
+        // tokenStream holds one buffer per stream type; tokensList holds one token list per name.
+        // For every name that has a token at tokenPosition, append its type byte and its value to the matching buffers.
+        for (int nameIndex = 0; nameIndex < numNames; nameIndex++) {
+            if (tokenPosition > 0 && tokensList.get(nameIndex).get(0).getTokenType() == TokenStreams.TOKEN_DUP) {
+                continue; // a duplicate name is fully described by its position-0 token
+            }
+            if (tokensList.get(nameIndex).size() <= tokenPosition) {
+                continue; // this name has fewer tokens than tokenPosition
+            }
+            EncodeToken encodeToken = tokensList.get(nameIndex).get(tokenPosition);
+            byte type = encodeToken.getTokenType();
+            tokenStream.get(TokenStreams.TOKEN_TYPE).put(type);
+            switch (type) { // types without a case (such as MATCH and END) store nothing beyond the type byte
+                case TokenStreams.TOKEN_DIFF:
+                    tokenStream.get(TokenStreams.TOKEN_DIFF).putInt(Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    break;
+
+                case TokenStreams.TOKEN_DUP:
+                    tokenStream.get(TokenStreams.TOKEN_DUP).putInt(Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    break;
+
+                case TokenStreams.TOKEN_STRING:
+                    writeString(tokenStream.get(TokenStreams.TOKEN_STRING),encodeToken.getRelativeTokenValue());
+                    break;
+
+                case TokenStreams.TOKEN_CHAR: // explicit UTF-8; NOTE(review): only the first byte is stored - confirm tokens are ASCII
+                    tokenStream.get(TokenStreams.TOKEN_CHAR).put(encodeToken.getRelativeTokenValue().getBytes(StandardCharsets.UTF_8)[0]);
+                    break;
+
+                case TokenStreams.TOKEN_DIGITS:
+                    tokenStream.get(TokenStreams.TOKEN_DIGITS).putInt(Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    break;
+
+                case TokenStreams.TOKEN_DIGITS0: // numeric value, plus the digit count in the DZLEN stream
+                    tokenStream.get(TokenStreams.TOKEN_DIGITS0).putInt(Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    tokenStream.get(TokenStreams.TOKEN_DZLEN).put((byte) encodeToken.getRelativeTokenValue().length());
+                    break;
+
+                case TokenStreams.TOKEN_DELTA:
+                    tokenStream.get(TokenStreams.TOKEN_DELTA).put((byte)Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    break;
+
+                case TokenStreams.TOKEN_DELTA0:
+                    tokenStream.get(TokenStreams.TOKEN_DELTA0).put((byte)Integer.parseInt(encodeToken.getRelativeTokenValue()));
+                    break;
+            }
+        }
+    }
+
+    private static void writeString(final ByteBuffer tokenStreamBuffer, final String val) {
+        // UTF-8 is named explicitly so the bytes written do not depend on the platform default charset
+        tokenStreamBuffer.put(val.getBytes(StandardCharsets.UTF_8));
+        tokenStreamBuffer.put((byte) 0); // zero terminator marking the end of the string
+    }
+
+    public static ByteBuffer tryCompress(final ByteBuffer src, final int useArith) {
+        // compress with different formatFlags
+        // and return the compressed output ByteBuffer with the least number of bytes (null if every attempt fails)
+        int bestcompressedByteLength = 1 << 30;
+        ByteBuffer compressedByteBuffer = null;
+        int[] formatFlagsList = {0, 1, 64, 65, 128, 129, 193+8};
+        for (int formatFlags : formatFlagsList) {
+            // rewind before the size checks so they measure the whole input
+            // rather than whatever position the previous attempt left behind
+            src.rewind();
+            if ((formatFlags & 1) != 0 && src.remaining() < 100)
+                continue;
+
+            if ((formatFlags & 8) != 0 && (src.remaining() % 4) != 0)
+                continue;
+
+            ByteBuffer tmpByteBuffer = null;
+            try {
+                if (useArith!=0) {
+                    // Encode using Range
+                    RangeEncode rangeEncode = new RangeEncode();
+                    tmpByteBuffer = rangeEncode.compress(src,new RangeParams(formatFlags));
+                } else {
+                    // Encode using RANS
+                    RANSEncode ransEncode = new RANSNx16Encode();
+                    tmpByteBuffer = ransEncode.compress(src, new RANSNx16Params(formatFlags));
+                }
+            } catch (final Exception ignored) {} // best effort: a format the encoder rejects is simply not a candidate
+            if (tmpByteBuffer != null && bestcompressedByteLength > tmpByteBuffer.remaining()) {
+                bestcompressedByteLength = tmpByteBuffer.remaining();
+                compressedByteBuffer = tmpByteBuffer;
+            }
+        }
+        return compressedByteBuffer;
+    }
+
+    protected void serializeByteStreams(
+            final List<ByteBuffer> tokenStream,
+            final int useArith,
+            final ByteBuffer outBuffer) {
+        // Compress and serialise the streams that were written to; on a write-mode buffer remaining() is only the unused capacity
+        for (int tokenType = 0; tokenType <= TokenStreams.TOKEN_END; tokenType++) {
+            if (tokenStream.get(tokenType).position() > 0) {
+                outBuffer.put((byte) (tokenType + ((tokenType == 0) ? 128 : 0))); // 128 marks the start of a new token position
+                tokenStream.get(tokenType).flip(); // limit = end of the written data, so no unused capacity is compressed
+                ByteBuffer tempOutByteBuffer = tryCompress(tokenStream.get(tokenType), useArith);
+                CompressionUtils.writeUint7(tempOutByteBuffer.limit(),outBuffer);
+                outBuffer.put(tempOutByteBuffer);
+            }
+        }
+    }
+
+    protected ByteBuffer allocateOutputBuffer(final int inSize) {
+
+        // Same sizing as allocateOutputBuffer in RANS4x8Encode and RANSNx16Encode: 1.05 * inSize plus a fixed allowance.
+        // TODO: de-duplicate
+        final int requiredCapacity = (int) (1.05 * inSize + 257 * 257 * 3 + 9);
+        final ByteBuffer allocated = ByteBuffer.allocate(requiredCapacity);
+        if (requiredCapacity > allocated.remaining()) {
+            throw new RuntimeException("Failed to allocate sufficient buffer size for Range coder.");
+        }
+        allocated.order(ByteOrder.LITTLE_ENDIAN);
+        return allocated;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java
new file mode 100644
index 0000000000..deed459022
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/TokenStreams.java
@@ -0,0 +1,125 @@
+package htsjdk.samtools.cram.compression.nametokenisation;
+
+import htsjdk.samtools.cram.CRAMException;
+import htsjdk.samtools.cram.compression.CompressionUtils;
+import htsjdk.samtools.cram.compression.range.RangeDecode;
+import htsjdk.samtools.cram.compression.rans.RANSDecode;
+import htsjdk.samtools.cram.compression.rans.ransnx16.RANSNx16Decode;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+public class TokenStreams {
+
+    public static final byte TOKEN_TYPE = 0x00; // stream holding the type byte of each token
+    public static final byte TOKEN_STRING  = 0x01;
+    public static final byte TOKEN_CHAR = 0x02;
+    public static final byte TOKEN_DIGITS0 = 0x03;
+    public static final byte TOKEN_DZLEN = 0x04;
+    public static final byte TOKEN_DUP = 0x05;
+    public static final byte TOKEN_DIFF = 0x06;
+    public static final byte TOKEN_DIGITS = 0x07;
+    public static final byte TOKEN_DELTA = 0x08;
+    public static final byte TOKEN_DELTA0 = 0x09;
+    public static final byte TOKEN_MATCH = 0x0A;
+    public static final byte TOKEN_END = 0x0C; // highest type value accepted; 0x0B has no constant in this class
+    public static final int TOTAL_TOKEN_TYPES = 13; // one stream list per type value 0x00..0x0C
+
+    private static final int NEW_TOKEN_FLAG_MASK = 0x80; // set on the first stream of each token position
+    private static final int DUP_TOKEN_FLAG_MASK = 0x40; // stream is a copy of a previously read stream
+    private static final int TYPE_TOKEN_FLAG_MASK = 0x3F; // low six bits carry the stream type
+
+    private final List<List<ByteBuffer>> tokenStreams; // indexed [token type][token position]
+
+    public TokenStreams() {
+        tokenStreams = new ArrayList<>(TOTAL_TOKEN_TYPES);
+        for (int i = 0; i < TOTAL_TOKEN_TYPES; i++) {
+            tokenStreams.add(new ArrayList<>()); // starts with no positions for any type
+        }
+    }
+
+    public TokenStreams(final ByteBuffer inputByteBuffer, final int useArith, final int numNames) {
+        // The outer index corresponds to type of the token
+        // and the inner index corresponds to the position of the token in a name (starting at index 1)
+        // Each element in this list of lists is a Token (ie, a ByteBuffer)
+
+        // TokenStreams[type = TOKEN_TYPE(0x00), pos = 0] contains a ByteBuffer of length = number of names
+        // This ByteBuffer helps determine if each of the names is a TOKEN_DUP or TOKEN_DIFF
+        // when compared with the previous name
+
+        // TokenStreams[type = TOKEN_TYPE(0x00), pos = all except 0]
+        // contains a ByteBuffer of length = number of names
+        // This ByteBuffer helps determine the type of each of the tokens at the specified pos
+
+        this();
+        int tokenPosition = -1; // becomes 0 at the first stream that carries the new-token flag
+        while (inputByteBuffer.hasRemaining()) {
+            final byte tokenTypeFlags = inputByteBuffer.get();
+            final boolean isNewToken = ((tokenTypeFlags & NEW_TOKEN_FLAG_MASK) != 0);
+            final boolean isDupToken = ((tokenTypeFlags & DUP_TOKEN_FLAG_MASK) != 0);
+            final int tokenType = (tokenTypeFlags & TYPE_TOKEN_FLAG_MASK);
+            if (tokenType < 0 || tokenType > TOKEN_END) { // the mask keeps tokenType in 0..63, so only the upper bound can fail
+                throw new CRAMException("Invalid Token tokenType: " + tokenType);
+            }
+            if (isNewToken) {
+                tokenPosition++;
+                if (tokenPosition > 0) {
+                    // If newToken and not the first newToken
+                    // Ensure that the size of tokenStream for each type of token = tokenPosition
+                    // by adding one empty ByteBuffer if needed; a list that is still short afterwards is an error
+                    for (int i = 0; i < TOTAL_TOKEN_TYPES; i++) {
+                        final List<ByteBuffer> currTokenStream = tokenStreams.get(i);
+                        if (currTokenStream.size() < tokenPosition) {
+                            currTokenStream.add(ByteBuffer.allocate(0));
+                        }
+                        if (currTokenStream.size() < tokenPosition) {
+                            throw new CRAMException("TokenStream is missing Token(s) at Token Type: " + i);
+                        }
+                    }
+                }
+            }
+            if ((isNewToken) && (tokenType != TOKEN_TYPE)) {
+
+                // Spec: if we have a byte stream B(5,DIGITS) but no B(5,TYPE)
+                // then we assume the contents of B(5,TYPE) consist of one DIGITS tokenType
+                // followed by as many MATCH types as are needed.
+                final ByteBuffer typeDataByteBuffer = ByteBuffer.allocate(numNames);
+                for (int i = 0; i < numNames; i++) {
+                    typeDataByteBuffer.put((byte) TOKEN_MATCH);
+                }
+                typeDataByteBuffer.rewind();
+                typeDataByteBuffer.put(0, (byte) tokenType); // first entry is the stream's own type, the rest stay MATCH
+                tokenStreams.get(0).add(typeDataByteBuffer);
+            }
+            if (isDupToken) {
+                final int dupPosition = inputByteBuffer.get() & 0xFF; // position of the stream to copy
+                final int dupType = inputByteBuffer.get() & 0xFF; // type of the stream to copy
+                final ByteBuffer dupTokenStream = tokenStreams.get(dupType).get(dupPosition).duplicate();
+                tokenStreams.get(tokenType).add(tokenPosition,dupTokenStream);
+            } else {
+                final int clen = CompressionUtils.readUint7(inputByteBuffer); // byte length of the compressed stream that follows
+                final byte[] dataBytes = new byte[clen];
+                inputByteBuffer.get(dataBytes, 0, clen); // offset in the dst byte array
+                final ByteBuffer uncompressedDataByteBuffer;
+                if (useArith != 0) {
+                    RangeDecode rangeDecode = new RangeDecode();
+                    uncompressedDataByteBuffer = rangeDecode.uncompress(ByteBuffer.wrap(dataBytes));
+
+                } else {
+                    RANSDecode ransdecode = new RANSNx16Decode();
+                    uncompressedDataByteBuffer = ransdecode.uncompress(ByteBuffer.wrap(dataBytes));
+                }
+                this.getTokenStreamByType(tokenType).add(tokenPosition,uncompressedDataByteBuffer);
+            }
+        }
+    }
+
+    public List<ByteBuffer> getTokenStreamByType(final int tokenType) { // all positions for one type; the internal list, not a copy
+        return tokenStreams.get(tokenType);
+    }
+
+    public ByteBuffer getTokenStreamByteBuffer(final int tokenPosition, final int tokenType) { // note: position first, type second
+        return tokenStreams.get(tokenType).get(tokenPosition);
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/tokens/EncodeToken.java b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/tokens/EncodeToken.java
new file mode 100644
index 0000000000..4e7cb0288a
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/cram/compression/nametokenisation/tokens/EncodeToken.java
@@ -0,0 +1,38 @@
+package htsjdk.samtools.cram.compression.nametokenisation.tokens;
+
+public class EncodeToken { // one token of a read name as held by the name tokenisation encoder
+
+    private String actualTokenValue; // first constructor argument (str)
+    private String relativeTokenValue; // second constructor argument (val)
+    private byte tokenType; // third constructor argument (type)
+
+    public EncodeToken(String str, String val, byte type) {
+        this.actualTokenValue = str;
+        this.relativeTokenValue = val;
+        this.tokenType = type;
+    }
+
+    public String getActualTokenValue() {
+        return actualTokenValue;
+    }
+
+    public void setActualTokenValue(String actualTokenValue) {
+        this.actualTokenValue = actualTokenValue;
+    }
+
+    public String getRelativeTokenValue() {
+        return relativeTokenValue;
+    }
+
+    public void setRelativeTokenValue(String relativeTokenValue) {
+        this.relativeTokenValue = relativeTokenValue;
+    }
+
+    public byte getTokenType() {
+        return tokenType;
+    }
+
+    public void setTokenType(byte tokenType) {
+        this.tokenType = tokenType;
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java
index a7d7b21828..f0d7d82911 100644
--- a/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java
+++ b/src/main/java/htsjdk/samtools/cram/compression/range/RangeCoder.java
@@ -11,7 +11,7 @@ public class RangeCoder {
     private boolean carry;
     private int cache;
 
-    protected RangeCoder() {
+    public RangeCoder() {
         // Spec: RangeEncodeStart
         this.low = 0;
         this.range = Constants.MAX_RANGE; // 4 bytes of all 1's
@@ -21,7 +21,7 @@ protected RangeCoder() {
         this.cache = 0;
     }
 
-    protected void rangeDecodeStart(final ByteBuffer inBuffer){
+    public void rangeDecodeStart(final ByteBuffer inBuffer){
         for (int i = 0; i < 5; i++){
             code = (code << 8) + (inBuffer.get() & 0xFF);
         }
diff --git a/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java b/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java
new file mode 100644
index 0000000000..ab9ad4a517
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/cram/FQZCompInteropTest.java
@@ -0,0 +1,80 @@
+package htsjdk.samtools.cram;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.samtools.cram.compression.CompressionUtils;
+import htsjdk.samtools.cram.compression.fqzcomp.FQZCompDecode;
+import org.apache.commons.compress.utils.IOUtils;
+import org.testng.Assert;
+import org.testng.SkipException;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+public class FQZCompInteropTest extends HtsjdkTest {
+
+    public static final String COMPRESSED_FQZCOMP_DIR = "fqzcomp";
+
+    // builds one test case per compressed interop file found under COMPRESSED_FQZCOMP_DIR
+    @DataProvider(name = "decodeOnlyTestCases")
+    public Object[][] getDecodeOnlyTestCases() throws IOException {
+
+        // params:
+        // compressed testfile path, uncompressed testfile path,
+        // FQZComp decoder (a fresh instance per test case)
+        final List<Object[]> testCases = new ArrayList<>();
+        for (Path path : CRAMInteropTestUtils.getInteropCompressedFilePaths(COMPRESSED_FQZCOMP_DIR)) {
+            Object[] objects = new Object[]{
+                    path,
+                    CRAMInteropTestUtils.getUnCompressedFilePath(path),
+                    new FQZCompDecode()
+            };
+            testCases.add(objects);
+        }
+        return testCases.toArray(new Object[][]{});
+    }
+
+    @Test(description = "Test if CRAM Interop Test Data is available")
+    public void testHtsCodecsCorpusIsAvailable() {
+        if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) { // skip, rather than fail, when the corpus is absent
+            throw new SkipException(String.format("CRAM Interop Test Data is not available at %s",
+                    CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH));
+        }
+    }
+
+    @Test (
+            dependsOnMethods = "testHtsCodecsCorpusIsAvailable",
+            dataProvider = "decodeOnlyTestCases",
+            description = "Uncompress the existing compressed file using htsjdk FQZComp and compare it with the original file.")
+    public void testDecodeOnly(
+            final Path compressedFilePath,
+            final Path uncompressedInteropPath,
+            final FQZCompDecode fqzcompDecode) throws IOException {
+        try (final InputStream uncompressedInteropStream = Files.newInputStream(uncompressedInteropPath);
+             final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath)
+        ) {
+            // preprocess the uncompressed data (to match what the htscodecs-library test harness does)
+            // by filtering out the embedded newlines; the precompressed bytes are then decoded
+            // (decode only, nothing is re-encoded) and the two are compared
+            final ByteBuffer uncompressedInteropBytes = CompressionUtils.wrap(CRAMInteropTestUtils.filterEmbeddedNewlines(IOUtils.toByteArray(uncompressedInteropStream)));
+            final ByteBuffer preCompressedInteropBytes = CompressionUtils.wrap(IOUtils.toByteArray(preCompressedInteropStream));
+
+            // Use htsjdk to uncompress the precompressed file from htscodecs repo
+            final ByteBuffer uncompressedHtsjdkBytes = fqzcompDecode.uncompress(preCompressedInteropBytes);
+
+            // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo
+            Assert.assertEquals(uncompressedHtsjdkBytes, uncompressedInteropBytes);
+        } catch (final NoSuchFileException ex){ // a missing file skips the case instead of failing it
+            throw new SkipException("Skipping testDecodeOnly as either input file " +
+                    "or precompressed file is missing.", ex);
+        }
+    }
+
+}
\ No newline at end of file
diff --git a/src/test/java/htsjdk/samtools/cram/NameTokenizationInteropTest.java b/src/test/java/htsjdk/samtools/cram/NameTokenizationInteropTest.java
new file mode 100644
index 0000000000..8a4aa0e22b
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/cram/NameTokenizationInteropTest.java
@@ -0,0 +1,134 @@
+package htsjdk.samtools.cram;
+
+import htsjdk.HtsjdkTest;
+import htsjdk.samtools.cram.compression.nametokenisation.NameTokenisationDecode;
+import htsjdk.samtools.cram.compression.nametokenisation.NameTokenisationEncode;
+import org.apache.commons.compress.utils.IOUtils;
+import org.testng.Assert;
+import org.testng.SkipException;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.NoSuchFileException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Runs the htsjdk NameTokenisation codec against the htscodecs {@code names} interop test data. */
+public class NameTokenizationInteropTest extends HtsjdkTest {
+    public static final String COMPRESSED_TOK_DIR = "tok3";
+
+    /** @return one row per compressed test file: compressed path, uncompressed path, encoder, decoder */
+    @DataProvider(name = "allNameTokenizationFiles")
+    public Object[][] getAllNameTokenizationCodecsForRoundTrip() throws IOException {
+        final List<Object[]> testCases = new ArrayList<>();
+        for (final Path path : getInteropNameTokenizationCompressedFiles()) {
+            testCases.add(new Object[]{
+                    path,
+                    getNameTokenizationUnCompressedFilePath(path),
+                    new NameTokenisationEncode(),
+                    new NameTokenisationDecode()
+            });
+        }
+        return testCases.toArray(new Object[][]{});
+    }
+
+    @Test(description = "Test if CRAM Interop Test Data is available")
+    public void testGetHTSCodecsCorpus() {
+        if (!CRAMInteropTestUtils.isInteropTestDataAvailable()) {
+            throw new SkipException(String.format("CRAM Interop Test Data is not available at %s",
+                    CRAMInteropTestUtils.INTEROP_TEST_FILES_PATH));
+        }
+    }
+
+    /**
+     * Compresses the uncompressed corpus file with htsjdk, decompresses the result with htsjdk, and
+     * verifies that the original bytes come back. The precompressed file is only opened (so that a
+     * missing file is still reported as a skip); its content is not needed for the round trip.
+     */
+    @Test (
+            dependsOnMethods = "testGetHTSCodecsCorpus",
+            dataProvider = "allNameTokenizationFiles",
+            description = "Roundtrip using htsjdk NameTokenization Codec. Compare the output with the original file" )
+    public void testRangeRoundTrip(
+            final Path precompressedFilePath,
+            final Path uncompressedFilePath,
+            final NameTokenisationEncode nameTokenisationEncode,
+            final NameTokenisationDecode nameTokenisationDecode) throws IOException {
+        try(final InputStream preCompressedInteropStream = Files.newInputStream(precompressedFilePath);
+            final InputStream unCompressedInteropStream = Files.newInputStream(uncompressedFilePath)){
+            final ByteBuffer unCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(unCompressedInteropStream));
+            final ByteBuffer compressedHtsjdkBytes = nameTokenisationEncode.compress(unCompressedInteropBytes);
+            final String decompressedHtsjdkString = nameTokenisationDecode.uncompress(compressedHtsjdkBytes);
+            final ByteBuffer decompressedHtsjdkBytes = StandardCharsets.UTF_8.encode(decompressedHtsjdkString);
+            // rewind in case compress() consumed the input buffer, so the comparison sees all of its bytes
+            unCompressedInteropBytes.rewind();
+            Assert.assertEquals(decompressedHtsjdkBytes, unCompressedInteropBytes);
+        } catch (final NoSuchFileException ex){
+            throw new SkipException("Skipping testRangeRoundTrip as either the input precompressed file " +
+                    "or the uncompressed file is missing.", ex);
+        }
+    }
+
+    /**
+     * Decodes the precompressed file from the htscodecs repo with htsjdk and verifies the output against
+     * the original uncompressed file. The encoder supplied by the shared data provider is not used here.
+     */
+    @Test (
+            dependsOnMethods = "testGetHTSCodecsCorpus",
+            dataProvider = "allNameTokenizationFiles",
+            description = "Uncompress the existing compressed file using htsjdk NameTokenization Codec and compare it with the original file.")
+    public void testtNameTokenizationPreCompressed(
+            final Path compressedFilePath,
+            final Path uncompressedFilePath,
+            final NameTokenisationEncode unsusednameTokenisationEncode,
+            final NameTokenisationDecode nameTokenisationDecode) throws IOException {
+        try(final InputStream preCompressedInteropStream = Files.newInputStream(compressedFilePath);
+            final InputStream unCompressedInteropStream = Files.newInputStream(uncompressedFilePath)){
+            final ByteBuffer preCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(preCompressedInteropStream));
+            final ByteBuffer unCompressedInteropBytes = ByteBuffer.wrap(IOUtils.toByteArray(unCompressedInteropStream));
+            // Use htsjdk to uncompress the precompressed file from htscodecs repo
+            final String uncompressedHtsjdkString = nameTokenisationDecode.uncompress(preCompressedInteropBytes);
+            final ByteBuffer uncompressedHtsjdkBytes = StandardCharsets.UTF_8.encode(uncompressedHtsjdkString);
+            // Compare the htsjdk uncompressed bytes with the original input file from htscodecs repo
+            Assert.assertEquals(uncompressedHtsjdkBytes, unCompressedInteropBytes);
+        } catch (final NoSuchFileException ex){
+            throw new SkipException("Skipping testNameTokenizationPrecompressed as either input file " +
+                    "or precompressed file is missing.", ex);
+        }
+    }
+
+    // return a list of all NameTokenization encoded test data files in the htscodecs/tests/names/tok3 directory
+    private List<Path> getInteropNameTokenizationCompressedFiles() throws IOException {
+        final Path tokDir = CRAMInteropTestUtils.getInteropTestDataLocation().resolve("names/" + COMPRESSED_TOK_DIR);
+        final List<Path> paths = new ArrayList<>();
+        // a DirectoryStream keeps the directory open until closed, so release it as soon as it is drained
+        try (final java.nio.file.DirectoryStream<Path> files = Files.newDirectoryStream(tokDir, path -> Files.isRegularFile(path))) {
+            files.forEach(paths::add);
+        }
+        return paths;
+    }
+
+    // Given a compressed test file path, return the corresponding uncompressed file path
+    public static final Path getNameTokenizationUnCompressedFilePath(final Path compressedInteropPath) {
+        final String uncompressedFileName = getUncompressedFileName(compressedInteropPath.getFileName().toString());
+        // Example compressedInteropPath: ../names/tok3/01.names.1 => unCompressedFilePath: ../names/01.names
+        return compressedInteropPath.getParent().getParent().resolve(uncompressedFileName);
+    }
+
+    // Returns the original file name: the compressed file name up to (not including) its last period
+    public static final String getUncompressedFileName(final String compressedFileName) {
+        final int lastDotIndex = compressedFileName.lastIndexOf('.');
+        if (lastDotIndex < 0) {
+            throw new CRAMException("The format of the compressed File Name is not as expected. " +
+                    "The name of the compressed file should contain a period followed by a number that " +
+                    "indicates type of compression. Actual compressed file name = "+ compressedFileName);
+        }
+        return compressedFileName.substring(0, lastDotIndex);
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationTest.java b/src/test/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationTest.java
new file mode 100644
index 0000000000..29e487f64a
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/cram/compression/nametokenisation/NameTokenisationTest.java
@@ -0,0 +1,100 @@
+package htsjdk.samtools.cram.compression.nametokenisation;
+
+import htsjdk.HtsjdkTest;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Round-trip tests of the name tokenisation encoder and decoder on small in-memory sets of read names. */
+public class NameTokenisationTest extends HtsjdkTest {
+    // Wrapper whose toString() reports only the length of the wrapped test array
+    private static class TestDataEnvelope {
+        public final byte[] testArray;
+        public TestDataEnvelope(final byte[] testdata) {
+            this.testArray = testdata;
+        }
+        @Override
+        public String toString() {
+            return String.format("Array of size %d", testArray.length);
+        }
+    }
+
+    /** @return one row per read-name set: new encoder, new decoder, names as UTF-8 bytes (as compared in testRoundTrip) */
+    @DataProvider(name="nameTokenisation")
+    public Object[][] getNameTokenisationTestData() {
+        final List<String> readNamesList = new ArrayList<>();
+        readNamesList.add(""); // empty input
+        // a subset of read names from
+        // src/test/resources/htsjdk/samtools/cram/CEUTrio.HiSeq.WGS.b37.NA12878.20.first.8000.bam
+        readNamesList.add("20FUKAAXX100202:6:27:4968:125377\n" +
+                "20FUKAAXX100202:6:27:4986:125375\n" +
+                "20FUKAAXX100202:5:62:8987:1929\n" +
+                "20GAVAAXX100126:1:28:4295:139802\n" +
+                "20FUKAAXX100202:4:23:8516:117251\n" +
+                "20FUKAAXX100202:6:23:6442:37469\n" +
+                "20FUKAAXX100202:8:24:10477:24196\n" +
+                "20GAVAAXX100126:8:63:5797:158250\n" +
+                "20FUKAAXX100202:1:45:12798:104365\n" +
+                "20GAVAAXX100126:3:23:6419:199245\n" +
+                "20FUKAAXX100202:8:48:6663:137967\n" +
+                "20FUKAAXX100202:6:68:17726:162601");
+
+        // a subset of read names from
+        // src/test/resources/htsjdk/samtools/longreads/NA12878.m64020_190210_035026.chr21.5011316.5411316.unmapped.bam
+        readNamesList.add("m64020_190210_035026/44368402/ccs\n"); // single name with a trailing newline
+        readNamesList.add("m64020_190210_035026/44368402/ccs"); // the same name without a trailing newline
+        readNamesList.add("m64020_190210_035026/44368402/ccs\n" +
+                "m64020_190210_035026/124127126/ccs\n" +
+                "m64020_190210_035026/4981311/ccs\n" +
+                "m64020_190210_035026/80022195/ccs\n" +
+                "m64020_190210_035026/17762104/ccs\n" +
+                "m64020_190210_035026/62981096/ccs\n" +
+                "m64020_190210_035026/86968803/ccs\n" +
+                "m64020_190210_035026/46400955/ccs\n" +
+                "m64020_190210_035026/137561592/ccs\n" +
+                "m64020_190210_035026/52233471/ccs\n" +
+                "m64020_190210_035026/97127189/ccs\n" +
+                "m64020_190210_035026/115278035/ccs\n" +
+                "m64020_190210_035026/155256324/ccs\n" +
+                "m64020_190210_035026/163644151/ccs\n" +
+                "m64020_190210_035026/162728365/ccs\n" +
+                "m64020_190210_035026/160238116/ccs\n" +
+                "m64020_190210_035026/147719983/ccs\n" +
+                "m64020_190210_035026/60883331/ccs\n" +
+                "m64020_190210_035026/1116165/ccs\n" +
+                "m64020_190210_035026/75893199/ccs");
+
+        // source: https://gatk.broadinstitute.org/hc/en-us/articles/360035890671-Read-groups
+        readNamesList.add(
+                "H0164ALXX140820:2:1101:10003:23460\n" +
+                "H0164ALXX140820:2:1101:15118:25288");
+
+        final List<Object[]> testCases = new ArrayList<>();
+        for (final String readName : readNamesList) {
+            testCases.add(new Object[]{
+                    new NameTokenisationEncode(),
+                    new NameTokenisationDecode(),
+                    new TestDataEnvelope(readName.getBytes(StandardCharsets.UTF_8))});
+        }
+        return testCases.toArray(new Object[][]{});
+    }
+
+    /** Compresses the test bytes, uncompresses the result, and expects the original bytes back. */
+    @Test(dataProvider = "nameTokenisation")
+    public void testRoundTrip(
+            final NameTokenisationEncode nameTokenisationEncode,
+            final NameTokenisationDecode nameTokenisationDecode,
+            final TestDataEnvelope td) {
+        final ByteBuffer uncompressedBuffer = ByteBuffer.wrap(td.testArray);
+        final ByteBuffer compressedBuffer = nameTokenisationEncode.compress(uncompressedBuffer, 0);
+        final String decompressedNames = nameTokenisationDecode.uncompress(compressedBuffer);
+        final ByteBuffer decompressedNamesBuffer = StandardCharsets.UTF_8.encode(decompressedNames);
+        uncompressedBuffer.rewind();
+        Assert.assertEquals(decompressedNamesBuffer, uncompressedBuffer);
+    }
+}
\ No newline at end of file