diff --git a/scls-format/scls-format.cabal b/scls-format/scls-format.cabal index b44fdbc..64173fb 100644 --- a/scls-format/scls-format.cabal +++ b/scls-format/scls-format.cabal @@ -29,6 +29,7 @@ library Cardano.SCLS.Internal.Record.Hdr Cardano.SCLS.Internal.Record.Internal.Class Cardano.SCLS.Internal.Record.Manifest + Cardano.SCLS.Internal.Serializer.Builder.InMemory Cardano.SCLS.Internal.Serializer.ChunksBuilder.InMemory Cardano.SCLS.Internal.Serializer.External.Impl Cardano.SCLS.Internal.Serializer.HasKey diff --git a/scls-format/src/Cardano/SCLS/Internal/Entry.hs b/scls-format/src/Cardano/SCLS/Internal/Entry.hs index 145a58d..8fde9b6 100644 --- a/scls-format/src/Cardano/SCLS/Internal/Entry.hs +++ b/scls-format/src/Cardano/SCLS/Internal/Entry.hs @@ -7,6 +7,7 @@ module Cardano.SCLS.Internal.Entry ( ) where import Cardano.SCLS.Internal.Serializer.HasKey +import Cardano.SCLS.Internal.Serializer.MemPack (MemPackHeaderOffset (..)) import Cardano.Types.ByteOrdered (BigEndian (..)) import Data.MemPack import Data.MemPack.Buffer @@ -41,6 +42,9 @@ instance (Typeable k, IsKey k, MemPack v, Typeable v) => MemPack (ChunkEntry k v v <- unpackM return (ChunkEntry k v) +instance (Typeable k, IsKey k, Typeable v, MemPack v) => MemPackHeaderOffset (ChunkEntry k v) where + headerSizeOffset = 4 + instance (Eq k, Eq v) => Eq (ChunkEntry k v) where (ChunkEntry k1 v1) == (ChunkEntry k2 v2) = k1 == k2 && v1 == v2 diff --git a/scls-format/src/Cardano/SCLS/Internal/Serializer/Builder/InMemory.hs b/scls-format/src/Cardano/SCLS/Internal/Serializer/Builder/InMemory.hs new file mode 100644 index 0000000..40bb273 --- /dev/null +++ b/scls-format/src/Cardano/SCLS/Internal/Serializer/Builder/InMemory.hs @@ -0,0 +1,185 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeFamilies #-} + +{- | +Implementation of the state machine that fills current chunk in memory. + +It manages proper filling of the buffers and emitting the values when +the next item can't be added. + +Current implementation expects the incoming values in already sorted order. + +Implementation is done in the way so it would be possible to use it with +any existing stream and effect system as long as they could carry a state. +-} +module Cardano.SCLS.Internal.Serializer.Builder.InMemory ( + BuilderItem (..), + mkMachine, + Command (..), + BuilderMachine (..), +) where + +import Cardano.SCLS.Internal.Hash +import Cardano.SCLS.Internal.Serializer.MemPack +import Control.Monad.Primitive +import Crypto.Hash (Blake2b_224 (Blake2b_224)) +import Crypto.Hash.MerkleTree.Incremental qualified as MT +import Data.MemPack + +import Data.Kind (Type) +import Data.Primitive.ByteArray +import Data.Typeable +import Foreign.Ptr +import Unsafe.Coerce (unsafeCoerce) + +-- | Typeclass for items that can be emitted by the builder state machine. +class BuilderItem item where + -- | Type of parameters needed to encode entries and build the item + type Parameters item :: Type + + -- | Get the data payload of the item + bItemData :: item -> ByteArray + + -- | Get the number of entries contained in the item + bItemEntriesCount :: item -> Int + + -- | Construct an item from build parameters, data and entry count + bMkItem :: Parameters item -> ByteArray -> Int -> item + + -- | Encode an entry for the item using the provided parameters + bEncodeEntry :: (MemPack a, Typeable a) => Parameters item -> a -> a + +-- | Command for the state machine +data Command item type_ where + -- | Append a new item to the buffer. + Append :: (MemPack u, Typeable u, MemPackHeaderOffset u) => u -> Command item (BuilderMachine item, [item]) + {- | Finalize building of the buffer. Calling this command does not + + It's up to the implementation if the state machine can be used + after interpreting this command. + -} + Finalize :: Command item (Digest, Maybe item) + +{- | State machine for building items in memory. + +Basically it's an interpreter for the 'Command' type that is implemented +the way that it can be inserted into the different streaming pipelines +or effect libraries +-} +newtype BuilderMachine item = BuilderMachine + { interpretCommand :: forall result. Command item result -> IO result + } + +-- | Create an instance of the state machine. +mkMachine :: + forall item. + (BuilderItem item) => + -- | Buffer size in bytes + Int -> + Parameters item -> + IO (BuilderMachine item) +mkMachine bufferSize params = do + -- We perform copying when we emit data outside of the state machine. + -- So this buffer is reused for all the items, as a result we copy + -- + -- We allocate pinned memory because we pass it to the digest code + -- without copying by passing raw pointer. + storage <- newPinnedByteArray bufferSize + + -- Use fix? We love fixed point combinators do we not? + let machine (!entriesCount :: Int) (!offset :: Int) !merkleTreeState = + BuilderMachine + { interpretCommand = \case + Finalize -> do + let final = Digest $ MT.merkleRootHash $ MT.finalize merkleTreeState + if offset == 0 -- no new data, nothing to emit + then + pure (final, Nothing) + else do + frozenData <- freezeByteArrayPinned storage 0 offset + pure (final, Just (bMkItem params frozenData entriesCount)) + Append @a input -> do + let entry = Entry $ bEncodeEntry @item params input + let l = packedByteCount entry + if offset + l <= bufferSize -- if we fit the current buffer we just need to write data and continue + then do + (merkleTreeState', newOffset) <- + unsafeAppendEntryToBuffer merkleTreeState storage offset entry + pure (machine (entriesCount + 1) newOffset merkleTreeState', []) + else do + -- We have no space in the current buffer, so we need to emit it first + frozenBuffer <- freezeByteArrayPinned storage 0 offset + if l > bufferSize + then do + let !tmpBuffer = pack entry + !merkleTreeState' = MT.add merkleTreeState (uncheckedByteArrayEntryContents @a tmpBuffer) + return + ( machine 0 0 merkleTreeState' + , mkDataToEmit [(params, frozenBuffer, entriesCount), (params, tmpBuffer, 1)] + ) + else do + (merkleTreeState', newOffset) <- + unsafeAppendEntryToBuffer merkleTreeState storage 0 entry + pure + ( machine 1 newOffset merkleTreeState' + , mkDataToEmit [(params, frozenBuffer, entriesCount)] + ) + } + return $! machine 0 0 (MT.empty Blake2b_224) + +{- | Freeze a bytearray to the pinned immutable bytearray by copying its contents. + +It's safe to use the source bytearray after this operation. +-} +freezeByteArrayPinned :: (PrimMonad m) => MutableByteArray (PrimState m) -> Int -> Int -> m ByteArray +freezeByteArrayPinned !src !off !len = do + dst <- newPinnedByteArray len + copyMutableByteArray dst 0 src off len + unsafeFreezeByteArray dst + +unsafeAppendEntryToBuffer :: forall u. (MemPack u, Typeable u, MemPackHeaderOffset u) => MT.MerkleTreeState Blake2b_224 -> MutableByteArray (PrimState IO) -> Int -> Entry u -> IO (MT.MerkleTreeState Blake2b_224, Int) +unsafeAppendEntryToBuffer !merkleTreeState !storage !offset u = do + newOffset <- unsafeAppendToBuffer storage offset u + let l = newOffset - offset + merkleTreeState' <- withMutableByteArrayContents storage $ \ptr -> do + let csb = CStringLenBuffer (ptr `plusPtr` (offset + headerSizeOffset @u), l - headerSizeOffset @u) + return $! MT.add merkleTreeState csb + return (merkleTreeState', newOffset) + +{- | Helper to get access to the entry contents. +This method should be used on the pinned 'ByteArray' only, but the function does +not enforce this. +-} +uncheckedByteArrayEntryContents :: forall a. (MemPackHeaderOffset a) => ByteArray -> CStringLenBuffer +uncheckedByteArrayEntryContents !buffer = CStringLenBuffer (byteArrayContents buffer `plusPtr` (headerSizeOffset @a), sizeofByteArray buffer - (headerSizeOffset @a)) + +{- | Unsafe helper that we need because MemPack interface only allows ST, and +no other PrimMonad. + +There is unsafe prefix, because this function uses 'unsafeCoerce' internally, +but it ensures everything to make it safe to use. + +This functions prepends the packed values with its lengths. +-} +unsafeAppendToBuffer :: (MemPack u) => MutableByteArray (PrimState IO) -> Int -> u -> IO Int +unsafeAppendToBuffer !storage !offset u = stToPrim $ do + let uInST = unsafeCoerce storage + (_, offset') <- + runStateT (runPack (packM u) uInST) offset + pure offset' + +{- | Helper to create the list of items to emit from the list of + (data, count) tuples. + + This function filters out items with 0 entries. +-} +mkDataToEmit :: (BuilderItem item) => [(Parameters item, ByteArray, Int)] -> [item] +mkDataToEmit = mkDataToEmit' [] + where + mkDataToEmit' acc [] = reverse acc + mkDataToEmit' acc ((_, _, 0) : xs) = mkDataToEmit' acc xs + mkDataToEmit' acc ((params, u, count) : xs) = + mkDataToEmit' (bMkItem params u count : acc) xs diff --git a/scls-format/src/Cardano/SCLS/Internal/Serializer/ChunksBuilder/InMemory.hs b/scls-format/src/Cardano/SCLS/Internal/Serializer/ChunksBuilder/InMemory.hs index 6ee7cc0..e20d80a 100644 --- a/scls-format/src/Cardano/SCLS/Internal/Serializer/ChunksBuilder/InMemory.hs +++ b/scls-format/src/Cardano/SCLS/Internal/Serializer/ChunksBuilder/InMemory.hs @@ -1,6 +1,7 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeFamilies #-} {- | Implementation of the state machine that fills current chunk in memory. @@ -15,159 +16,34 @@ any existing stream and effect system as long as they could carry a state. -} module Cardano.SCLS.Internal.Serializer.ChunksBuilder.InMemory ( mkMachine, - Command (..), - BuilderMachine (..), + B.Command (..), + BuilderMachine, ChunkItem (..), + B.interpretCommand, ) where -import Cardano.SCLS.Internal.Hash import Cardano.SCLS.Internal.Record.Chunk -import Cardano.SCLS.Internal.Serializer.MemPack -import Control.Monad.Primitive -import Crypto.Hash (Blake2b_224 (Blake2b_224)) -import Crypto.Hash.MerkleTree.Incremental qualified as MT -import Data.MemPack +import Cardano.SCLS.Internal.Serializer.Builder.InMemory qualified as B +import Cardano.SCLS.Internal.Serializer.Builder.InMemory (BuilderItem (Parameters)) import Data.Primitive.ByteArray -import Data.Typeable -import Foreign.Ptr -import Unsafe.Coerce (unsafeCoerce) data ChunkItem = ChunkItem - { chunkItemFormat :: !ChunkFormat - , chunkItemData :: !ByteArray - , chunkItemEntriesCount :: !Int + { chunkItemFormat :: ChunkFormat + , chunkItemData :: ByteArray + , chunkItemEntriesCount :: Int } --- | Command for the state machine -data Command type_ where - -- | Append a new item to the buffer. - Append :: (MemPack u, Typeable u) => u -> Command (BuilderMachine, [ChunkItem]) - {- | Finalize building of the buffer. Calling this command does not +instance B.BuilderItem ChunkItem where + type Parameters ChunkItem = ChunkFormat + bItemData = chunkItemData + bItemEntriesCount = chunkItemEntriesCount + bMkItem chunkItemFormat data_ count = ChunkItem{chunkItemData = data_, chunkItemEntriesCount = count, chunkItemFormat} + bEncodeEntry ChunkFormatRaw entry = entry + bEncodeEntry ChunkFormatZstd _entry = error "Chunk format zstd is not implemented yet" + bEncodeEntry ChunkFormatZstdE _entry = error "Chunk format zstd-e is not implemented yet" - It's up to the implementation if the state machine can be used - after interpreting this command. - -} - Finalize :: Command (Digest, Maybe ChunkItem) +type BuilderMachine = B.BuilderMachine ChunkItem -{- | State machine for building chunks in memory. - -Basically it's an interpreter for the 'Command' type that is implemented -the way that it can be inserted into the different streaming pipelines -or effect libraries --} -newtype BuilderMachine = BuilderMachine - { interpretCommand :: forall result. Command result -> IO result - } - --- | Create an instance of the state machine. -mkMachine :: - -- | Buffer size in bytes - Int -> - -- | Encoding format in chunks - ChunkFormat -> - IO BuilderMachine -mkMachine _ ChunkFormatZstd = error "Chunk format zstd is not implemented yet" -mkMachine _ ChunkFormatZstdE = error "Chunk format zstd-e is not implemented yet" -mkMachine bufferSize format@ChunkFormatRaw = do - -- We perform copying when we emit data outside of the state machine. - -- So this buffer is reused for all the chunks, as a result we copy - -- - -- We allocate pinned memory because we pass it to the digest code - -- without copying by passing raw pointer. - storage <- newPinnedByteArray bufferSize - - -- Use fix? We love fixed point combinators do we not? - let machine (!entriesCount :: Int) (!offset :: Int) !merkleTreeState = - BuilderMachine - { interpretCommand = \case - Finalize -> do - let final = Digest $ MT.merkleRootHash $ MT.finalize merkleTreeState - if offset == 0 -- no new data, nothing to emit - then - pure (final, Nothing) - else do - frozenData <- freezeByteArrayPinned storage 0 offset - pure (final, Just ChunkItem{chunkItemEntriesCount = entriesCount, chunkItemFormat = format, chunkItemData = frozenData}) - Append input -> do - let entry = Entry input - let l = packedByteCount entry - if offset + l <= bufferSize -- if we fit the current buffer we just need to write data and continue - then do - (merkleTreeState', newOffset) <- - unsafeAppendEntryToBuffer merkleTreeState storage offset entry - pure (machine (entriesCount + 1) newOffset merkleTreeState', []) - else do - -- We have no space in the current buffer, so we need to emit it first - frozenBuffer <- freezeByteArrayPinned storage 0 offset - if l > bufferSize - then do - let !tmpBuffer = pack entry - !merkleTreeState' = MT.add merkleTreeState (uncheckedByteArrayEntryContents tmpBuffer) - return - ( machine 0 0 merkleTreeState' - , mkChunksToEmit [(format, frozenBuffer, entriesCount), (format, tmpBuffer, 1)] - ) - else do - (merkleTreeState', newOffset) <- - unsafeAppendEntryToBuffer merkleTreeState storage 0 entry - pure - ( machine 1 newOffset merkleTreeState' - , mkChunksToEmit [(format, frozenBuffer, entriesCount)] - ) - } - return $! machine 0 0 (MT.empty Blake2b_224) - -{- | Freeze a bytearray to the pinned immutable bytearray by copying its contents. - -It's safe to use the source bytearray after this operation. --} -freezeByteArrayPinned :: (PrimMonad m) => MutableByteArray (PrimState m) -> Int -> Int -> m ByteArray -freezeByteArrayPinned !src !off !len = do - dst <- newPinnedByteArray len - copyMutableByteArray dst 0 src off len - unsafeFreezeByteArray dst - -unsafeAppendEntryToBuffer :: (MemPack u, Typeable u) => MT.MerkleTreeState Blake2b_224 -> MutableByteArray (PrimState IO) -> Int -> Entry u -> IO (MT.MerkleTreeState Blake2b_224, Int) -unsafeAppendEntryToBuffer !merkleTreeState !storage !offset u = do - newOffset <- unsafeAppendToBuffer storage offset u - let l = newOffset - offset - merkleTreeState' <- withMutableByteArrayContents storage $ \ptr -> do - let csb = CStringLenBuffer (ptr `plusPtr` (offset + 4), l - 4) - return $! MT.add merkleTreeState csb - return (merkleTreeState', newOffset) - -{- | Helper to get access to the entry contents. -This method should be used on the pinned 'ByteArray' only, but the function does -not enforce this. --} -uncheckedByteArrayEntryContents :: ByteArray -> CStringLenBuffer -uncheckedByteArrayEntryContents !buffer = CStringLenBuffer (byteArrayContents buffer `plusPtr` 4, sizeofByteArray buffer - 4) - -{- | Unsafe helper that we need because MemPack interface only allows ST, and -no other PrimMonad. - -There is unsafe prefix, because this function uses 'unsafeCoerce' internally, -but it ensures everything to make it safe to use. - -This functions prepends the packed values with its lengths. --} -unsafeAppendToBuffer :: (MemPack u) => MutableByteArray (PrimState IO) -> Int -> u -> IO Int -unsafeAppendToBuffer !storage !offset u = stToPrim $ do - let uInST = unsafeCoerce storage - (_, offset') <- - runStateT (runPack (packM u) uInST) offset - pure offset' - -{- | Helper to create the list of chunks to emit from the list of - (format, data, count) tuples. - - This function filters out the chunks with 0 entries. --} -mkChunksToEmit :: [(ChunkFormat, ByteArray, Int)] -> [ChunkItem] -mkChunksToEmit = mkChunksToEmit' [] - where - mkChunksToEmit' acc [] = reverse acc - mkChunksToEmit' acc ((_, _, 0) : xs) = mkChunksToEmit' acc xs - mkChunksToEmit' acc ((format, u, count) : xs) = - mkChunksToEmit' (ChunkItem{chunkItemFormat = format, chunkItemData = u, chunkItemEntriesCount = count} : acc) xs +mkMachine :: Int -> B.Parameters ChunkItem -> IO BuilderMachine +mkMachine = B.mkMachine diff --git a/scls-format/src/Cardano/SCLS/Internal/Serializer/External/Impl.hs b/scls-format/src/Cardano/SCLS/Internal/Serializer/External/Impl.hs index 87e47f8..9612e23 100644 --- a/scls-format/src/Cardano/SCLS/Internal/Serializer/External/Impl.hs +++ b/scls-format/src/Cardano/SCLS/Internal/Serializer/External/Impl.hs @@ -7,7 +7,7 @@ module Cardano.SCLS.Internal.Serializer.External.Impl ( ) where import Cardano.SCLS.Internal.Record.Hdr -import Cardano.SCLS.Internal.Serializer.MemPack (Entry (..), RawBytes (..)) +import Cardano.SCLS.Internal.Serializer.MemPack import Cardano.SCLS.Internal.Serializer.Reference.Dump import Cardano.Types.Namespace (Namespace) import Cardano.Types.Namespace qualified as Namespace @@ -45,7 +45,7 @@ import VectorBuilder.Builder qualified as Builder import VectorBuilder.MVector qualified as Builder serialize :: - (MemPack a, Ord a, Typeable a, HasKey a) => + (MemPack a, Ord a, Typeable a, HasKey a, MemPackHeaderOffset a) => -- | path to resulting file FilePath -> -- | Network identifier diff --git a/scls-format/src/Cardano/SCLS/Internal/Serializer/MemPack.hs b/scls-format/src/Cardano/SCLS/Internal/Serializer/MemPack.hs index 1acbd27..3518649 100644 --- a/scls-format/src/Cardano/SCLS/Internal/Serializer/MemPack.hs +++ b/scls-format/src/Cardano/SCLS/Internal/Serializer/MemPack.hs @@ -9,6 +9,7 @@ module Cardano.SCLS.Internal.Serializer.MemPack ( RawBytes (..), CStringLenBuffer (..), isolate, + MemPackHeaderOffset (..), ) where import Cardano.SCLS.Internal.Serializer.HasKey @@ -29,6 +30,10 @@ import Foreign.Ptr import GHC.Stack (HasCallStack) import System.ByteOrder +-- | Typeclass for types that have a fixed header offset when serialized. +class (MemPack a) => MemPackHeaderOffset a where + headerSizeOffset :: Int + {- | Wrapper that allows to store raw bytes without any prefix. It's likely that this type will be removed in the future as @@ -52,6 +57,9 @@ instance MemPack (RawBytes) where return (bufferByteCount b - s) RawBytes <$> unpackByteStringM len +instance MemPackHeaderOffset RawBytes where + headerSizeOffset = 4 + {- | Entry wrapper for other mempack values, that explicitly stores its length as a big-endian 'Word32' before the value itself diff --git a/scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Dump.hs b/scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Dump.hs index 9bea184..3adce9a 100644 --- a/scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Dump.hs +++ b/scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Dump.hs @@ -22,6 +22,7 @@ import Cardano.SCLS.Internal.Record.Chunk import Cardano.SCLS.Internal.Record.Hdr import Cardano.SCLS.Internal.Record.Manifest import Cardano.SCLS.Internal.Serializer.ChunksBuilder.InMemory +import Cardano.SCLS.Internal.Serializer.MemPack import Crypto.Hash.MerkleTree.Incremental qualified as MT import Data.Foldable qualified as F @@ -123,7 +124,7 @@ withChunkFormat format SerializationPlan{..} = -- This is reference implementation and it does not yet care about -- proper working with the hardware, i.e. flushing and calling fsync -- at the right moments. -dumpToHandle :: (HasKey a, MemPack a, Typeable a) => Handle -> Hdr -> SortedSerializationPlan a -> IO () +dumpToHandle :: (HasKey a, MemPack a, Typeable a, MemPackHeaderOffset a) => Handle -> Hdr -> SortedSerializationPlan a -> IO () dumpToHandle handle hdr plan = do let SerializationPlan{..} = getSerializationPlan plan _ <- hWriteFrame handle hdr @@ -194,7 +195,7 @@ dedup s0 = initialize s0 constructChunks_ :: forall a r. - (MemPack a, Typeable a) => + (MemPack a, Typeable a, MemPackHeaderOffset a) => ChunkFormat -> Stream (Of a) IO r -> Stream (Of ChunkItem) IO (Digest) diff --git a/scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Impl.hs b/scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Impl.hs index 670de33..9423dd1 100644 --- a/scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Impl.hs +++ b/scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Impl.hs @@ -7,6 +7,7 @@ module Cardano.SCLS.Internal.Serializer.Reference.Impl ( ) where import Cardano.SCLS.Internal.Record.Hdr +import Cardano.SCLS.Internal.Serializer.MemPack import Cardano.SCLS.Internal.Serializer.Reference.Dump import Cardano.Types.Namespace (Namespace (..)) import Cardano.Types.Network @@ -29,7 +30,7 @@ import VectorBuilder.MVector qualified as Builder At this point it accepts values from one namespace only. -} serialize :: - (MemPack a, Ord a, Typeable a, HasKey a) => + (MemPack a, Ord a, Typeable a, HasKey a, MemPackHeaderOffset a) => -- | path to resulting file FilePath -> -- | Network identifier diff --git a/scls-format/test/ChunksBuilderSpec.hs b/scls-format/test/ChunksBuilderSpec.hs index f16729d..9620b7d 100644 --- a/scls-format/test/ChunksBuilderSpec.hs +++ b/scls-format/test/ChunksBuilderSpec.hs @@ -14,6 +14,9 @@ import Test.Hspec.Expectations.Contrib import Test.Hspec.QuickCheck import Test.QuickCheck +mkMachine' :: Int -> IO BuilderMachine +mkMachine' = flip mkMachine ChunkFormatRaw + chunksBuilderTests :: Spec chunksBuilderTests = describe "ChunksBuilder.InMemory" $ do @@ -67,7 +70,7 @@ bufferBoundaryTests = describe "Buffer Boundary Tests" $ do prop "should not emit chunks when data fits" $ forAll bufferFittingChunks $ \(bufferLength, chunkLengths) -> do - machine <- mkMachine bufferLength ChunkFormatRaw + machine <- mkMachine' bufferLength (_machine, emittedChunks) <- foldAppendChunks machine (map (RawBytes . flip BS.replicate 0x43) chunkLengths) annotate "no chunks should be emitted since all data fits" $ length emittedChunks `shouldBe` 0 @@ -75,7 +78,7 @@ bufferBoundaryTests = prop "should not emit chunk when data exactly fills buffer, only after" $ forAll bufferFillingChunks $ \(bufferLength, chunkLengths) -> do - machine <- mkMachine bufferLength ChunkFormatRaw + machine <- mkMachine' bufferLength (machine', emittedChunks) <- foldAppendChunks machine (map (RawBytes . flip BS.replicate 0x43) chunkLengths) annotate "after appending exact fit data should not emit" $ length emittedChunks `shouldBe` 0 @@ -92,7 +95,7 @@ bufferBoundaryTests = forM_ [0, 1, 4, 31, 128] $ \chunkDataLength -> do it ("should emit one chunk when buffer is empty and data is oversized (dataLen =" ++ show chunkDataLength) $ do let bufferLength = chunkDataLength + 3 - machine <- mkMachine bufferLength ChunkFormatRaw + machine <- mkMachine' bufferLength let chunkData = RawBytes (BS.replicate chunkDataLength 0x46) (_machine, chunks) <- interpretCommand machine (Append chunkData) case chunks of @@ -104,7 +107,7 @@ bufferBoundaryTests = prop "should emit oversized chunk and buffer when buffer is not empty" $ forAll bufferFittingAndOversizedChunks $ \(bufferLength, smallDataLength, largeDataLength) -> do - machine <- mkMachine bufferLength ChunkFormatRaw + machine <- mkMachine' bufferLength -- Add data that fits first let smallData = RawBytes (BS.replicate smallDataLength 0x47) (machine', chunks) <- interpretCommand machine (Append smallData) @@ -126,7 +129,7 @@ bufferBoundaryTests = it "should handle multiple boundary crossings correctly" $ do let bufferLength = 50 - machine <- mkMachine bufferLength ChunkFormatRaw + machine <- mkMachine' bufferLength -- Add data that will cause multiple boundary crossings -- 26 bytes total with prefix let dataChunk1Length = 22 @@ -181,7 +184,7 @@ bufferBoundaryTests = it ("should emit chunk immediately when buffer length is zero (dataLen =" ++ show dataChunkLength ++ ")") $ do let bufferLength = 0 let dataChunk = RawBytes (BS.replicate dataChunkLength 0x4B) - machine <- mkMachine bufferLength ChunkFormatRaw + machine <- mkMachine' bufferLength (_machine', chunks) <- interpretCommand machine (Append dataChunk) case chunks of [chunk] -> do @@ -194,7 +197,7 @@ finalizationTests = describe "Finalization Tests" $ do prop "should not emit chunk when finalizing empty buffer" $ \(Positive bufferLength) -> do - machine <- mkMachine bufferLength ChunkFormatRaw + machine <- mkMachine' bufferLength (digest, maybeChunk) <- interpretCommand machine Finalize isNothing maybeChunk `shouldBe` True -- Digest should still be computed (even if empty) @@ -202,7 +205,7 @@ finalizationTests = prop "should emit fitting chunks only on finalize" $ forAll bufferFittingChunks $ \(bufferLength, chunkLengths) -> do - machine <- mkMachine bufferLength ChunkFormatRaw + machine <- mkMachine' bufferLength (machine', chunks) <- foldAppendChunks machine (map (RawBytes . flip BS.replicate 0x49) chunkLengths) annotate "should not emit chunks before finalization" $ (length chunks) `shouldBe` 0