3 changes: 2 additions & 1 deletion scls-format/scls-format.cabal
@@ -28,6 +28,7 @@ library
Cardano.SCLS.Internal.Record.Hdr
Cardano.SCLS.Internal.Record.Internal.Class
Cardano.SCLS.Internal.Record.Manifest
Cardano.SCLS.Internal.Record.Metadata
Cardano.SCLS.Internal.Serializer.ChunksBuilder.InMemory
Cardano.SCLS.Internal.Serializer.External.Impl
Cardano.SCLS.Internal.Serializer.MemPack
@@ -102,6 +103,7 @@ test-suite scls-format-test
other-modules:
ChunksBuilderSpec
MultiNamespace
Roundtrip

-- other-extensions:
type: exitcode-stdio-1.0
@@ -117,7 +119,6 @@
cuddle >=0.5,
filepath,
hspec,
hspec-contrib,
hspec-expectations,
merkle-tree-incremental,
primitive,
20 changes: 20 additions & 0 deletions scls-format/src/Cardano/SCLS/Internal/Reader.hs
@@ -5,6 +5,7 @@

module Cardano.SCLS.Internal.Reader (
withNamespacedData,
withRecordData,
extractRootHash,
extractNamespaceList,
extractNamespaceHash,
@@ -30,6 +31,7 @@ import Data.Typeable
import System.IO
import System.IO qualified as IO

import Cardano.SCLS.Internal.Record.Internal.Class (IsFrameRecord)
import Streaming qualified as S
import Streaming.Prelude qualified as S

@@ -57,6 +59,24 @@ withNamespacedData filePath namespace f =
drain rest
go next_record

{- | Stream all records for a particular record type in the file.
No optimized access, just a full scan of the file.
-}
withRecordData :: forall t b r. (IsFrameRecord t b) => FilePath -> (S.Stream (S.Of b) IO () -> IO r) -> IO r
withRecordData filePath f =
IO.withBinaryFile filePath ReadMode \handle -> f (stream handle)
where
stream handle = do
flip fix headerOffset \go record -> do
next <- S.liftIO do
fetchNextFrame handle record
for_ next \next_record -> do
dataRecord <- S.liftIO do
fetchOffsetFrame handle next_record
for_ (decodeFrame dataRecord) \metadataRecord -> do
S.yield (frameViewContent metadataRecord)
go next_record

{- | Extract the root hash from the file at the given offset.

This function does not provide additional checks.
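For context, a minimal usage sketch of the new withRecordData helper above (not part of this PR). It performs a full scan of the file and decodes every frame of the requested record type; the sketch assumes the frame tag can be inferred from the record type, otherwise a type application would be needed.

-- Usage sketch only, not part of this PR.
import Cardano.SCLS.Internal.Reader (withRecordData)
import Cardano.SCLS.Internal.Record.Metadata (Metadata)
import Streaming.Prelude qualified as S

-- Collect every Metadata record found in the file (full scan, no index).
readAllMetadata :: FilePath -> IO [Metadata]
readAllMetadata path = withRecordData path S.toList_
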
100 changes: 100 additions & 0 deletions scls-format/src/Cardano/SCLS/Internal/Record/Metadata.hs
@@ -0,0 +1,100 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RecordWildCards #-}

{- |
Record to hold metadata information.
-}
module Cardano.SCLS.Internal.Record.Metadata (Metadata, mkMetadata) where

import Cardano.SCLS.Internal.Hash (Digest, digest)
import Cardano.SCLS.Internal.Record.Internal.Class
import Data.Binary (Binary (get), Word32, put)
import Data.Binary.Get (getByteString, getWord32be, getWord64be, getWord8)
import Data.Binary.Put (putByteString, putWord32be, putWord64be)
import Data.ByteString qualified as BS
import Data.Word (Word64)

-- TODO: reintroduce MetadataEntry ?
-- data MetadataEntry = MetadataEntry
-- { subject :: URI
-- , value :: BS.ByteString -- CBOR
-- }
-- deriving (Show)

data MetadataFooter = MetadataFooter
{ totalEntries :: Word64
Contributor: I think we need to add strictness here, just to be sure.

, entriesHash :: Digest
}
deriving (Show, Eq)

data Metadata = Metadata
{ entries :: BS.ByteString -- TODO: reintroduce [MetadataEntry] ?
Contributor: I'd reintroduce 'MetadataEntry' here, because otherwise it will likely not be very readable to other clients.

, footer :: MetadataFooter
}
deriving (Show, Eq)

instance IsFrameRecord 0x31 Metadata where
encodeRecordContents Metadata{..} = do
putWord32be (fromIntegral (BS.length entries) :: Word32)
putByteString entries
-- putWord64be 0 -- end of entries
putFooter footer
where
-- putEntry MetadataEntry{..} = do
-- let subjectBytes = serializeURIRef' subject
-- subjectLen = BS.length subjectBytes
-- valueLen = BS.length value

-- putWord64be (fromIntegral subjectLen)
-- putByteString subjectBytes
-- putWord64be (fromIntegral valueLen)
-- putByteString value

putFooter MetadataFooter{..} = do
putWord64be totalEntries
put entriesHash

decodeRecordContents = do
_ <- getWord8 -- type offset: maintain consistency with Chunk pattern
entriesSize <- getWord32be
entries <- getByteString (fromIntegral entriesSize)
-- _ <- getWord64be -- end of entries marker
footer <- getFooter
pure Metadata{..}
where
-- getEntry = do
-- len <- getWord64be
-- if len == 0
-- then return Nothing
-- else do
-- subjectLen <- getWord64be
-- subjectBytes <- getByteString (fromIntegral subjectLen)
-- let subject = case parseURI strictURIParserOptions subjectBytes of
-- Left err -> error $ "Failed to parse URI: " ++ show err
-- Right uri -> uri
-- valueLen <- getWord64be
-- value <- getByteString (fromIntegral valueLen)
-- pure (Just MetadataEntry{..})

getFooter = do
totalEntries <- getWord64be
entriesHash <- get
pure MetadataFooter{..}

mkMetadata :: BS.ByteString -> Word64 -> Metadata
mkMetadata entries totalEntries =
let
-- entries = [MetadataEntry subject value | (subject, value) <- metadataEntries]
-- totalEntries = fromIntegral $ length entries
entriesHash = digest entries
footer = MetadataFooter{..}
in
Metadata{..}
where

-- digestEntries entries =
-- digest $ mconcat $ map entryDigestInput entries
-- where
-- entryDigestInput MetadataEntry{..} =
-- BS.singleton 0x01 <> serializeURIRef' subject <> value
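
A small construction sketch (not part of this PR) showing how a caller could assemble a Metadata record from pre-encoded entry bytes with mkMetadata; the byte string below is a placeholder, not real CBOR entries.

-- Construction sketch only, not part of this PR.
import Cardano.SCLS.Internal.Record.Metadata (Metadata, mkMetadata)
import Data.ByteString qualified as BS
import Data.Word (Word64)

exampleMetadata :: Metadata
exampleMetadata =
  let entryBytes = BS.pack [0x82, 0x01, 0x02] -- placeholder bytes standing in for CBOR-encoded entries
      entryCount = 1 :: Word64
   in mkMetadata entryBytes entryCount
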
scls-format/src/Cardano/SCLS/Internal/Serializer/External/Impl.hs
@@ -7,6 +7,7 @@ module Cardano.SCLS.Internal.Serializer.External.Impl (
) where

import Cardano.SCLS.Internal.Record.Hdr
import Cardano.SCLS.Internal.Record.Metadata
import Cardano.SCLS.Internal.Serializer.MemPack (Entry (..), RawBytes (..))
import Cardano.SCLS.Internal.Serializer.Reference.Dump
import Cardano.Types.Network
@@ -54,16 +55,20 @@ serialize ::
SlotNo ->
-- | Input stream of entries to serialize, can be unsorted
S.Stream (S.Of (InputChunk a)) IO () ->
{- | Input stream of metadata to serialize
TODO: this currently assumes data is sorted
-}
S.Stream (S.Of Metadata) IO () ->
IO ()
serialize resultFilePath network slotNo inputStream = do
serialize resultFilePath network slotNo inputStream metadataStream = do
let !hdr = mkHdr network slotNo
withTempDirectory (takeDirectory resultFilePath) "tmp.XXXXXX" \tmpDir -> do
prepareExternalSortNamespaced tmpDir inputStream
handles <- newIORef []
onException
do
withBinaryFile resultFilePath WriteMode \handle -> do
dumpToHandle handle hdr $
dumpToHandle handle hdr metadataStream $
sourceNs handles tmpDir
do traverse hClose =<< readIORef handles

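A call-site sketch for the extended serialize signature above (not part of this PR): the new final argument is the stream of Metadata records; an empty stream, as used in the updated tests, preserves the previous behaviour. The path, network and slot below are placeholders.

-- Call-site sketch only, not part of this PR.
import Cardano.SCLS.Internal.Record.Metadata (mkMetadata)
import Cardano.SCLS.Internal.Serializer.External.Impl qualified as External (serialize)
import Cardano.SCLS.Internal.Serializer.MemPack (RawBytes)
import Cardano.SCLS.Internal.Serializer.Reference.Impl (InputChunk)
import Cardano.Types.Network (NetworkId (..))
import Cardano.Types.SlotNo (SlotNo (..))
import Streaming.Prelude qualified as S

writeWithMetadata :: S.Stream (S.Of (InputChunk RawBytes)) IO () -> IO ()
writeWithMetadata chunks =
  External.serialize
    "out.scls"                     -- placeholder path
    Mainnet
    (SlotNo 1)
    chunks
    (S.each [mkMetadata mempty 0]) -- one metadata record with no entries
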
scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Dump.hs
@@ -15,6 +15,7 @@ import Cardano.SCLS.Internal.Hash (Digest (..))
import Cardano.SCLS.Internal.Record.Chunk
import Cardano.SCLS.Internal.Record.Hdr
import Cardano.SCLS.Internal.Record.Manifest
import Cardano.SCLS.Internal.Record.Metadata
import Cardano.SCLS.Internal.Serializer.ChunksBuilder.InMemory
import Crypto.Hash.MerkleTree.Incremental qualified as MT

@@ -59,8 +60,8 @@ newtype DataStream a = DataStream {runDataStream :: Stream (Of (InputChunk a)) I
-- This is reference implementation and it does not yet care about
-- proper working with the hardware, i.e. flushing and calling fsync
-- at the right moments.
dumpToHandle :: (MemPack a, Typeable a) => Handle -> Hdr -> DataStream a -> IO ()
dumpToHandle handle hdr orderedStream = do
dumpToHandle :: (MemPack a, Typeable a) => Handle -> Hdr -> Stream (Of Metadata) IO () -> DataStream a -> IO ()
dumpToHandle handle hdr metadataStream orderedStream = do
_ <- hWriteFrame handle hdr
manifestData :: ManifestInfo <-
runDataStream orderedStream -- output our sorted stream
@@ -88,6 +89,9 @@ dumpToHandle handle hdr orderedStream = do
in Map.insert namespace ni rest
mempty
do \x -> ManifestInfo x

S.mapM_ (hWriteFrame handle) metadataStream

manifest <- mkManifest manifestData
_ <- hWriteFrame handle manifest
pure ()
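For orientation, the frame order now written by dumpToHandle, as read from this diff (a summary, not a normative spec):

-- Frame order produced by dumpToHandle after this change (summary of the diff):
--   1. Hdr frame                        (hWriteFrame handle hdr)
--   2. Chunk frames, per namespace      (consumed from the ordered DataStream)
--   3. Metadata frames, in stream order (S.mapM_ (hWriteFrame handle) metadataStream)
--   4. Manifest frame                   (hWriteFrame handle manifest)
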
scls-format/src/Cardano/SCLS/Internal/Serializer/Reference/Impl.hs
@@ -6,6 +6,7 @@ module Cardano.SCLS.Internal.Serializer.Reference.Impl (
) where

import Cardano.SCLS.Internal.Record.Hdr
import Cardano.SCLS.Internal.Record.Metadata
import Cardano.SCLS.Internal.Serializer.Reference.Dump
import Cardano.Types.Network
import Cardano.Types.SlotNo
@@ -36,12 +37,16 @@ serialize ::
SlotNo ->
-- | Input stream of entries to serialize, can be unsorted
(S.Stream (S.Of (InputChunk a)) IO ()) ->
{- | Input stream of metadata to serialize
TODO: this currently assumes data is sorted
-}
(S.Stream (S.Of Metadata) IO ()) ->
IO ()
serialize resultFilePath network slotNo stream = do
serialize resultFilePath network slotNo stream metadataStream = do
withBinaryFile resultFilePath WriteMode \handle -> do
let hdr = mkHdr network slotNo
!orderedStream <- mkVectors stream
dumpToHandle handle hdr do
dumpToHandle handle hdr metadataStream do
DataStream (S.each [n S.:> S.each v | (n, v) <- Map.toList orderedStream])
where
mkVectors :: (Ord a) => S.Stream (S.Of (InputChunk a)) IO () -> IO (Map Text (V.Vector a))
92 changes: 3 additions & 89 deletions scls-format/test/Main.hs
@@ -1,100 +1,14 @@
module Main (main) where

import Cardano.SCLS.CDDL (namespaces)
import Cardano.SCLS.Internal.Hash (Digest (..))
import Cardano.SCLS.Internal.Reader (extractRootHash, withNamespacedData)
import Cardano.SCLS.Internal.Serializer.External.Impl qualified as External (serialize)
import Cardano.SCLS.Internal.Serializer.MemPack
import Cardano.SCLS.Internal.Serializer.Reference.Impl (InputChunk)
import Cardano.SCLS.Internal.Serializer.Reference.Impl qualified as Reference (serialize)
import Cardano.Types.Network (NetworkId (..))
import Cardano.Types.SlotNo (SlotNo (..))
import ChunksBuilderSpec (chunksBuilderTests)
import Codec.CBOR.Cuddle.CBOR.Gen (generateCBORTerm')
import Codec.CBOR.Cuddle.CDDL (CDDL, Name (..))
import Codec.CBOR.Cuddle.CDDL.Resolve (
asMap,
buildMonoCTree,
buildRefCTree,
buildResolvedCTree,
)
import Codec.CBOR.Cuddle.Huddle
import Codec.CBOR.Term (encodeTerm)
import Codec.CBOR.Write (toStrictByteString)
import Control.Monad (replicateM)
import Crypto.Hash.MerkleTree.Incremental qualified as MT
import Data.Function ((&))
import Data.List (sort)
import Data.Map.Strict qualified as Map
import Data.Text (Text)
import Data.Text qualified as T
import Streaming.Prelude qualified as S
import System.FilePath ((</>))
import System.IO.Temp (withSystemTempDirectory)
import System.Random.Stateful (applyAtomicGen, globalStdGen)
import Test.HUnit
import Test.Hspec
import Test.Hspec.Contrib.HUnit

import MultiNamespace qualified (tests)
import Roundtrip qualified (tests)

type SerializeF = FilePath -> NetworkId -> SlotNo -> S.Stream (S.Of (InputChunk RawBytes)) IO () -> IO ()
import Test.Hspec

main :: IO ()
main = do
hspec $ do
fromHUnitTest tests
Roundtrip.tests
chunksBuilderTests
MultiNamespace.tests
where
tests =
TestList
[ roundTriptests
]
roundTriptests =
TestLabel "Roundtrip tests" $
TestList
[ mkRountripTestsFor "Reference" Reference.serialize
, mkRountripTestsFor "External" External.serialize
]
mkRountripTestsFor :: String -> SerializeF -> Test
mkRountripTestsFor groupName serialize =
TestLabel groupName $
TestList
[ TestLabel n $ TestCase $ roundtrip (T.pack n) (toCDDL huddle) serialize
| (n, huddle) <- Map.toList namespaces
]
roundtrip :: Text -> CDDL -> SerializeF -> Assertion
roundtrip namespace cddl serialize = do
case buildMonoCTree =<< buildResolvedCTree (buildRefCTree $ asMap cddl) of
Left err -> assertFailure $ "Failed to build CTree: " ++ show err
Right mt -> withSystemTempDirectory "scls-format-test-XXXXXX" $ \fn -> do
data_ <-
replicateM 1024 $
applyAtomicGen (generateCBORTerm' mt (Name (T.pack "record_entry") mempty)) globalStdGen
let encoded_data = [toStrictByteString (encodeTerm term) | term <- data_]
let fileName = (fn </> "data.scls")
serialize
fileName
Mainnet
(SlotNo 1)
(S.each [(namespace S.:> (S.each encoded_data & S.map RawBytes))])
withNamespacedData
fileName
namespace
( \stream -> do
decoded_data <- S.toList_ stream
assertEqual
"Stream roundtrip successful"
[b | RawBytes b <- decoded_data]
(sort encoded_data)
)
-- Check roundtrip of root hash
file_digest <- extractRootHash fileName
expected_digest <-
S.each (sort encoded_data)
& S.fold_ MT.add (MT.empty undefined) (Digest . MT.merkleRootHash . MT.finalize)
assertEqual
"Root hash roundtrip successful"
file_digest
(Digest $ MT.merkleRootHash $ MT.finalize $ MT.add (MT.empty undefined) expected_digest)
4 changes: 3 additions & 1 deletion scls-format/test/MultiNamespace.hs
@@ -7,6 +7,7 @@ module MultiNamespace (

import Cardano.SCLS.Internal.Hash (Digest (..))
import Cardano.SCLS.Internal.Reader (extractNamespaceHash, extractNamespaceList, extractRootHash, withNamespacedData)
import Cardano.SCLS.Internal.Record.Metadata (Metadata)
import Cardano.SCLS.Internal.Serializer.External.Impl qualified as External (serialize)
import Cardano.SCLS.Internal.Serializer.MemPack
import Cardano.SCLS.Internal.Serializer.Reference.Impl (InputChunk)
@@ -58,7 +59,7 @@ mkTestsFor serialize = do
, ("ns1", [BS8.pack (show (i :: Int)) | i <- [1 .. 2048]])
]

type SerializeF = FilePath -> NetworkId -> SlotNo -> S.Stream (S.Of (InputChunk RawBytes)) IO () -> IO ()
type SerializeF = FilePath -> NetworkId -> SlotNo -> S.Stream (S.Of (InputChunk RawBytes)) IO () -> S.Stream (S.Of Metadata) IO () -> IO ()

roundtrip :: SerializeF -> [(Text, [ByteString])] -> IO ()
roundtrip serialize input = do
@@ -70,6 +71,7 @@ roundtrip serialize input = do
Mainnet
(SlotNo 1)
mkStream
(S.each [])
nsps <- extractNamespaceList fileName
annotate "Namespaces are as expected" do
(Map.keys nsData) `shouldBe` (sort nsps)