Skip to content

Commit 9522c0a

Browse files
authored
Merge pull request #680 from IntersectMBO/wenkokke/full
feat(full): add full api
2 parents 5f976f5 + 1ac7f17 commit 9522c0a

File tree

33 files changed

+3145
-1652
lines changed

33 files changed

+3145
-1652
lines changed

README.md

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,7 @@ It has support for:
2424
- Range lookups, which efficiently retrieve the values for all keys in a
2525
given range.
2626

27-
- Monoidal upserts (or "mupserts") which combine the stored and new
28-
values.
27+
- Monoidal upserts which combine the stored and new values.
2928

3029
- BLOB storage which associates a large auxiliary BLOB with a key.
3130

@@ -43,9 +42,8 @@ This package exports two modules:
4342
This module exports a simplified API which picks sensible defaults for
4443
a number of configuration parameters.
4544

46-
It does not support mupserts or BLOBs, due to their unintuitive
47-
interaction, see [Mupserts and
48-
BLOBs](#mupsertsandblobs "#mupsertsandblobs").
45+
It does not support upserts or BLOBs, due to their unintuitive
46+
interaction, see [Upsert and BLOB](#upsertandblob "#upsertandblob").
4947

5048
If you are looking at this package for the first time, it is strongly
5149
recommended that you start by reading this module.
@@ -54,13 +52,12 @@ This package exports two modules:
5452

5553
This module exports the full API.
5654

57-
### Mupserts and BLOBs <span id="mupsertsandblobs" class="anchor"></span>
55+
### Upsert and BLOB <span id="upsertandblob" class="anchor"></span>
5856

59-
The interaction between mupserts and BLOBs is unintuitive. A mupsert
60-
updates the value associated with the key by combining the old and new
61-
value with a user-specified function. However, this does not apply to
62-
any BLOB value associated with the key, which is simply overwritten by
63-
the new BLOB value.
57+
The interaction between upserts and BLOBs is unintuitive. An upsert
58+
updates the value associated with the key by combining the new and old
59+
values with a user-specified function. However, any BLOB associated with
60+
the key is simply deleted.
6461

6562
### Portability <span id="portability" class="anchor"></span>
6663

@@ -114,7 +111,8 @@ constants:
114111
describe the complexity of an operation that involves multiple tables,
115112
it refers to the sum of all table entries.
116113

117-
- The variable *t* refers to the number of open tables in the session.
114+
- The variable *o* refers to the number of open tables and cursors in
115+
the session.
118116

119117
- The variable *s* refers to the number of snapshots in the session.
120118

@@ -157,7 +155,7 @@ Otherwise, the merge policy is listed as N/A.
157155
<td></td>
158156
<td>Close</td>
159157
<td><code>MergePolicyLazyLevelling</code></td>
160-
<td><span class="math inline">$O(t \: T \: \log_T
158+
<td><span class="math inline">$O(o \: T \: \log_T
161159
\frac{n}{B})$</span></td>
162160
</tr>
163161
<tr>

bench/macro/lsm-tree-bench-wp8.hs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -415,7 +415,7 @@ doSetup' gopts opts = do
415415
let name = LSM.toSnapshotName "bench"
416416

417417
LSM.withSession (mkTracer gopts) hasFS hasBlockIO (FS.mkFsPath []) $ \session -> do
418-
tbl <- LSM.new @IO @K @V @B session (mkTableConfigSetup gopts opts benchTableConfig)
418+
tbl <- LSM.newTableWith @IO @K @V @B (mkTableConfigSetup gopts opts benchTableConfig) session
419419

420420
forM_ (groupsOfN 256 [ 0 .. initialSize gopts ]) $ \batch -> do
421421
-- TODO: this procedure simply inserts all the keys into initial lsm tree
@@ -426,7 +426,7 @@ doSetup' gopts opts = do
426426
| i <- NE.toList batch
427427
]
428428

429-
LSM.createSnapshot label name tbl
429+
LSM.saveSnapshot name label tbl
430430

431431
-------------------------------------------------------------------------------
432432
-- dry-run
@@ -583,8 +583,8 @@ doRun gopts opts = do
583583
-- reference version starts with empty (as it's not practical or
584584
-- necessary for testing to load the whole snapshot).
585585
tbl <- if check opts
586-
then LSM.new @IO @K @V @B session (mkTableConfigRun gopts benchTableConfig)
587-
else LSM.openSnapshot @IO @K @V @B session (mkOverrideDiskCachePolicy gopts) label name
586+
then LSM.newTableWith @IO @K @V @B (mkTableConfigRun gopts benchTableConfig) session
587+
else LSM.openTableFromSnapshotWith @IO @K @V @B (mkOverrideDiskCachePolicy gopts) session name label
588588

589589
-- In checking mode, compare each output against a pure reference.
590590
checkvar <- newIORef $ pureReference
@@ -765,7 +765,7 @@ pipelinedIteration h output !initialSize !batchSize
765765

766766
-- At this point, after syncing, our peer is guaranteed to no longer be
767767
-- using tbl_n. They used it to generate tbl_n+1 (which they gave us).
768-
LSM.close tbl_n
768+
LSM.closeTable tbl_n
769769
output b $! applyUpdates delta (V.zip ls lrs)
770770
pure tbl_n1
771771

@@ -868,8 +868,8 @@ updateToLookupResult :: LSM.Update v b -> LSM.LookupResult v ()
868868
updateToLookupResult (LSM.Insert v Nothing) = LSM.Found v
869869
updateToLookupResult (LSM.Insert v (Just _)) = LSM.FoundWithBlob v ()
870870
updateToLookupResult LSM.Delete = LSM.NotFound
871-
updateToLookupResult (LSM.Mupsert _) = error $
872-
"Unexpected mupsert encountered"
871+
updateToLookupResult (LSM.Upsert _) = error $
872+
"Unexpected upsert encountered"
873873

874874
-- | Return the adjacent batches where there is overlap between one batch's
875875
-- inserts and the next batch's lookups. Testing the pipelined version needs

bench/micro/Bench/Database/LSMTree.hs

Lines changed: 48 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -14,14 +14,12 @@ import qualified Data.Vector as V
1414
import Data.Void
1515
import Data.Word
1616
import Database.LSMTree hiding (withTable)
17-
import qualified Database.LSMTree.Common as Common
1817
import Database.LSMTree.Extras
1918
import Database.LSMTree.Extras.Orphans ()
2019
import Database.LSMTree.Internal.Assertions (fromIntegralChecked)
2120
import qualified Database.LSMTree.Internal.RawBytes as RB
22-
import Database.LSMTree.Internal.Serialise.Class
2321
import GHC.Generics (Generic)
24-
import Prelude hiding (getContents)
22+
import Prelude hiding (getContents, take)
2523
import System.Directory (removeDirectoryRecursive)
2624
import qualified System.FS.API as FS
2725
import qualified System.FS.BlockIO.API as FS
@@ -35,9 +33,9 @@ benchmarks = bgroup "Bench.Database.LSMTree" [
3533
benchLargeValueVsSmallValueBlob
3634
, benchCursorScanVsRangeLookupScan
3735
, benchInsertBatches
38-
, benchInsertsVsMupserts
39-
, benchLookupsInsertsVsMupserts
40-
, benchLookupInsertsVsLookupMupserts
36+
, benchInsertsVsUpserts
37+
, benchLookupsInsertsVsUpserts
38+
, benchLookupInsertsVsLookupUpserts
4139
]
4240

4341
{-------------------------------------------------------------------------------
@@ -75,17 +73,13 @@ newtype V3 = V3 Word64
7573

7674
type B3 = Void
7775

78-
-- Simple addition
79-
resolve :: V3 -> V3 -> V3
80-
resolve = (+)
81-
8276
instance ResolveValue V3 where
83-
resolveValue = resolveDeserialised resolve
77+
resolve = (+)
8478

85-
benchConfig :: Common.TableConfig
86-
benchConfig = Common.defaultTableConfig {
87-
Common.confWriteBufferAlloc = Common.AllocNumEntries (Common.NumEntries 20000)
88-
, Common.confFencePointerIndex = Common.CompactIndex
79+
benchConfig :: TableConfig
80+
benchConfig = defaultTableConfig
81+
{ confWriteBufferAlloc = AllocNumEntries (NumEntries 20000)
82+
, confFencePointerIndex = CompactIndex
8983
}
9084

9185
{-------------------------------------------------------------------------------
@@ -142,12 +136,12 @@ benchLargeValueVsSmallValueBlob =
142136
initialise inss = do
143137
(tmpDir, hfs, hbio) <- mkFiles
144138
s <- openSession nullTracer hfs hbio (FS.mkFsPath [])
145-
t <- new s benchConfig
139+
t <- newTableWith benchConfig s
146140
V.mapM_ (inserts t) inss
147141
pure (tmpDir, hfs, hbio, s, t)
148142

149143
cleanup (tmpDir, hfs, hbio, s, t) = do
150-
close t
144+
closeTable t
151145
closeSession s
152146
cleanupFiles (tmpDir, hfs, hbio)
153147

@@ -172,11 +166,11 @@ benchCursorScanVsRangeLookupScan =
172166
bgroup "cursor-scan-vs-range-lookup-scan" [
173167
bench "cursor-scan-full" $ whnfIO $ do
174168
withCursor t $ \c -> do
175-
readCursor initialSize c
169+
take initialSize c
176170
, bench "cursor-scan-chunked" $ whnfIO $ do
177171
withCursor t $ \c -> do
178172
forM_ ([1 .. numChunks] :: [Int]) $ \_ -> do
179-
readCursor readSize c
173+
take readSize c
180174
, bench "range-scan-full" $ whnfIO $ do
181175
rangeLookup t (FromToIncluding (K minBound) (K maxBound))
182176
, bench "range-scan-chunked" $ whnfIO $ do
@@ -227,12 +221,12 @@ benchCursorScanVsRangeLookupScan =
227221
initialise inss = do
228222
(tmpDir, hfs, hbio) <- mkFiles
229223
s <- openSession nullTracer hfs hbio (FS.mkFsPath [])
230-
t <- new s benchConfig
224+
t <- newTableWith benchConfig s
231225
V.mapM_ (inserts t) inss
232226
pure (tmpDir, hfs, hbio, s, t)
233227

234228
cleanup (tmpDir, hfs, hbio, s, t) = do
235-
close t
229+
closeTable t
236230
closeSession s
237231
cleanupFiles (tmpDir, hfs, hbio)
238232

@@ -251,9 +245,9 @@ benchInsertBatches =
251245
!initialSize = 100_000
252246
!batchSize = 256
253247

254-
_benchConfig :: Common.TableConfig
248+
_benchConfig :: TableConfig
255249
_benchConfig = benchConfig {
256-
Common.confWriteBufferAlloc = Common.AllocNumEntries (Common.NumEntries 1000)
250+
confWriteBufferAlloc = AllocNumEntries (NumEntries 1000)
257251
}
258252

259253
randomInserts :: Int -> V.Vector (K, V2, Maybe Void)
@@ -272,31 +266,31 @@ benchInsertBatches =
272266
initialise = do
273267
(tmpDir, hfs, hbio) <- mkFiles
274268
s <- openSession nullTracer hfs hbio (FS.mkFsPath [])
275-
t <- new s _benchConfig
269+
t <- newTableWith _benchConfig s
276270
pure (tmpDir, hfs, hbio, s, t)
277271

278272
cleanup (tmpDir, hfs, hbio, s, t) = do
279-
close t
273+
closeTable t
280274
closeSession s
281275
cleanupFiles (tmpDir, hfs, hbio)
282276

283277
{-------------------------------------------------------------------------------
284-
Inserts vs. Mupserts
278+
Inserts vs. Upserts
285279
-------------------------------------------------------------------------------}
286280

287-
-- | Compare inserts and mupserts. The logical contents of the resulting
281+
-- | Compare inserts and upserts. The logical contents of the resulting
288282
-- database are the same.
289-
benchInsertsVsMupserts :: Benchmark
290-
benchInsertsVsMupserts =
283+
benchInsertsVsUpserts :: Benchmark
284+
benchInsertsVsUpserts =
291285
env (pure $ snd $ randomEntriesGrouped 800_000 250) $ \ess ->
292286
env (pure $ V.map mkInserts ess) $ \inss ->
293-
bgroup "inserts-vs-mupserts" [
287+
bgroup "inserts-vs-upserts" [
294288
bench "inserts" $
295289
withEmptyTable $ \(_, _, _, _, t) ->
296290
V.mapM_ (inserts t) inss
297-
, bench "mupserts" $
291+
, bench "upserts" $
298292
withEmptyTable $ \(_, _, _, _, t) ->
299-
V.mapM_ (mupserts t) ess
293+
V.mapM_ (upserts t) ess
300294
]
301295
where
302296
withEmptyTable =
@@ -311,18 +305,18 @@ benchInsertsVsMupserts =
311305
)
312306

313307
{-------------------------------------------------------------------------------
314-
Lookups plus Inserts vs. Mupserts
308+
Lookups plus Inserts vs. Upserts
315309
-------------------------------------------------------------------------------}
316310

317-
-- | Compare lookups+inserts to mupserts. The former costs 2 LSMT operations,
318-
-- while Mupserts only cost 1 LSMT operation. The number of operations do not
311+
-- | Compare lookups+inserts to upserts. The former costs 2 LSMT operations,
312+
-- while Upserts only cost 1 LSMT operation. The number of operations does not
319313
-- directly translate to the number of I\/O operations, but one can assume that
320-
-- lookup+insert is roughly twice as costly as mupsert.
321-
benchLookupsInsertsVsMupserts :: Benchmark
322-
benchLookupsInsertsVsMupserts =
314+
-- lookup+insert is roughly twice as costly as upsert.
315+
benchLookupsInsertsVsUpserts :: Benchmark
316+
benchLookupsInsertsVsUpserts =
323317
env (pure $ snd $ randomEntriesGrouped 800_000 250) $ \ess ->
324318
env (pure $ V.map mkInserts ess) $ \inss ->
325-
bgroup "lookups-inserts-vs-mupserts" [
319+
bgroup "lookups-inserts-vs-upserts" [
326320
bench "lookups-inserts" $
327321
withTable inss $ \(_, _, _, _, t) ->
328322
-- Insert the same keys again, but we sum the existing values in
@@ -333,12 +327,12 @@ benchLookupsInsertsVsMupserts =
333327
lrs <- lookups t (V.map fst es)
334328
let ins' = V.zipWith f es lrs
335329
inserts t ins'
336-
, bench "mupserts" $
330+
, bench "upserts" $
337331
withTable inss $ \(_, _, _, _, t) ->
338332
-- Insert the same keys again, but we sum the existing values in
339333
-- the table with the values we are going to insert: submit
340-
-- mupserts with the insert values.
341-
V.forM_ ess $ \es -> mupserts t es
334+
-- upserts with the insert values.
335+
V.forM_ ess $ \es -> upserts t es
342336
]
343337
where
344338
f (k, v) = \case
@@ -359,20 +353,20 @@ benchLookupsInsertsVsMupserts =
359353
)
360354

361355
{-------------------------------------------------------------------------------
362-
Lookup Inserts vs. Lookup Mupserts
356+
Lookup Inserts vs. Lookup Upserts
363357
-------------------------------------------------------------------------------}
364358

365-
-- | Compare lookups after inserts against lookups after mupserts.
366-
benchLookupInsertsVsLookupMupserts :: Benchmark
367-
benchLookupInsertsVsLookupMupserts =
359+
-- | Compare lookups after inserts against lookups after upserts.
360+
benchLookupInsertsVsLookupUpserts :: Benchmark
361+
benchLookupInsertsVsLookupUpserts =
368362
env (pure $ snd $ randomEntriesGrouped 80_000 250) $ \ess ->
369363
env (pure $ V.map mkInserts ess) $ \inss ->
370-
bgroup "lookup-inserts-vs-lookup-mupserts" [
364+
bgroup "lookup-inserts-vs-lookup-upserts" [
371365
bench "lookup-inserts" $
372366
withInsertTable inss $ \(_, _, _, _, t) -> do
373367
V.forM_ ess $ \es -> lookups t (V.map fst es)
374-
, bench "lookup-mupserts" $
375-
withMupsertTable ess $ \(_, _, _, _, t) -> do
368+
, bench "lookup-upserts" $
369+
withUpsertTable ess $ \(_, _, _, _, t) -> do
376370
V.forM_ ess $ \es -> lookups t (V.map fst es)
377371
]
378372
where
@@ -393,14 +387,14 @@ benchLookupInsertsVsLookupMupserts =
393387
cleanupFiles (tmpDir, hfs, hbio)
394388
)
395389

396-
withMupsertTable ess =
390+
withUpsertTable ess =
397391
perRunEnvWithCleanup
398-
-- Mupsert the same key 10 times. The results in a logical database
392+
-- Upsert the same key 10 times. This results in a logical database
399393
-- containing the original keys with the original value *10.
400394
(do (tmpDir, hfs, hbio) <- mkFiles
401395
(s, t) <- mkTable hfs hbio benchConfig
402396
V.forM_ [1..10] $ \(_::Int) ->
403-
V.mapM_ (mupserts t) ess
397+
V.mapM_ (upserts t) ess
404398
pure (tmpDir, hfs, hbio, s, t)
405399
)
406400
(\(tmpDir, hfs, hbio, s, t) -> do
@@ -458,7 +452,7 @@ mkTable ::
458452
)
459453
mkTable hfs hbio conf = do
460454
sesh <- openSession nullTracer hfs hbio (FS.mkFsPath [])
461-
t <- new sesh conf
455+
t <- newTableWith conf sesh
462456
pure (sesh, t)
463457

464458
cleanupTable ::
@@ -467,5 +461,5 @@ cleanupTable ::
467461
)
468462
-> IO ()
469463
cleanupTable (s, t) = do
470-
close t
464+
closeTable t
471465
closeSession s

doc/format-page.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Logically, a page consists of a value of type
2525
type Page = [(Key, Operation, Maybe BlobRef)]
2626
```
2727
where the operation can be insert (with a value), delete (with no value) or
28-
mupsert (with a value). The entries are sorted by key.
28+
upsert (with a value). The entries are sorted by key.
2929

3030
The file is structured in a page-oriented way to support efficient I/O using
3131
random page-sized reads. By page-oriented we mean that the information is

0 commit comments

Comments
 (0)