Skip to content

Commit 6434e1b

Browse files
authored
Merge pull request #647 from IntersectMBO/mheinzel/union-improvements
Small improvements related to table unions
2 parents 5e72f1f + 873e59d commit 6434e1b

File tree

8 files changed

+81
-79
lines changed

8 files changed

+81
-79
lines changed

bench/macro/lsm-tree-bench-lookups.hs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,8 @@ benchLookupsIO !hbio !arenaManager !resolve !wb !wbblobs !rs !bs !ics !hs =
486486
| n <= 0 = pure ()
487487
| otherwise = do
488488
let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
489-
!_ <- lookupsIO hbio arenaManager resolve wb wbblobs rs bs ics hs ks
489+
!_ <- lookupsIOWithWriteBuffer
490+
hbio arenaManager resolve wb wbblobs rs bs ics hs ks
490491
go keyRng' (n-benchmarkGenBatchSize)
491492

492493
{-------------------------------------------------------------------------------

bench/micro/Bench/Database/LSMTree/Internal/Lookup.hs

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ import Database.LSMTree.Extras.UTxO
2323
import Database.LSMTree.Internal.Entry (Entry (..), NumEntries (..))
2424
import Database.LSMTree.Internal.Index as Index
2525
import Database.LSMTree.Internal.Lookup (bloomQueries, indexSearches,
26-
intraPageLookups, lookupsIO, prepLookups)
26+
intraPageLookupsWithWriteBuffer, lookupsIOWithWriteBuffer,
27+
prepLookups)
2728
import Database.LSMTree.Internal.Page (getNumPages)
2829
import Database.LSMTree.Internal.Paths (RunFsPaths (..))
2930
import Database.LSMTree.Internal.Run (Run)
@@ -132,17 +133,18 @@ benchLookups conf@Config{name} =
132133
)
133134
(\(_, _, _, arena) -> closeArena arenaManager arena)
134135
(\ ~(rkixs, ioops, ioress, _) -> do
135-
!_ <- intraPageLookups resolveV WB.empty wbblobs
136-
rs ks rkixs ioops ioress
136+
!_ <- intraPageLookupsWithWriteBuffer
137+
resolveV WB.empty wbblobs rs ks rkixs ioops ioress
137138
pure ())
138139
-- The whole shebang: lookup preparation, doing the IO, and then
139140
-- performing intra-page-lookups. Again, we evaluate the result to
140-
-- WHNF because it is the same result that intraPageLookups produces
141-
-- (see above).
141+
-- WHNF because it is the same result that
142+
-- intraPageLookupsWithWriteBuffer produces (see above).
142143
, bench "Lookups in IO" $
143-
whnfAppIO (\ks' -> lookupsIO hasBlockIO arenaManager resolveV
144-
WB.empty wbblobs
145-
rs blooms indexes kopsFiles ks') ks
144+
whnfAppIO (\ks' -> lookupsIOWithWriteBuffer
145+
hasBlockIO arenaManager resolveV
146+
WB.empty wbblobs
147+
rs blooms indexes kopsFiles ks') ks
146148
]
147149
-- TODO: consider adding benchmarks that also use the write buffer
148150
-- (then we can't just use 'WB.empty', but must take it from the env)

src/Database/LSMTree/Internal.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ import Database.LSMTree.Internal.Entry (Entry, NumEntries (..))
127127
import Database.LSMTree.Internal.IncomingRun (IncomingRun (..))
128128
import Database.LSMTree.Internal.Lookup (ResolveSerialisedValue,
129129
TableCorruptedError (..), lookupsIO,
130-
lookupsIOWithoutWriteBuffer)
130+
lookupsIOWithWriteBuffer)
131131
import Database.LSMTree.Internal.MergeSchedule
132132
import Database.LSMTree.Internal.MergingRun (TableTooLargeError (..))
133133
import qualified Database.LSMTree.Internal.MergingRun as MR
@@ -838,7 +838,7 @@ lookups resolve ks t = do
838838
where
839839
regularLevelLookups tEnv tableContent = do
840840
let !cache = tableCache tableContent
841-
lookupsIO
841+
lookupsIOWithWriteBuffer
842842
(tableHasBlockIO tEnv)
843843
(tableArenaManager t)
844844
resolve
@@ -851,7 +851,7 @@ lookups resolve ks t = do
851851
ks
852852

853853
treeBatchLookups tEnv runs =
854-
lookupsIOWithoutWriteBuffer
854+
lookupsIO
855855
(tableHasBlockIO tEnv)
856856
(tableArenaManager t)
857857
resolve

src/Database/LSMTree/Internal/Lookup.hs

Lines changed: 20 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
module Database.LSMTree.Internal.Lookup (
44
ResolveSerialisedValue
55
, LookupAcc
6+
, lookupsIOWithWriteBuffer
67
, lookupsIO
7-
, lookupsIOWithoutWriteBuffer
88
-- * Errors
99
, TableCorruptedError (..)
1010
-- * Internal: exposed for tests and benchmarks
@@ -14,7 +14,8 @@ module Database.LSMTree.Internal.Lookup (
1414
, prepLookups
1515
, bloomQueries
1616
, indexSearches
17-
, intraPageLookups
17+
, intraPageLookupsWithWriteBuffer
18+
, intraPageLookupsOn
1819
) where
1920

2021
import Data.Arena (Arena, ArenaManager, allocateFromArena, withArena)
@@ -114,7 +115,7 @@ type ResolveSerialisedValue = SerialisedValue -> SerialisedValue -> SerialisedVa
114115

115116
type LookupAcc m h = V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h)))
116117

117-
{-# SPECIALIZE lookupsIO ::
118+
{-# SPECIALIZE lookupsIOWithWriteBuffer ::
118119
HasBlockIO IO h
119120
-> ArenaManager RealWorld
120121
-> ResolveSerialisedValue
@@ -127,13 +128,8 @@ type LookupAcc m h = V.Vector (Maybe (Entry SerialisedValue (WeakBlobRef m h)))
127128
-> V.Vector SerialisedKey
128129
-> IO (LookupAcc IO h)
129130
#-}
130-
-- | Batched lookups in I\/O.
131-
--
132-
-- See Note [Batched lookups, buffer strategy and restrictions]
133-
--
134-
-- PRECONDITION: the vectors of bloom filters, indexes and file handles
135-
-- should pointwise match with the vectors of runs.
136-
lookupsIO ::
131+
-- | Like 'lookupsIO', but takes a write buffer into account.
132+
lookupsIOWithWriteBuffer ::
137133
forall m h. (MonadThrow m, MonadST m)
138134
=> HasBlockIO m h
139135
-> ArenaManager (PrimState m)
@@ -146,12 +142,12 @@ lookupsIO ::
146142
-> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@
147143
-> V.Vector SerialisedKey
148144
-> m (LookupAcc m h)
149-
lookupsIO !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks =
145+
lookupsIOWithWriteBuffer !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks =
150146
assert precondition $
151147
withArena mgr $ \arena -> do
152148
(rkixs, ioops) <- ST.stToIO $ prepLookups arena blooms indexes kopsFiles ks
153149
ioress <- submitIO hbio ioops
154-
intraPageLookups resolveV wb wbblobs rs ks rkixs ioops ioress
150+
intraPageLookupsWithWriteBuffer resolveV wb wbblobs rs ks rkixs ioops ioress
155151
where
156152
-- we check only that the lengths match, because checking the contents is
157153
-- too expensive.
@@ -161,7 +157,7 @@ lookupsIO !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks
161157
assert (V.length rs == V.length kopsFiles) $
162158
True
163159

164-
{-# SPECIALIZE lookupsIOWithoutWriteBuffer ::
160+
{-# SPECIALIZE lookupsIO ::
165161
HasBlockIO IO h
166162
-> ArenaManager RealWorld
167163
-> ResolveSerialisedValue
@@ -174,11 +170,9 @@ lookupsIO !hbio !mgr !resolveV !wb !wbblobs !rs !blooms !indexes !kopsFiles !ks
174170
#-}
175171
-- | Batched lookups in I\/O.
176172
--
177-
-- See Note [Batched lookups, buffer strategy and restrictions]
178-
--
179173
-- PRECONDITION: the vectors of bloom filters, indexes and file handles
180174
-- should pointwise match with the vectors of runs.
181-
lookupsIOWithoutWriteBuffer ::
175+
lookupsIO ::
182176
forall m h. (MonadThrow m, MonadST m)
183177
=> HasBlockIO m h
184178
-> ArenaManager (PrimState m)
@@ -189,7 +183,7 @@ lookupsIOWithoutWriteBuffer ::
189183
-> V.Vector (Handle h) -- ^ The file handles to the key\/value files inside @rs@
190184
-> V.Vector SerialisedKey
191185
-> m (LookupAcc m h)
192-
lookupsIOWithoutWriteBuffer !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles !ks =
186+
lookupsIO !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles !ks =
193187
assert precondition $
194188
withArena mgr $ \arena -> do
195189
(rkixs, ioops) <- ST.stToIO $ prepLookups arena blooms indexes kopsFiles ks
@@ -204,7 +198,7 @@ lookupsIOWithoutWriteBuffer !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles
204198
assert (V.length rs == V.length kopsFiles) $
205199
True
206200

207-
{-# SPECIALIZE intraPageLookups ::
201+
{-# SPECIALIZE intraPageLookupsWithWriteBuffer ::
208202
ResolveSerialisedValue
209203
-> WB.WriteBuffer
210204
-> Ref (WBB.WriteBufferBlobs IO h)
@@ -215,14 +209,10 @@ lookupsIOWithoutWriteBuffer !hbio !mgr !resolveV !rs !blooms !indexes !kopsFiles
215209
-> VU.Vector IOResult
216210
-> IO (LookupAcc IO h)
217211
#-}
218-
-- | Intra-page lookups, and combining lookup results from multiple runs and
219-
-- the write buffer.
220-
--
221-
-- This function assumes that @rkixs@ is ordered such that newer runs are
222-
-- handled first. The order matters for resolving cases where we find the same
223-
-- key in multiple runs.
212+
-- | Like 'intraPageLookupsOn', but uses the write buffer as the initial
213+
-- accumulator.
224214
--
225-
intraPageLookups ::
215+
intraPageLookupsWithWriteBuffer ::
226216
forall m h. (PrimMonad m, MonadThrow m)
227217
=> ResolveSerialisedValue
228218
-> WB.WriteBuffer
@@ -233,21 +223,9 @@ intraPageLookups ::
233223
-> V.Vector (IOOp (PrimState m) h)
234224
-> VU.Vector IOResult
235225
-> m (LookupAcc m h)
236-
intraPageLookups !resolveV !wb !wbblobs !rs !ks !rkixs !ioops !ioress = do
237-
-- We accumulate results into the 'res' vector. When there are several
238-
-- lookup hits for the same key then we combine the results. The combining
239-
-- operator is associative but not commutative, so we must do this in the
240-
-- right order. We start with the write buffer lookup results and then go
241-
-- through the run lookup results in rkixs, which must be ordered by run.
242-
--
243-
-- TODO: reassess the representation of the result vector to try to reduce
244-
-- intermediate allocations. For example use a less convenient
245-
-- representation with several vectors (e.g. separate blob info) and
246-
-- convert to the final convenient representation in a single pass near
247-
-- the surface API so that all the conversions can be done in one pass
248-
-- without intermediate allocations.
249-
--
250-
226+
intraPageLookupsWithWriteBuffer !resolveV !wb !wbblobs !rs !ks !rkixs !ioops !ioress = do
227+
-- The most recent values are in the write buffer, so we use it to
228+
-- initialise the accumulator.
251229
acc0 <-
252230
V.generateM (V.length ks) $ \ki ->
253231
case WB.lookup wb (V.unsafeIndex ks ki) of
@@ -278,7 +256,7 @@ data TableCorruptedError
278256
-> IO (LookupAcc IO h)
279257
#-}
280258
-- | Intra-page lookups, and combining lookup results from multiple runs and
281-
-- the write buffer.
259+
-- a potential initial accumulator (e.g. from the write buffer).
282260
--
283261
-- This function assumes that @rkixs@ is ordered such that newer runs are
284262
-- handled first. The order matters for resolving cases where we find the same
@@ -287,7 +265,7 @@ data TableCorruptedError
287265
intraPageLookupsOn ::
288266
forall m h. (PrimMonad m, MonadThrow m)
289267
=> ResolveSerialisedValue
290-
-> LookupAcc m h
268+
-> LookupAcc m h -- initial acc
291269
-> V.Vector (Ref (Run m h))
292270
-> V.Vector SerialisedKey
293271
-> VP.Vector RunIxKeyIx

src/Database/LSMTree/Internal/MergingRun.hs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,10 @@ unsafeNew mergeDebt (SpentCredits spentCredits)
258258

259259
-- | Create references to the runs that should be queried for lookups.
260260
-- In particular, if the merge is not complete, these are the input runs.
261+
--
262+
-- TODO: This interface doesn't work well with the action registry. Just doing
263+
-- @withRollback reg (duplicateRuns mr) (mapM_ releaseRef)@ isn't exception-safe
264+
-- since if one of the @releaseRef@ calls fails, the following ones aren't run.
261265
{-# SPECIALISE duplicateRuns ::
262266
Ref (MergingRun t IO h) -> IO (V.Vector (Ref (Run IO h))) #-}
263267
duplicateRuns ::
@@ -757,8 +761,8 @@ atomicSpendCredits (CreditsVar var) spend =
757761

758762
{-# SPECIALISE remainingMergeDebt ::
759763
Ref (MergingRun t IO h) -> IO (MergeDebt, NumEntries) #-}
760-
-- | Calculate an upper bound on the merge credits required to complete the
761-
-- merge, as well as an upper bound on the size of the resulting run.
764+
-- | Calculate the merge credits required to complete the merge, as well as an
765+
-- upper bound on the size of the resulting run.
762766
remainingMergeDebt ::
763767
(MonadMVar m, PrimMonad m)
764768
=> Ref (MergingRun t m h) -> m (MergeDebt, NumEntries)

src/Database/LSMTree/Internal/MergingTree/Lookup.hs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,6 @@ mergeLookupAcc resolve mt accs =
8181
--
8282
-- This function duplicates the references to all the tree's runs.
8383
-- These references later need to be released using 'releaseLookupTree'.
84-
--
85-
-- This function should be run with asynchronous exceptions masked to prevent
86-
-- failing after internal resources have already been created.
8784
{-# SPECIALISE buildLookupTree ::
8885
ActionRegistry IO
8986
-> Ref (MT.MergingTree IO h)

test/Test/Database/LSMTree/Internal/Lookup.hs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ prop_roundtripFromWriteBufferLookupIO (SmallList dats) =
319319
arenaManager <- newArenaManager
320320
realres <-
321321
fetchBlobs hfs =<< -- retrieve blobs to match type of model result
322-
lookupsIO
322+
lookupsIOWithWriteBuffer
323323
hbio
324324
arenaManager
325325
resolveV
@@ -339,8 +339,9 @@ prop_roundtripFromWriteBufferLookupIO (SmallList dats) =
339339
fetchBlobs hfs = traverse (traverse (traverse (readWeakBlobRef hfs)))
340340

341341
-- | Given a bunch of 'InMemLookupData', prepare the data into the form needed
342-
-- for 'lookupsIO': a write buffer (and blobs) and a vector of on-disk runs.
343-
-- Also passes the model and the keys to look up to the inner action.
342+
-- for 'lookupsIOWithWriteBuffer': a write buffer (and blobs) and a vector of
343+
-- on-disk runs. Also passes the model and the keys to look up to the inner
344+
-- action.
344345
--
345346
withWbAndRuns :: FS.HasFS IO h
346347
-> FS.HasBlockIO IO h

test/Test/Database/LSMTree/Internal/MergingTree.hs

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import Data.Map (Map)
1414
import qualified Data.Map as Map
1515
import Data.Traversable (for)
1616
import qualified Data.Vector as V
17+
import Database.LSMTree.Extras (showPowersOf10)
1718
import Database.LSMTree.Extras.MergingRunData
1819
import Database.LSMTree.Extras.MergingTreeData
1920
import Database.LSMTree.Extras.RunData
@@ -138,8 +139,9 @@ prop_lookupTree hfs hbio keys mtd = do
138139
-> IO (V.Vector (Maybe (Entry v SerialisedBlob)))
139140
fetchBlobs = traverse (traverse (traverse (readWeakBlobRef hfs)))
140141

141-
-- trees are always in the last level, there is no distinction between
142-
-- (Nothing and Just Delete), (Insert and Mupsert)
142+
-- the lookup accs might be different between implementation and model
143+
-- (Nothing vs. Just Delete, Insert vs. Mupsert), but this doesn't matter
144+
-- for the final result of the lookup
143145
normalise = V.map toLookupResult
144146

145147
toLookupResult Nothing = Nothing
@@ -156,13 +158,14 @@ prop_lookupTree hfs hbio keys mtd = do
156158
return $ V.map (const Nothing) keys
157159
False -> do
158160
batches <- buildLookupTree reg tree
159-
releaseLookupTree reg batches -- only happens at the end
160161
results <- traverse (performLookups mgr) batches
161-
foldLookupTree resolveVal results
162+
acc <- foldLookupTree resolveVal results
163+
releaseLookupTree reg batches
164+
return acc
162165

163166
performLookups mgr runs =
164167
Async.async $
165-
Lookup.lookupsIOWithoutWriteBuffer
168+
Lookup.lookupsIO
166169
hbio
167170
mgr
168171
resolveVal
@@ -227,22 +230,38 @@ prop_supplyCredits hfs hbio threshold credits mtd = do
227230
FS.createDirectory hfs (FS.mkFsPath ["active"])
228231
counter <- newUniqCounter 0
229232
withMergingTree hfs hbio resolveVal runParams setupPath counter mtd $ \tree -> do
233+
(MR.MergeDebt initialDebt, _) <- remainingMergeDebt tree
230234
props <- for credits $ \c -> do
231235
(MR.MergeDebt debt, _) <- remainingMergeDebt tree
232-
leftovers <-
233-
supplyCredits hfs hbio resolveVal runParams threshold root counter tree c
234-
(MR.MergeDebt debt', _) <- remainingMergeDebt tree
235-
return $
236-
counterexample (show (debt, leftovers, debt')) $ conjoin [
237-
counterexample "negative values" $
238-
debt >= 0 && leftovers >= 0 && debt' >= 0
239-
, counterexample "did not reduce debt sufficiently" $
240-
debt' <= debt - (c - leftovers)
241-
]
242-
return (conjoin (toList props))
236+
if debt <= 0
237+
then
238+
return $ property True
239+
else do
240+
leftovers <-
241+
supplyCredits hfs hbio resolveVal runParams threshold root counter tree c
242+
(MR.MergeDebt debt', _) <- remainingMergeDebt tree
243+
return $
244+
-- semi-useful, but mainly tells us in how many steps we supplied
245+
tabulate "supplied credits" [showPowersOf10 (fromIntegral c)] $
246+
counterexample (show (debt, leftovers, debt')) $ conjoin [
247+
counterexample "negative values" $
248+
debt >= 0 && leftovers >= 0 && debt' >= 0
249+
, counterexample "did not reduce debt sufficiently" $
250+
debt' <= debt - (c - leftovers)
251+
]
252+
(MR.MergeDebt finalDebt, _) <- remainingMergeDebt tree
253+
return $
254+
labelDebt initialDebt finalDebt $
255+
conjoin (toList props)
243256
where
244257
root = Paths.SessionRoot (FS.mkFsPath [])
245-
setupPath = FS.mkFsPath ["setup"] -- separate dir, so it doesn't clash
258+
setupPath = FS.mkFsPath ["setup"] -- separate dir, so file paths in errors
259+
-- are identifiable as created in setup
260+
--
261+
labelDebt initial final
262+
| initial == 0 = label "trivial"
263+
| final == 0 = label "completed"
264+
| otherwise = label "incomplete"
246265

247266
instance Arbitrary MR.MergeCredits where
248267
arbitrary = MR.MergeCredits . getPositive <$> arbitrary

0 commit comments

Comments
 (0)