Skip to content

Commit 06a9e27

Browse files
AuHau and achingbrain
authored
feat: store blocks under multihash key (#211)
This is related to ipfs/js-ipfs#2415

Breaking changes:

- Repo version incremented to `8`, requires a migration
- Blocks are now stored using the multihash, not the full CID
- `repo.blocks.query({})` now returns an async iterator that yields blocks
- `repo.blocks.query({ keysOnly: true })` now returns an async iterator that yields CIDs
- Those CIDs are v1 with the raw codec

Co-authored-by: achingbrain <[email protected]>
1 parent d4bf852 commit 06a9e27

12 files changed

+208
-108
lines changed

README.md

+41-16
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,20 @@ This is the implementation of the [IPFS repo spec](https://github.com/ipfs/specs
4040
- [`Promise<Buffer> repo.get(key)`](#promisebuffer-repogetkey)
4141
- [Blocks](#blocks)
4242
- [`Promise<Block> repo.blocks.put(block:Block)`](#promiseblock-repoblocksputblockblock)
43-
- [`AsyncIterator<Block> repo.blocks.putMany(source)`](#asynciteratorblock-repoblocksputmanysource)
44-
- [`Promise<Buffer> repo.blocks.get(cid)`](#promisebuffer-repoblocksgetcid)
45-
- [`AsyncIterable<Buffer> repo.blocks.getMany(source)`](#asynciterablebuffer-repoblocksgetmanysource)
43+
- [`AsyncIterator<Block> repo.blocks.putMany(source:AsyncIterable<Block>)`](#asynciteratorblock-repoblocksputmanysourceasynciterableblock)
44+
- [`Promise<Block> repo.blocks.get(cid:CID)`](#promiseblock-repoblocksgetcidcid)
45+
- [`AsyncIterable<Block> repo.blocks.getMany(source:AsyncIterable<CID>)`](#asynciterableblock-repoblocksgetmanysourceasynciterablecid)
46+
- [`Promise<boolean> repo.blocks.has (cid:CID)`](#promiseboolean-repoblockshas-cidcid)
47+
- [`Promise<boolean> repo.blocks.delete (cid:CID)`](#promiseboolean-repoblocksdelete-cidcid)
48+
- [`AsyncIterator<Block|CID> repo.blocks.query (query)`](#asynciteratorblockcid-repoblocksquery-query)
4649
- [`Promise<CID> repo.blocks.delete(cid:CID)`](#promisecid-repoblocksdeletecidcid)
47-
- [`AsyncIterator<CID> repo.blocks.deleteMany(source)`](#asynciteratorcid-repoblocksdeletemanysource)
50+
- [`AsyncIterator<CID> repo.blocks.deleteMany(source:AsyncIterable<CID>)`](#asynciteratorcid-repoblocksdeletemanysourceasynciterablecid)
4851
- [Datastore](#datastore)
4952
- [`repo.datastore`](#repodatastore)
5053
- [Config](#config)
51-
- [`Promise repo.config.set(key:string, value)`](#promise-repoconfigsetkeystring-value)
52-
- [`Promise repo.config.replace(value)`](#promise-repoconfigreplacevalue)
53-
- [`Promise<?> repo.config.get(key:string)`](#promise-repoconfiggetkeystring)
54+
- [`Promise repo.config.set(key:String, value:Object)`](#promise-repoconfigsetkeystring-valueobject)
55+
- [`Promise repo.config.replace(value:Object)`](#promise-repoconfigreplacevalueobject)
56+
- [`Promise<?> repo.config.get(key:String)`](#promise-repoconfiggetkeystring)
5457
- [`Promise<Object> repo.config.getAll()`](#promiseobject-repoconfiggetall)
5558
- [`Promise<boolean> repo.config.exists()`](#promiseboolean-repoconfigexists)
5659
- [Version](#version)
@@ -229,31 +232,53 @@ Get a value at the root of the repo
229232

230233
* `block` should be of type [Block][]
231234

232-
#### `AsyncIterator<Block> repo.blocks.putMany(source)`
235+
#### `AsyncIterator<Block> repo.blocks.putMany(source:AsyncIterable<Block>)`
233236

234237
Put many blocks.
235238

236239
* `source` should be an AsyncIterable that yields entries of type [Block][]
237240

238-
#### `Promise<Buffer> repo.blocks.get(cid)`
241+
#### `Promise<Block> repo.blocks.get(cid:CID)`
239242

240243
Get block.
241244

242245
* `cid` is the content id of type [CID][]
243246

244-
#### `AsyncIterable<Buffer> repo.blocks.getMany(source)`
247+
#### `AsyncIterable<Block> repo.blocks.getMany(source:AsyncIterable<CID>)`
245248

246-
Get block.
249+
Get many blocks
247250

248251
* `source` should be an AsyncIterable that yields entries of type [CID][]
249252

253+
#### `Promise<boolean> repo.blocks.has (cid:CID)`
254+
255+
Indicate if a block is present for the passed CID
256+
257+
* `cid` should be of the type [CID][]
258+
259+
#### `Promise<boolean> repo.blocks.delete (cid:CID)`
260+
261+
Deletes a block
262+
263+
* `cid` should be of the type [CID][]
264+
265+
#### `AsyncIterator<Block|CID> repo.blocks.query (query)`
266+
267+
Query what blocks are available in blockstore.
268+
269+
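If `query.keysOnly` is true, the returned iterator will yield [CID][]s; otherwise it will yield [Block][]s. Because blocks are stored under their multihash only, the yielded CIDs are always v1 with the raw codec and may differ from the CIDs the blocks were originally stored with.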
270+
271+
* `query` is an object as specified in [interface-datastore](https://github.com/ipfs/interface-datastore#query).
272+
273+
Datastore:
274+
250275
#### `Promise<CID> repo.blocks.delete(cid:CID)`
251276

252277
* `cid` should be of the type [CID][]
253278

254279
Delete a block
255280

256-
#### `AsyncIterator<CID> repo.blocks.deleteMany(source)`
281+
#### `AsyncIterator<CID> repo.blocks.deleteMany(source:AsyncIterable<CID>)`
257282

258283
* `source` should be an Iterable or AsyncIterable that yields entries of the type [CID][]
259284

@@ -269,7 +294,7 @@ This contains a full implementation of [the `interface-datastore` API](https://g
269294

270295
Instead of using `repo.set('config')` this exposes an API that allows you to set and get a decoded config object, as well as, in a safe manner, change any of the config values individually.
271296

272-
#### `Promise repo.config.set(key:string, value)`
297+
#### `Promise repo.config.set(key:String, value:Object)`
273298

274299
Set a config value. `value` can be any object that is serializable to JSON.
275300

@@ -281,11 +306,11 @@ const config = await repo.config.get()
281306
assert.equal(config.a.b.c, 'c value')
282307
```
283308

284-
#### `Promise repo.config.replace(value)`
309+
#### `Promise repo.config.replace(value:Object)`
285310

286311
Set the whole config value. `value` can be any object that is serializable to JSON.
287312

288-
#### `Promise<?> repo.config.get(key:string)`
313+
#### `Promise<?> repo.config.get(key:String)`
289314

290315
Get a config value. Returned promise resolves to the same type that was set before.
291316

@@ -379,7 +404,7 @@ Returned promise resolves to a `boolean` indicating the existence of the lock.
379404

380405
### Migrations
381406

382-
When there is a new repo migration and the version of repo is increased, don't
407+
When there is a new repo migration and the version of the repo is increased, don't
383408
forget to propagate the changes into the test repo (`test/test-repo`).
384409

385410
**For tools that run mainly in the browser environment, be aware that disabling automatic

package.json

+3-4
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,7 @@
5252
"it-first": "^1.0.2",
5353
"just-range": "^2.1.0",
5454
"memdown": "^5.1.0",
55-
"multihashes": "^1.0.1",
56-
"multihashing-async": "^0.8.0",
55+
"multihashing-async": "^1.0.0",
5756
"ncp": "^2.0.0",
5857
"rimraf": "^3.0.0",
5958
"sinon": "^9.0.2"
@@ -69,11 +68,11 @@
6968
"debug": "^4.1.0",
7069
"err-code": "^2.0.0",
7170
"interface-datastore": "^1.0.2",
72-
"ipfs-repo-migrations": "^0.2.1",
71+
"ipfs-repo-migrations": "^1.0.0",
7372
"ipfs-utils": "^2.2.0",
7473
"ipld-block": "^0.9.1",
7574
"it-map": "^1.0.2",
76-
"it-pipe": "^1.1.0",
75+
"it-pushable": "^1.4.0",
7776
"just-safe-get": "^2.0.0",
7877
"just-safe-set": "^2.1.0",
7978
"multibase": "^1.0.1",

src/blockstore-utils.js

+5-2
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,18 @@ exports.cidToKey = cid => {
1616
throw errcode(new Error('Not a valid cid'), 'ERR_INVALID_CID')
1717
}
1818

19-
return new Key('/' + multibase.encode('base32', cid.buffer).toString().slice(1).toUpperCase(), false)
19+
return new Key('/' + multibase.encode('base32', cid.multihash).toString().slice(1).toUpperCase(), false)
2020
}
2121

2222
/**
2323
* Transform a datastore Key instance to a CID
24+
* As the Key holds only the multihash of the CID, the CID is reconstructed as a v1 CID using IPLD's RAW codec.
25+
* Hence it is highly probable that the CID a block was stored with will differ from the CID retrieved from the blockstore.
2426
*
2527
* @param {Key} key
2628
* @returns {CID}
2729
*/
2830
exports.keyToCid = key => {
29-
return new CID(multibase.decode('b' + key.toString().slice(1).toLowerCase()))
31+
// Block key is of the form /<base32 encoded string>
32+
return new CID(1, 'raw', multibase.decode('b' + key.toString().slice(1).toLowerCase()))
3033
}

src/blockstore.js

+71-71
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@ const ShardingStore = core.ShardingDatastore
55
const Block = require('ipld-block')
66
const { cidToKey, keyToCid } = require('./blockstore-utils')
77
const map = require('it-map')
8-
const pipe = require('it-pipe')
8+
const drain = require('it-drain')
9+
const pushable = require('it-pushable')
910

1011
module.exports = async (filestore, options) => {
1112
const store = await maybeWithSharding(filestore, options)
@@ -23,47 +24,39 @@ function maybeWithSharding (filestore, options) {
2324
function createBaseStore (store) {
2425
return {
2526
/**
26-
* Query the store.
27+
* Query the store
2728
*
2829
* @param {Object} query
2930
* @param {Object} options
30-
* @returns {AsyncIterator<Block>}
31+
* @returns {AsyncIterator<Block|CID>}
3132
*/
32-
async * query (query, options) { // eslint-disable-line require-await
33-
yield * store.query(query, options)
33+
async * query (query, options) {
34+
for await (const { key, value } of store.query(query, options)) {
35+
if (query.keysOnly) {
36+
yield keyToCid(key)
37+
continue
38+
}
39+
40+
yield new Block(value, keyToCid(key))
41+
}
3442
},
43+
3544
/**
36-
* Get a single block by CID.
45+
* Get a single block by CID
3746
*
3847
* @param {CID} cid
3948
* @param {Object} options
4049
* @returns {Promise<Block>}
4150
*/
4251
async get (cid, options) {
4352
const key = cidToKey(cid)
44-
let blockData
45-
try {
46-
blockData = await store.get(key, options)
47-
return new Block(blockData, cid)
48-
} catch (err) {
49-
if (err.code === 'ERR_NOT_FOUND') {
50-
const otherCid = cidToOtherVersion(cid)
51-
52-
if (!otherCid) {
53-
throw err
54-
}
55-
56-
const otherKey = cidToKey(otherCid)
57-
const blockData = await store.get(otherKey, options)
58-
await store.put(key, blockData)
59-
return new Block(blockData, cid)
60-
}
53+
const blockData = await store.get(key, options)
6154

62-
throw err
63-
}
55+
return new Block(blockData, cid)
6456
},
57+
6558
/**
66-
* Like get, but for more.
59+
* Like get, but for more
6760
*
6861
* @param {AsyncIterator<CID>} cids
6962
* @param {Object} options
@@ -74,8 +67,9 @@ function createBaseStore (store) {
7467
yield this.get(cid, options)
7568
}
7669
},
70+
7771
/**
78-
* Write a single block to the store.
72+
* Write a single block to the store
7973
*
8074
* @param {Block} block
8175
* @param {Object} options
@@ -86,59 +80,75 @@ function createBaseStore (store) {
8680
throw new Error('invalid block')
8781
}
8882

89-
const exists = await this.has(block.cid)
83+
const key = cidToKey(block.cid)
84+
const exists = await store.has(key, options)
9085

91-
if (exists) {
92-
return this.get(block.cid, options)
86+
if (!exists) {
87+
await store.put(key, block.data, options)
9388
}
9489

95-
await store.put(cidToKey(block.cid), block.data, options)
96-
9790
return block
9891
},
9992

10093
/**
101-
* Like put, but for more.
94+
* Like put, but for more
10295
*
10396
* @param {AsyncIterable<Block>|Iterable<Block>} blocks
10497
* @param {Object} options
10598
* @returns {AsyncIterable<Block>}
10699
*/
107100
async * putMany (blocks, options) { // eslint-disable-line require-await
108-
yield * pipe(
109-
blocks,
110-
(source) => {
111-
// turn them into a key/value pair
112-
return map(source, (block) => {
113-
return { key: cidToKey(block.cid), value: block.data }
114-
})
115-
},
116-
(source) => {
117-
// put them into the datastore
118-
return store.putMany(source, options)
119-
},
120-
(source) => {
121-
// map the returned key/value back into a block
122-
return map(source, ({ key, value }) => {
123-
return new Block(value, keyToCid(key))
124-
})
101+
// we cannot simply chain to `store.putMany` because we convert a CID into
102+
// a key based on the multihash only, so we lose the version & codec and
103+
// cannot give the user back the CID they used to create the block, so yield
104+
// to `store.putMany` but return the actual block the user passed in.
105+
//
106+
// nb. we want to use `store.putMany` here so bitswap can control batching
107+
// up block HAVEs to send to the network - if we use multiple `store.put`s
108+
// it will not be able to guess we are about to `store.put` more blocks
109+
const output = pushable()
110+
111+
// process.nextTick runs as soon as the current operation completes, setImmediate runs on the next
112+
// event loop iteration so is slower. Use process.nextTick if it is available.
113+
const runner = process && process.nextTick ? process.nextTick : setImmediate
114+
115+
runner(async () => {
116+
try {
117+
await drain(store.putMany(async function * () {
118+
for await (const block of blocks) {
119+
const key = cidToKey(block.cid)
120+
const exists = await store.has(key, options)
121+
122+
if (!exists) {
123+
yield { key, value: block.data }
124+
}
125+
126+
// there is an assumption here that after the yield has completed
127+
// the underlying datastore has finished writing the block
128+
output.push(block)
129+
}
130+
}()))
131+
132+
output.end()
133+
} catch (err) {
134+
output.end(err)
125135
}
126-
)
136+
})
137+
138+
yield * output
127139
},
140+
128141
/**
129-
* Does the store contain block with this cid?
142+
* Does the store contain block with this CID?
130143
*
131144
* @param {CID} cid
132145
* @param {Object} options
133146
* @returns {Promise<bool>}
134147
*/
135-
async has (cid, options) {
136-
const exists = await store.has(cidToKey(cid), options)
137-
if (exists) return exists
138-
const otherCid = cidToOtherVersion(cid)
139-
if (!otherCid) return false
140-
return store.has(cidToKey(otherCid), options)
148+
async has (cid, options) { // eslint-disable-line require-await
149+
return store.has(cidToKey(cid), options)
141150
},
151+
142152
/**
143153
* Delete a block from the store
144154
*
@@ -149,6 +159,7 @@ function createBaseStore (store) {
149159
async delete (cid, options) { // eslint-disable-line require-await
150160
return store.delete(cidToKey(cid), options)
151161
},
162+
152163
/**
153164
* Delete many blocks from the store
154165
*
@@ -157,12 +168,9 @@ function createBaseStore (store) {
157168
* @returns {Promise<void>}
158169
*/
159170
async * deleteMany (cids, options) { // eslint-disable-line require-await
160-
yield * store.deleteMany((async function * () {
161-
for await (const cid of cids) {
162-
yield cidToKey(cid)
163-
}
164-
}()), options)
171+
yield * store.deleteMany(map(cids, cid => cidToKey(cid)), options)
165172
},
173+
166174
/**
167175
* Close the store
168176
*
@@ -173,11 +181,3 @@ function createBaseStore (store) {
173181
}
174182
}
175183
}
176-
177-
function cidToOtherVersion (cid) {
178-
try {
179-
return cid.version === 0 ? cid.toV1() : cid.toV0()
180-
} catch (err) {
181-
return null
182-
}
183-
}

src/constants.js

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
'use strict'
22

33
module.exports = {
4-
repoVersion: 7
4+
repoVersion: 8
55
}

0 commit comments

Comments
 (0)