Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add sessions to trustless gateways #459

Merged
merged 17 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions packages/block-brokers/.aegir.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import cors from 'cors'
import polka from 'polka'

/** @type {import('aegir').PartialOptions} */
const options = {
test: {
async before (options) {
const server = polka({
port: 0,
host: '127.0.0.1'
})
server.use(cors())
server.all('/ipfs/bafkreiefnkxuhnq3536qo2i2w3tazvifek4mbbzb6zlq3ouhprjce5c3aq', (req, res) => {
res.writeHead(200, {
'content-type': 'application/octet-stream'
})
res.end(Uint8Array.from([0, 1, 2, 0]))
})

await server.listen()
const { port } = server.server.address()

return {
server,
env: {
TRUSTLESS_GATEWAY: `http://127.0.0.1:${port}`
}
}
},
async after (options, before) {
await before.server.server.close()
}
}
}

export default options
8 changes: 8 additions & 0 deletions packages/block-brokers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,23 @@
"dependencies": {
"@helia/interface": "^4.0.1",
"@libp2p/interface": "^1.1.4",
"@libp2p/utils": "^5.2.6",
"@multiformats/multiaddr-to-uri": "^10.0.1",
"interface-blockstore": "^5.2.10",
"ipfs-bitswap": "^20.0.2",
"multiformats": "^13.1.0",
"p-defer": "^4.0.0",
"progress-events": "^1.0.0"
},
"devDependencies": {
"@libp2p/logger": "^4.0.7",
"@libp2p/peer-id-factory": "^4.0.7",
"@multiformats/multiaddr": "^12.1.14",
"@multiformats/uri-to-multiaddr": "^8.0.0",
"@types/sinon": "^17.0.3",
"aegir": "^42.2.5",
"cors": "^2.8.5",
"polka": "^0.5.2",
"sinon": "^17.0.1",
"sinon-ts": "^2.0.0"
}
Expand Down
11 changes: 4 additions & 7 deletions packages/block-brokers/src/bitswap.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import { createBitswap } from 'ipfs-bitswap'
import type { BlockAnnouncer, BlockBroker, BlockRetrievalOptions, BlockRetriever } from '@helia/interface/blocks'
import type { BlockAnnounceOptions, BlockBroker, BlockRetrievalOptions } from '@helia/interface/blocks'
import type { Libp2p, Startable } from '@libp2p/interface'
import type { Blockstore } from 'interface-blockstore'
import type { Bitswap, BitswapNotifyProgressEvents, BitswapOptions, BitswapWantBlockProgressEvents } from 'ipfs-bitswap'
import type { CID } from 'multiformats/cid'
import type { MultihashHasher } from 'multiformats/hashes/interface'
import type { ProgressOptions } from 'progress-events'

interface BitswapComponents {
libp2p: Libp2p
Expand All @@ -17,9 +16,7 @@ export interface BitswapInit extends BitswapOptions {

}

class BitswapBlockBroker implements BlockAnnouncer<ProgressOptions<BitswapNotifyProgressEvents>>, BlockRetriever<
ProgressOptions<BitswapWantBlockProgressEvents>
>, Startable {
class BitswapBlockBroker implements BlockBroker<BitswapWantBlockProgressEvents, BitswapNotifyProgressEvents>, Startable {
private readonly bitswap: Bitswap
private started: boolean

Expand Down Expand Up @@ -65,11 +62,11 @@ ProgressOptions<BitswapWantBlockProgressEvents>
this.started = false
}

announce (cid: CID, block: Uint8Array, options?: ProgressOptions<BitswapNotifyProgressEvents>): void {
async announce (cid: CID, block: Uint8Array, options?: BlockAnnounceOptions<BitswapNotifyProgressEvents>): Promise<void> {
this.bitswap.notify(cid, block, options)
}

async retrieve (cid: CID, { validateFn, ...options }: BlockRetrievalOptions<ProgressOptions<BitswapWantBlockProgressEvents>> = {}): Promise<Uint8Array> {
async retrieve (cid: CID, options: BlockRetrievalOptions<BitswapWantBlockProgressEvents> = {}): Promise<Uint8Array> {
return this.bitswap.want(cid, options)
}
}
Expand Down
118 changes: 108 additions & 10 deletions packages/block-brokers/src/trustless-gateway/broker.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,49 @@
import { DEFAULT_SESSION_MIN_PROVIDERS, DEFAULT_SESSION_MAX_PROVIDERS, DEFAULT_SESSION_PROVIDER_QUERY_CONCURRENCY, DEFAULT_SESSION_PROVIDER_QUERY_TIMEOUT } from '@helia/interface'
import { PeerQueue } from '@libp2p/utils/peer-queue'
import { multiaddrToUri } from '@multiformats/multiaddr-to-uri'
import pDefer from 'p-defer'
import { TrustlessGateway } from './trustless-gateway.js'
import { DEFAULT_TRUSTLESS_GATEWAYS } from './index.js'
import type { TrustlessGatewayBlockBrokerInit, TrustlessGatewayComponents, TrustlessGatewayGetBlockProgressEvents } from './index.js'
import type { BlockRetrievalOptions, BlockRetriever } from '@helia/interface/blocks'
import type { Routing, BlockRetrievalOptions, BlockBroker, CreateSessionOptions } from '@helia/interface'
import type { Logger } from '@libp2p/interface'
import type { CID } from 'multiformats/cid'
import type { ProgressOptions } from 'progress-events'

export interface CreateTrustlessGatewaySessionOptions extends CreateSessionOptions<TrustlessGatewayGetBlockProgressEvents> {
/**
* Specify the cache control header to send to the remote. 'only-if-cached'
* will prevent the gateway from fetching the content if they don't have it.
*
* @default only-if-cached
*/
cacheControl?: string
Comment on lines +19 to +21
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm worried about defaulting to asking gateways to only return content they have. will read more in the PR

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to stop them doing what gateways do, e.g. fetch content on your behalf. Otherwise it defeats the purpose of having a session.

}

/**
* A class that accepts a list of trustless gateways that are queried
* for blocks.
*/
export class TrustlessGatewayBlockBroker implements BlockRetriever<
ProgressOptions<TrustlessGatewayGetBlockProgressEvents>
> {
export class TrustlessGatewayBlockBroker implements BlockBroker<TrustlessGatewayGetBlockProgressEvents> {
private readonly components: TrustlessGatewayComponents
private readonly gateways: TrustlessGateway[]
private readonly routing: Routing
private readonly log: Logger

constructor (components: TrustlessGatewayComponents, init: TrustlessGatewayBlockBrokerInit = {}) {
this.components = components
this.log = components.logger.forComponent('helia:trustless-gateway-block-broker')
this.routing = components.routing
this.gateways = (init.gateways ?? DEFAULT_TRUSTLESS_GATEWAYS)
.map((gatewayOrUrl) => {
return new TrustlessGateway(gatewayOrUrl)
return new TrustlessGateway(gatewayOrUrl, components.logger)
})
}

async retrieve (cid: CID, options: BlockRetrievalOptions<ProgressOptions<TrustlessGatewayGetBlockProgressEvents>> = {}): Promise<Uint8Array> {
addGateway (gatewayOrUrl: string): void {
this.gateways.push(new TrustlessGateway(gatewayOrUrl, this.components.logger))
}

async retrieve (cid: CID, options: BlockRetrievalOptions<TrustlessGatewayGetBlockProgressEvents> = {}): Promise<Uint8Array> {
// Loop through the gateways until we get a block or run out of gateways
// TODO: switch to toSorted when support is better
const sortedGateways = this.gateways.sort((a, b) => b.reliability() - a.reliability())
Expand All @@ -41,7 +60,7 @@
this.log.error('failed to validate block for %c from %s', cid, gateway.url, err)
gateway.incrementInvalidBlocks()

throw new Error(`unable to validate block for CID ${cid} from gateway ${gateway.url}`)
throw new Error(`Block for CID ${cid} from gateway ${gateway.url} failed validation`)
}

return block
Expand All @@ -50,7 +69,7 @@
if (err instanceof Error) {
aggregateErrors.push(err)
} else {
aggregateErrors.push(new Error(`unable to fetch raw block for CID ${cid} from gateway ${gateway.url}`))
aggregateErrors.push(new Error(`Unable to fetch raw block for CID ${cid} from gateway ${gateway.url}`))

Check warning on line 72 in packages/block-brokers/src/trustless-gateway/broker.ts

View check run for this annotation

Codecov / codecov/patch

packages/block-brokers/src/trustless-gateway/broker.ts#L72

Added line #L72 was not covered by tests
}
// if signal was aborted, exit the loop
if (options.signal?.aborted === true) {
Expand All @@ -60,6 +79,85 @@
}
}

throw new AggregateError(aggregateErrors, `unable to fetch raw block for CID ${cid} from any gateway`)
if (aggregateErrors.length > 0) {
throw new AggregateError(aggregateErrors, `Unable to fetch raw block for CID ${cid} from any gateway`)
} else {
throw new Error(`Unable to fetch raw block for CID ${cid} from any gateway`)
}

Check warning on line 86 in packages/block-brokers/src/trustless-gateway/broker.ts

View check run for this annotation

Codecov / codecov/patch

packages/block-brokers/src/trustless-gateway/broker.ts#L85-L86

Added lines #L85 - L86 were not covered by tests
}

async createSession (root: CID, options: CreateTrustlessGatewaySessionOptions = {}): Promise<BlockBroker<TrustlessGatewayGetBlockProgressEvents>> {
const gateways: string[] = []
const minProviders = options.minProviders ?? DEFAULT_SESSION_MIN_PROVIDERS
const maxProviders = options.minProviders ?? DEFAULT_SESSION_MAX_PROVIDERS
const deferred = pDefer<BlockBroker<TrustlessGatewayGetBlockProgressEvents>>()
const broker = new TrustlessGatewayBlockBroker(this.components, {
gateways
})

this.log('finding transport-ipfs-gateway-http providers for cid %c', root)

const queue = new PeerQueue({
concurrency: options.providerQueryConcurrency ?? DEFAULT_SESSION_PROVIDER_QUERY_CONCURRENCY
})

Promise.resolve().then(async () => {
for await (const provider of this.routing.findProviders(root, options)) {
if (provider.protocols == null || !provider.protocols.includes('transport-ipfs-gateway-http')) {
continue
Copy link
Member

@lidel lidel Mar 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💭 I worry that as things are today, filtering by transport-ipfs-gateway-http will be harmful for decentralization in the long term.

AFAIK it is something invented by IPNI at cid.contact for Rhea/Saturn last year to allow nft.storage to avoid bitswap bills. It only works with IPNI PUTs and was not designed with p2p in mind, Amino DHT peers who in the future will announce /https multiaddrs with trustless gateway as alternative to bitswap will not be discoverable this way.

@achingbrain It may be more future-proof to look at all results, and keep ones that have Addrs with multiaddr that has /https or /tls/http.
This will be compatible with https://github.com/libp2p/specs/blob/master/http/transport-component.md and will also filter-out useless gateways without TLS set up (can't use them in browser due to mixed-content error).

If we look at Addrs instead, when IPNI+DHT proxy at https://delegated-ipfs.dev/routing/v1/providers/bafybeigdyrzt5sfp7udm7hu76uh7y26nf3efuylqabf3oclgtqy55fbzdi starts returning DHT results for Kubo nodes with Schema: peer and Addrs that have /https multiaddr, it will "just work", and helia will be able to act on HTTP transport with these peers without having to change anything.

Copy link
Member Author

@achingbrain achingbrain Mar 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is quite sad-face-making. I hadn't realised that transport-ipfs-gateway-http was added to the spec due to a commercial concern for nft.storage rather than being something you can rely on.

when IPNI+DHT proxy starts returning DHT results with /https multiaddr, it will "just work"

As I read the above, the HTTP transport being added to Kubo is what will cause HTTP multiaddrs to appear. Unless I'm missing something it doesn't mean the peer is running a path or subdomain gateway.

That is, the remote could have a HTTP transport, but also have the gateway config disabled, just the presence of the transport isn't enough to say one way or the other.

It seems like the only way is to make a request and see what you get back? This could be a recipe for DDoSing if you created nodes that respond to provider queries for certain CIDs?

I think a possible solution here might be for the peer to indicate it's capabilities in it's signed peer record? The HTTP routers could return the peer record in the GET /providers response?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean FWIW, if it is the case that it was added exclusively for nft.storage then I wasn't aware of that.

As far as I knew the trustless gateway was already a concept, and we implemented it for Rhea when we were asked. I think there was probably a cost saving for us there due to where we were hosting our bitswap peer at the time, but that's not the case now, and shouldn't have been a convincing argument then since infra costs and locations can easily be changed.

Copy link
Member

@lidel lidel Mar 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alanshaw HTTP is good! Trustless gateways as HTTP transport are good, and .storage implementing and exposing it as alternative to bitswap is great for ecosystem, especially our browser work (ipfs-chromium, service worker, verified-fetch, Brave etc). We do want to keep it! ❤️

The only problem is that IPNI implementation informed how HTTP-only provider is represented on /routing/v1, and unfortunately, DHT peers were not included in the design.

The transport-ipfs-gateway-http codec was registered, afaik, only because IPNI already used codes for bitswap and graphsync and needed another one internally (inferring that from discussion at multiformats/multicodec#321 which I missed an arrived after it was merged already :(). It was a provisional solution, so there was no spec, no design doc how it is supposed to work outside IPNI. So we ended up with something that works for cid.contact and ignores the rest of public IPFS swarm.

Since then, I've been trying to find a way for /routing/v1 to represent Amino DHT peers that provide trustless gateway endpoint, ensuring these can be found (once we have them), not just IPNI ones.


@achingbrain yes, looking for /tls/http or /https and sending a probe and marking endpoint as gateway or not is the only reliable way I see right now.
A basic probe could be GET for small, inlined CID: send GET /ipfs/bafkqablimvwgy3y?format=raw to HTTP port and expect hello. Fast and reliable, works with legacy and future gateways too.

FYSA there are proposals of more declarative ways of discovering if HTTP port supports IPFS Gateway functionality, but they are no better than the above HTTP probe.
Things I am aware of:

  • libp2p/specs#508 proposes probing for manifest at .well-known/libp2p, but this is not implemented anywhere and in practice, no better test than GET /ipfs/bafkqablimvwgy3y?format=raw.
  • ipfs/specs#425 proposes returning headers on OPTIONS response, but it won't work on older gateways, and no better than GET /ipfs/bafkqablimvwgy3y?format=raw
  • There were discussion about publishing protocol list from libp2p identify with the peer record o Amino DHT, and then exposing this on /routing/v1 responses (but this requires network upgrade and time).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point is that if you send me a request, and I send you a response knowing it will trigger you to send a request to a specific third party, that is an attack vector.

If I send you a single response with 10x provider records, and you make 10x requests, that is a vector for an amplification attack because I have got you to do more work than the work it took me to get you to do the work, though hopefully it's not all bad because the provider hosts should have to be distinct.

As long as we recognise this and are ok with it.

Copy link
Member

@SgtPooki SgtPooki Mar 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thats a great catch @achingbrain

We should absolutely have some way to know the returned providers are valid ipfs node.

I guess as long as we dont modify the delegated routers from a set of known good actors it shouldnt be an issue, but thats centralizing on known delegated providers.

I imagine we would want any peer to be able to provide routing responses in the future though (ambient peer discovery?) where this could become a huge problem. I believe the amplification attack was discussed there as well

If we get a list of providers from some delegated-router and all those providers turn out bad, we could at least mark that delegated-router as a bad actor and limit any future requests to it

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My point is that if you send me a request, and I send you a response knowing it will trigger you to send a request to a specific third party, that is an attack vector.

True, although to some extent this is true regardless. There is currently no validation anywhere in use (e.g. DHT, IPNI, etc.) that a peer advertising /ip4/1.2.3.4/tcp/4001/p2p/<peerID> actually controls that IP address so there's already some potential vector there. This could be done by trying each advertised address, although this assumes the routing system supports all the protocols advertised (e.g. ipv4/ipv6, tcp/quic/webtransport/wss/...) which might not be true (or great for upgradability).

To be fair it is likely more work to setup HTTPS for a bad request than trying to establish just a libp2p connection and on the libp2p side we can be somewhat protected by the peerIDs/public keys (e.g. the public key can sign what's a valid advertisement with its key which will cause the security negotiation to fail otherwise ... although that will result in a wasted dial which is a separate issue in and of itself).

While we're here there's a slightly larger attack surface here for "webseeds-like" behavior (e.g. pointing at https://some-ubuntu-mirror.com/ubuntu-v.1.2.3.iso or (.car)) since more bytes can be transferred. This problem gets drastically worse if the incremental verifiability window gets larger.

If we get a list of providers from some delegated-router and all those providers turn out bad, we could at least mark that delegated-router as a bad actor and limit any future requests to it

Hopefully as explained above this isn't a reasonable thing to do unless there's actually responsibility on the underlying routing system to do these checks (or some delegated routers are allowed to advertise via something like HTTP OPTIONS or .well-known to do extra work you can punish them for not doing correctly). Mostly what you're left with to punish them on is do they give about the same results as other delegated-routers (for the same underlying routing systems) or as querying the underlying systems themselves (likely requires something similar to ipfs/specs#388).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

filtering by transport-ipfs-gateway-http will be harmful for decentralization in the long term.

So this is true, just like filtering for bitswap is also harmful. These protocol advertisements are only supported by IPNI, for Amino (as well as any new protocol advertised by nodes without a new codec landing and all IPNI nodes supporting the code) using identify-like behavior (e.g. libp2p identify or leveraging .well-known) is required.

Since then, I've been trying to find a way for /routing/v1 to represent Amino DHT peers that provide trustless gateway endpoint, ensuring these can be found (once we have them), not just IPNI ones.

For trustless-gateway over libp2p .well-known can be used and it's fine. For plain trustless-gateway there's currently no way to advertise that in the Amino DHT because the mappings are multihash -> peerID. This is very similar (although the underlying use case wasn't exactly the same) as the problem expressed here libp2p/notes#11 and ipld/ipld#57 (comment) (note: that's from 2019 and my understanding of the situation has improved a bit since then 🙃 which is part of why there's more detail in the message above than there). AFAIK this would require a protocol change to support with Amino.

}

this.log('found transport-ipfs-gateway-http provider %p for cid %c', provider.id, root)

void queue.add(async () => {
for (const ma of provider.multiaddrs) {
let uri: string | undefined

try {
// /ip4/x.x.x.x/tcp/31337/http
// /ip4/x.x.x.x/tcp/31337/https
// etc
uri = multiaddrToUri(ma)

const resource = `${uri}/ipfs/${root.toString()}?format=raw`

// make sure the peer is available - HEAD support doesn't seem to
// be very widely implemented so as long as the remote responds
// we are happy they are valid
// https://specs.ipfs.tech/http-gateways/trustless-gateway/#head-ipfs-cid-path-params
const response = await fetch(resource, {
method: 'HEAD',
headers: {
Accept: 'application/vnd.ipld.raw',
'Cache-Control': options.cacheControl ?? 'only-if-cached'
},
signal: AbortSignal.timeout(options.providerQueryTimeout ?? DEFAULT_SESSION_PROVIDER_QUERY_TIMEOUT)
})

this.log('HEAD %s %d', resource, response.status)
gateways.push(uri)
broker.addGateway(uri)

this.log('found %d transport-ipfs-gateway-http providers for cid %c', gateways.length, root)

if (gateways.length === minProviders) {
deferred.resolve(broker)
}

if (gateways.length === maxProviders) {
queue.clear()
}
} catch (err: any) {
this.log.error('could not fetch %c from %a', root, uri ?? ma, err)
}

Check warning on line 152 in packages/block-brokers/src/trustless-gateway/broker.ts

View check run for this annotation

Codecov / codecov/patch

packages/block-brokers/src/trustless-gateway/broker.ts#L151-L152

Added lines #L151 - L152 were not covered by tests
}
})
}
})
.catch(err => {
this.log.error('error creating session for %c', root, err)

Check warning on line 158 in packages/block-brokers/src/trustless-gateway/broker.ts

View check run for this annotation

Codecov / codecov/patch

packages/block-brokers/src/trustless-gateway/broker.ts#L158

Added line #L158 was not covered by tests
})

return deferred.promise
}
}
5 changes: 3 additions & 2 deletions packages/block-brokers/src/trustless-gateway/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { TrustlessGatewayBlockBroker } from './broker.js'
import type { BlockRetriever } from '@helia/interface/src/blocks.js'
import type { Routing, BlockBroker } from '@helia/interface'
import type { ComponentLogger } from '@libp2p/interface'
import type { ProgressEvent } from 'progress-events'

Expand All @@ -22,9 +22,10 @@ export interface TrustlessGatewayBlockBrokerInit {
}

export interface TrustlessGatewayComponents {
routing: Routing
logger: ComponentLogger
}

export function trustlessGateway (init: TrustlessGatewayBlockBrokerInit = {}): (components: TrustlessGatewayComponents) => BlockRetriever {
export function trustlessGateway (init: TrustlessGatewayBlockBrokerInit = {}): (components: TrustlessGatewayComponents) => BlockBroker<TrustlessGatewayGetBlockProgressEvents> {
return (components) => new TrustlessGatewayBlockBroker(components, init)
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import type { ComponentLogger, Logger } from '@libp2p/interface'
import type { CID } from 'multiformats/cid'

/**
Expand Down Expand Up @@ -36,8 +37,11 @@ export class TrustlessGateway {
*/
#successes = 0

constructor (url: URL | string) {
private readonly log: Logger

constructor (url: URL | string, logger: ComponentLogger) {
this.url = url instanceof URL ? url : new URL(url)
this.log = logger.forComponent(`helia:trustless-gateway-block-broker:${this.url.hostname}`)
2color marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand Down Expand Up @@ -67,6 +71,9 @@ export class TrustlessGateway {
},
cache: 'force-cache'
})

this.log('GET %s %d', gwUrl, res.status)

if (!res.ok) {
this.#errors++
throw new Error(`unable to fetch raw block for CID ${cid} from gateway ${this.url}`)
Expand Down
Loading
Loading