-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpreprocess.js
358 lines (328 loc) · 12.4 KB
/
preprocess.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
import assert from 'node:assert'
import { ethers } from 'ethers'
import { ethAddressFromDelegated } from '@glif/filecoin-address'
import { CarReader } from '@ipld/car'
import { validateBlock } from '@web3-storage/car-block-validator'
import { recursive as exporter } from 'ipfs-unixfs-exporter'
import createDebug from 'debug'
import pRetry from 'p-retry'
// Debug logger for this module, created with the `spark:preprocess` namespace.
const debug = createDebug('spark:preprocess')
/**
* A single measurement, normalised from its raw (snake_case) record.
*
* The constructor copies fields from the raw record, derives `retrievalResult` and
* `alternativeProviderRetrievalResult` from it, converts date-time values to numeric
* timestamps via `parseDateTime`, and initialises both evaluation fields to `null`.
*/
export class Measurement {
/**
* @param {Partial<import('./typings.js').RawMeasurement>} m The raw measurement record.
* `m.participant_address` is passed to `parseParticipantAddress`, which throws when the
* address cannot be converted - so the constructor throws for such records.
* @param {<T extends string>(str: T) => T} pointerize Applied to most string-valued fields
* before they are stored; defaults to the identity function.
* NOTE(review): presumably used to de-duplicate (intern) repeated strings - confirm with the caller.
*/
constructor (m, pointerize = (v) => v) {
// Either the address as-is (when it starts with `0x`) or the converted form
this.participantAddress = pointerize(parseParticipantAddress(m.participant_address))
// Both results are derived from the RAW record, i.e. before any normalisation below
this.retrievalResult = pointerize(getRetrievalResult(m))
this.alternativeProviderRetrievalResult = pointerize(getAlternativeProviderRetrievalResult(m))
this.cid = pointerize(m.cid)
this.minerId = pointerize(m.miner_id)
// Note: providerId is recorded by spark-publish but we don't use it for evaluations yet
this.providerId = pointerize(m.provider_id)
this.spark_version = pointerize(m.spark_version)
// The two evaluation fields start empty.
// NOTE(review): presumably filled in by later evaluation steps outside this block - confirm.
/** @type {import('./typings.js').TaskingEvaluation} */
this.taskingEvaluation = null
/** @type {import('./typings.js').ConsensusEvaluation} */
this.consensusEvaluation = null
this.inet_group = pointerize(m.inet_group)
// Timestamps become epoch milliseconds, or `undefined` when missing/unparseable
this.finished_at = parseDateTime(m.finished_at)
this.provider_address = pointerize(m.provider_address)
// Protocol is stored lower-cased; a missing protocol stays `undefined`
this.protocol = pointerize(m.protocol?.toLowerCase())
this.byte_length = m.byte_length
this.start_at = parseDateTime(m.start_at)
this.first_byte_at = parseDateTime(m.first_byte_at)
this.end_at = parseDateTime(m.end_at)
this.status_code = m.status_code
this.head_status_code = m.head_status_code
this.timeout = m.timeout
this.indexerResult = pointerize(m.indexer_result)
this.stationId = pointerize(m.station_id)
this.carChecksum = pointerize(m.car_checksum)
this.carTooLarge = m.car_too_large
// Fields of the alternative provider check, grouped into one nested object.
// Unlike the top-level `protocol`, this protocol value is stored exactly as received.
this.alternativeProviderCheck = {
statusCode: m.alternative_provider_check_status_code,
timeout: m.alternative_provider_check_timeout,
carTooLarge: m.alternative_provider_check_car_too_large,
endAt: parseDateTime(m.alternative_provider_check_end_at),
protocol: m.alternative_provider_check_protocol,
providerId: pointerize(m.alternative_provider_check_provider_id)
}
}
}
/**
 * Convert a date-time value into epoch milliseconds.
 *
 * @param {string} str Any value accepted by the `Date` constructor
 * @returns {number | undefined} The timestamp, or `undefined` when the input is
 * falsy or cannot be parsed as a date
 */
const parseDateTime = (str) => {
  const timestamp = str ? new Date(str).getTime() : Number.NaN
  return Number.isNaN(timestamp) ? undefined : timestamp
}
/**
 * Fetch one batch of raw measurements, keep the valid ones and add them to the round.
 *
 * Takes a single options object with these properties:
 *  - `round`: receives the results; this function reads `round.pointerize` and
 *    appends to `round.measurements` and `round.measurementBatches`
 *  - `cid`: identifier of the batch, passed to `fetchMeasurements` and recorded
 *    in `round.measurementBatches`
 *  - `roundIndex`: only used for logging and telemetry
 *  - `fetchMeasurements`: `(cid, { noCache }) => Promise<RawMeasurement[]>`;
 *    `noCache` is `true` for every attempt after the first one
 *  - `recordTelemetry`: `(name, fn)` where `fn` receives a point exposing `intField(key, value)`
 *  - `logger`: object providing `log(...)` and `error(...)`
 *  - `fetchRetries`: number of retries handed to `pRetry` (default 14)
 *
 * Records that cannot be turned into a `Measurement`, or that fail
 * `assertValidMeasurement`, are dropped (and logged) instead of failing the batch.
 *
 * @returns {Promise<Measurement[]>} The valid measurements of this batch
 */
export const preprocess = async ({
  round,
  cid,
  roundIndex,
  fetchMeasurements,
  recordTelemetry,
  logger,
  fetchRetries = 14
}) => {
  const start = Date.now()
  /** @type import('./typings.js').RawMeasurement[] */
  const measurements = await pRetry(
    attempt => fetchMeasurements(cid, { noCache: attempt > 1 }),
    {
      retries: fetchRetries,
      onFailedAttempt: err => {
        // With retries disabled the error propagates to the caller; don't report it twice
        if (!fetchRetries) return
        console.error(err)
        console.error(`Retrying ${cid} ${err.retriesLeft} more times`)
      }
    }
  )
  const fetchDuration = Date.now() - start

  const validMeasurements = measurements
    .map(measurement => {
      try {
        return new Measurement(measurement, round.pointerize)
      } catch (err) {
        logger.error('Invalid measurement:', err.message, measurement)
        return null
      }
    })
    .filter(measurement => {
      if (measurement === null) return false

      // Print round & participant address & CID together to simplify lookup when debugging
      // Omit the `m` object from the format string to get nicer formatting
      debug(
        'RETRIEVAL RESULT for round=%s client=%s cid=%s minerId=%s: %s',
        roundIndex,
        measurement.participantAddress,
        measurement.cid,
        measurement.minerId,
        measurement.retrievalResult,
        measurement)

      try {
        assertValidMeasurement(measurement)
        return true
      } catch (err) {
        debug('INVALID MEASURMENT err=%s measurement=%j', err.message, measurement)
        return false
      }
    })

  logger.log(
    'PREPROCESS ROUND %s: Added measurements from CID %s\n%o',
    roundIndex,
    cid,
    { total: measurements.length, valid: validMeasurements.length }
  )

  const okCount = validMeasurements.reduce((c, m) => m.retrievalResult === 'OK' ? c + 1 : c, 0)
  const total = validMeasurements.length
  // Report 0% instead of NaN% when the batch contains no valid measurement
  const successRate = total > 0 ? Math.round(100 * okCount / total) : 0
  logger.log('Retrieval Success Rate: %s%s (%s of %s)', successRate, '%', okCount, total)

  // Append one by one: `push(...array)` passes every element as a call argument and
  // throws a RangeError once the batch exceeds the engine's argument limit
  for (const m of validMeasurements) {
    round.measurements.push(m)
  }
  round.measurementBatches.push(cid)

  recordTelemetry('preprocess', point => {
    point.intField('round_index', roundIndex)
    point.intField('total_measurements', measurements.length)
    point.intField('valid_measurements', validMeasurements.length)
    point.intField('fetch_duration_ms', fetchDuration)
  })

  // Count how many valid measurements were reported per `spark_version`
  /** @type {Map<string,number>} */
  const sparkVersions = new Map()
  for (const m of validMeasurements) {
    if (typeof m.spark_version !== 'string') continue
    const oldCount = sparkVersions.get(m.spark_version) ?? 0
    sparkVersions.set(m.spark_version, oldCount + 1)
  }
  recordTelemetry('spark_versions', point => {
    point.intField('round_index', roundIndex)
    // Only measurements carrying a string `spark_version` are included in this total
    let versionedTotal = 0
    for (const [version, count] of sparkVersions.entries()) {
      point.intField(`v${version}`, count)
      versionedTotal += count
    }
    point.intField('total', versionedTotal)
  })

  return validMeasurements
}
/**
 * Download a batch of measurements as a CAR file from the `w3s.link` gateway,
 * validate every block that is read, and parse the first `file`/`raw` UnixFS entry.
 *
 * @param {string} cid Root CID of the batch
 * @param {object} options
 * @param {AbortSignal} [options.signal] Aborts the HTTP request and the CAR traversal
 * @param {boolean} [options.noCache] Send `Cache-Control: no-cache` with the request
 * @returns {Promise<import('./typings.js').RawMeasurement[]>}
 * @throws {Error} When the gateway responds with a non-2xx status, when a block is
 * missing from the CAR or fails validation, or when no file entry is found
 */
export const fetchMeasurements = async (cid, { signal, noCache = false } = {}) => {
  const res = await fetch(
    `https://${encodeURIComponent(cid)}.ipfs.w3s.link?format=car`,
    {
      signal,
      // Only send the header when a cache bypass is requested. `default` is a value of
      // the Fetch API `cache` option, it is not a valid Cache-Control directive.
      headers: noCache ? { 'Cache-Control': 'no-cache' } : {}
    }
  )
  if (!res.ok) {
    const msg = `Cannot fetch measurements ${cid}: ${res.status}\n${await res.text()}`
    throw new Error(msg)
  }

  const reader = await CarReader.fromIterable(res.body)
  const entries = exporter(cid, {
    async get (blockCid) {
      signal?.throwIfAborted()
      // The cast to `any` is a workaround for the following TypeScript error
      // The types of 'toV0()[Symbol.toStringTag]' are incompatible between these types.
      // Type 'string' is not assignable to type '"CID"'
      const block = await reader.get(/** @type {any} */(blockCid))
      // The reader yields `undefined` for blocks that are not part of the CAR;
      // report that explicitly instead of as a validation failure
      if (!block) {
        throw new Error(`Missing block ${blockCid} of root ${cid}`)
      }
      try {
        await validateBlock(block)
      } catch (err) {
        throw new Error(
          `Invalid block ${blockCid} of root ${cid}`, { cause: err }
        )
      }
      return block.bytes
    }
  })

  for await (const entry of entries) {
    signal?.throwIfAborted()
    // Depending on size, entries might be packaged as `file` or `raw`
    // https://github.com/web3-storage/w3up/blob/e8bffe2ee0d3a59a977d2c4b7efe425699424e19/packages/upload-client/src/unixfs.js#L11
    if (entry.type === 'file' || entry.type === 'raw') {
      const bufs = []
      for await (const buf of entry.content()) {
        signal?.throwIfAborted()
        bufs.push(buf)
      }
      return parseMeasurements(Buffer.concat(bufs).toString())
    }
  }
  throw new Error('No measurements found')
}
/**
 * Parse the text payload of a measurement batch.
 *
 * Supported formats:
 *  - NDJSON, one JSON document per line (new format)
 *  - JSON array on a single line (old format)
 *
 * Blank and whitespace-only lines are skipped, so CRLF line endings and
 * trailing empty lines do not break parsing.
 *
 * @param {string} str
 * @returns {any[]} The parsed measurements
 * @throws {SyntaxError} When a non-blank line is not valid JSON
 */
export const parseMeasurements = str => {
  const ret = str
    .split('\n')
    // `filter(Boolean)` would let whitespace-only lines through to `JSON.parse`
    .filter(line => line.trim() !== '')
    .map(line => JSON.parse(line))
  if (ret.length === 1 && Array.isArray(ret[0])) return ret[0]
  return ret
}
/**
 * Normalise a participant wallet address to its ETH (`0x…`) form.
 *
 * Addresses starting with `0x` are returned unchanged; everything else is
 * converted with `ethAddressFromDelegated`.
 *
 * @param {string} filWalletAddress
 * @returns {string}
 * @throws {TypeError} When `filWalletAddress` is not a string (e.g. the field is missing)
 * @throws {Error} When the conversion fails; the original error is rethrown with
 * its message prefixed by `Invalid participant address …`. In both cases the error
 * carries the offending value in `filWalletAddress`.
 */
export const parseParticipantAddress = filWalletAddress => {
  // Raw measurements are only partially trusted, the address may be missing entirely.
  // Fail with a descriptive error rather than "Cannot read properties of undefined".
  if (typeof filWalletAddress !== 'string') {
    throw Object.assign(
      new TypeError(
        `Invalid participant address ${filWalletAddress}: expected a string, got ${typeof filWalletAddress}`
      ),
      { filWalletAddress }
    )
  }

  // ETH addresses don't need any conversion
  if (filWalletAddress.startsWith('0x')) {
    return filWalletAddress
  }

  try {
    return ethAddressFromDelegated(filWalletAddress)
  } catch (err) {
    err.message = `Invalid participant address ${filWalletAddress}: ${err.message}`
    err.filWalletAddress = filWalletAddress
    throw err
  }
}
/**
 * Sanity-check a measurement; throws an assertion error on the first failed check.
 *
 * Every measurement needs a valid participant address, a string `inet_group`,
 * a numeric `finished_at` and a truthy `indexerResult`. A `stationId`, when present,
 * must be an 88-character hex string. Measurements whose `retrievalResult` is `OK`
 * must additionally carry consistent timings and a `head_status_code` that matches
 * the protocol.
 *
 * @param {Measurement} measurement
 */
export const assertValidMeasurement = measurement => {
  const isObject = measurement !== null && typeof measurement === 'object'
  assert(isObject, 'object required')

  assert(ethers.isAddress(measurement.participantAddress), 'valid participant address required')
  assert(typeof measurement.inet_group === 'string', 'valid inet group required')
  assert(typeof measurement.finished_at === 'number', 'field `finished_at` must be set to a number')
  assert(measurement.indexerResult, 'field `indexerResult` must be set')

  const { stationId } = measurement
  if (stationId) {
    assert(
      typeof stationId === 'string' && stationId.match(/^[0-9a-fA-F]{88}$/),
      'stationId must be a hex string with 88 characters'
    )
  }

  // The remaining checks apply to successful retrievals only
  if (measurement.retrievalResult !== 'OK') return

  const {
    start_at: startAt,
    first_byte_at: firstByteAt,
    end_at: endAt,
    head_status_code: headStatusCode
  } = measurement

  assert(
    typeof startAt === 'number' && startAt > 0,
    'field `start_at` must be a number greater than 0'
  )
  assert(
    typeof firstByteAt === 'number' && firstByteAt > 0,
    'field `first_byte_at` must be a number greater than 0'
  )
  assert(
    typeof endAt === 'number' && endAt > 0,
    'field `end_at` must be a number greater than 0'
  )

  // Timings must be ordered: start_at <= first_byte_at <= end_at
  assert(endAt >= startAt, 'end_at must be greater than or equal to start_at')
  assert(endAt >= firstByteAt, 'end_at must be greater than or equal to first_byte_at')
  assert(firstByteAt >= startAt, 'first_byte_at must be greater than or equal to start_at')

  if (measurement.protocol === 'http') {
    assert.strictEqual(typeof headStatusCode, 'number', '`head_status_code` must be a number')
    return
  }
  assert(
    headStatusCode === undefined || headStatusCode === null,
    '`head_status_code` must be undefined or null for non-HTTP retrievals'
  )
}
/**
 * Status codes that map one-to-one to a named retrieval failure.
 * @type {Map<number, import('./typings.js').RetrievalResult>}
 */
const RETRIEVAL_RESULT_BY_STATUS_CODE = new Map([
  [600, 'UNKNOWN_FETCH_ERROR'],
  [801, 'HOSTNAME_DNS_ERROR'],
  [802, 'CONNECTION_REFUSED'],
  [901, 'UNSUPPORTED_CID_HASH_ALGO'],
  [902, 'CONTENT_VERIFICATION_FAILED'],
  [903, 'UNEXPECTED_CAR_BLOCK'],
  [904, 'CANNOT_PARSE_CAR_FILE']
])

/**
 * Classify the outcome of a raw measurement.
 *
 * Checks are applied in this order; the first match wins:
 *  1. indexer result other than `OK`/`HTTP_NOT_ADVERTISED` -> `IPNI_<indexer_result>`
 *  2. timeout -> `TIMEOUT`
 *  3. CAR too large -> `CAR_TOO_LARGE`
 *  4. status code in [700, 800) -> `UNSUPPORTED_MULTIADDR_FORMAT`
 *  5. status code with a dedicated name (600, 801, 802, 901-904)
 *  6. any other status code >= 300 -> `HTTP_<code>` or `LASSIE_<code>` depending on the protocol
 *  7. status code >= 200 together with a string `end_at` -> `OK`
 *  8. everything else -> `UNKNOWN_ERROR`
 *
 * @param {Partial<import('./typings.js').RawMeasurement>} measurement
 * @return {import('./typings.js').RetrievalResult}
 */
export const getRetrievalResult = (measurement) => {
  const indexerResult = measurement.indexer_result
  const canRetrieve = indexerResult === 'OK' || indexerResult === 'HTTP_NOT_ADVERTISED'
  if (!canRetrieve) return `IPNI_${indexerResult}`

  if (measurement.timeout) return 'TIMEOUT'
  if (measurement.car_too_large) return 'CAR_TOO_LARGE'

  const statusCode = measurement.status_code
  if (statusCode >= 700 && statusCode < 800) return 'UNSUPPORTED_MULTIADDR_FORMAT'

  const namedFailure = RETRIEVAL_RESULT_BY_STATUS_CODE.get(statusCode)
  if (namedFailure !== undefined) return namedFailure

  if (statusCode >= 300) {
    // Template literals (rather than `+`) keep TypeScript happy with the return type
    return measurement.protocol === 'http' ? `HTTP_${statusCode}` : `LASSIE_${statusCode}`
  }

  if (statusCode >= 200 && typeof measurement.end_at === 'string') return 'OK'
  return 'UNKNOWN_ERROR'
}
/**
 * Classify the outcome of a measurement, taking the alternative provider check into account.
 *
 * The alternative provider check is used only when the indexer result is
 * `NO_VALID_ADVERTISEMENT` and the check reported a (truthy) status code. In that case
 * the check's fields are classified as if they were a regular retrieval with an `OK`
 * indexer result. In every other case the measurement itself is classified.
 *
 * @param {Partial<import('./typings.js').RawMeasurement>} measurement
 * @return {import('./typings.js').RetrievalResult}
 */
export const getAlternativeProviderRetrievalResult = (measurement) => {
  const hasAlternativeCheck =
    measurement.indexer_result === 'NO_VALID_ADVERTISEMENT' &&
    Boolean(measurement.alternative_provider_check_status_code)

  if (!hasAlternativeCheck) {
    return getRetrievalResult(measurement)
  }

  const checkStatusCode = measurement.alternative_provider_check_status_code
  return getRetrievalResult(
    /** @type {Partial<import('./typings.js').RawMeasurement>} */ ({
      indexer_result: 'OK',
      status_code: checkStatusCode,
      timeout: measurement.alternative_provider_check_timeout,
      alternative_provider_check_status_code: checkStatusCode,
      car_too_large: measurement.alternative_provider_check_car_too_large,
      end_at: measurement.alternative_provider_check_end_at,
      protocol: measurement.alternative_provider_check_protocol
    })
  )
}