From 73ba28613aabdd19977237797f5dce504023ff6e Mon Sep 17 00:00:00 2001 From: ajuvercr Date: Tue, 26 Mar 2024 15:26:42 +0100 Subject: [PATCH 1/6] small cleanup membermanager --- lib/memberManager.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/memberManager.ts b/lib/memberManager.ts index c7f79ba..a3ed879 100644 --- a/lib/memberManager.ts +++ b/lib/memberManager.ts @@ -127,6 +127,16 @@ export class Manager { return await this.extractor.extract(data, member, this.shapeId); } + private async extractMember( + member: Term, + data: RdfStore, + ): Promise { + const quads: Quad[] = await this.extractMemberQuads(member, data); + + // let the extractor do it's thing + return await this.extractor.extract(data, member); + } + private async extractMember( member: Term, data: RdfStore, From bc747491c265b9e8c8db59d5d4c358843387dd21 Mon Sep 17 00:00:00 2001 From: ajuvercr Date: Thu, 28 Mar 2024 14:51:30 +0100 Subject: [PATCH 2/6] don't error the stream controller on error --- bin/cli.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/cli.ts b/bin/cli.ts index 605d678..1e1288e 100644 --- a/bin/cli.ts +++ b/bin/cli.ts @@ -156,6 +156,10 @@ async function main() { }); } + if (!quiet) { + client.on("error", (error) => console.error("Error", error)); + } + const reader = client.stream({ highWaterMark: 10 }).getReader(); let el = await reader.read(); let count = 0; From bb3c0ed87f27621ff5fd342f633cb405137208a9 Mon Sep 17 00:00:00 2001 From: ajuvercr Date: Thu, 28 Mar 2024 15:06:25 +0100 Subject: [PATCH 3/6] Failing to fetch a page, is actually fetching an empty page --- lib/pageFetcher.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/pageFetcher.ts b/lib/pageFetcher.ts index bc46d62..e3eb7cb 100644 --- a/lib/pageFetcher.ts +++ b/lib/pageFetcher.ts @@ -157,6 +157,10 @@ export class Fetcher { } catch (ex) { logger("Fetch failed %o", ex); notifier.error(ex, state); + notifier.pageFetched( + { data: RdfStore.createDefault(), url: node.target }, + state, + ); } } } From f7729be9e87ec5b2a9a0c5fc34fae06fde9bd6a1 Mon Sep 17 00:00:00 2001 From: ajuvercr Date: Fri, 29 Mar 2024 15:22:12 +0100 Subject: [PATCH 4/6] stop emitting when errored --- lib/pageFetcher.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/pageFetcher.ts b/lib/pageFetcher.ts index e3eb7cb..bc46d62 100644 --- a/lib/pageFetcher.ts +++ b/lib/pageFetcher.ts @@ -157,10 +157,6 @@ export class Fetcher { } catch (ex) { logger("Fetch failed %o", ex); notifier.error(ex, state); - notifier.pageFetched( - { data: RdfStore.createDefault(), url: node.target }, - state, - ); } } } From 90b1405fd5ea6a1a9dc075aacb996389767484ba Mon Sep 17 00:00:00 2001 From: ajuvercr Date: Fri, 29 Mar 2024 16:27:55 +0100 Subject: [PATCH 5/6] better start up and adapt rdf-connect processor --- bin/cli.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/cli.ts b/bin/cli.ts index 1e1288e..1ade184 100644 --- a/bin/cli.ts +++ b/bin/cli.ts @@ -157,7 +157,9 @@ async function main() { } if (!quiet) { - client.on("error", (error) => console.error("Error", error)); + client.on("error", (error) => { + console.error("Error", error); + }); } const reader = client.stream({ highWaterMark: 10 }).getReader(); From 9044edbb53a510123c6382c1ad6f4e63fcf49cb1 Mon Sep 17 00:00:00 2001 From: Pieter Colpaert Date: Fri, 12 Apr 2024 13:23:04 +0200 Subject: [PATCH 6/6] Switch to an experimental version of the RDF Store using getsubjects --- lib/client.ts | 13 +++++++++++-- lib/memberManager.ts | 4 +++- lib/pageFetcher.ts | 31 ++++++++++++++++++------------- package-lock.json | 2 +- package.json | 2 +- 5 files changed, 34 insertions(+), 18 deletions(-) diff --git a/lib/client.ts b/lib/client.ts index f874f65..03d69cd 100644 --- a/lib/client.ts +++ b/lib/client.ts @@ -148,7 +148,6 @@ async function getInfo( config.shape ? shapeConfigStore : store, dereferencer, { - cbdDefaultGraph: config.onlyDefaultGraph, fetch: config.fetch, }, ), @@ -423,7 +422,17 @@ async function fetchPage( fetch: fetch_f, }); const url = resp.url; - const data = RdfStore.createDefault(); + const data = new RdfStore({ + indexCombinations: [ + [ 'graph', 'subject', 'predicate', 'object' ], + [ 'graph', 'predicate', 'object', 'subject' ], + [ 'graph', 'object', 'subject', 'predicate' ], + ], + indexConstructor: subOptions => new RdfStoreIndexNestedMapQuoted(subOptions), + dictionary: new TermDictionaryNumberRecordFullTerms(), + dataFactory: df, + termsCardinalitySets: [ 'graph'] //enable quick overview of graphs using getGraph(); + }); await new Promise((resolve, reject) => { data.import(resp.data).on("end", resolve).on("error", reject); }); diff --git a/lib/memberManager.ts b/lib/memberManager.ts index a3ed879..dcfcd9b 100644 --- a/lib/memberManager.ts +++ b/lib/memberManager.ts @@ -44,6 +44,8 @@ export class Manager { private extractor: CBDShapeExtractor; private shapeId?: Term; + private members: Array; + private timestampPath?: Term; private isVersionOfPath?: Term; @@ -67,7 +69,7 @@ export class Manager { ) { const logger = log.extend("extract"); const members = getObjects(page.data, this.ldesId, TREE.terms.member, null); - + this.members = members; logger("%d members", members.length); const promises: Promise[] = []; diff --git a/lib/pageFetcher.ts b/lib/pageFetcher.ts index bc46d62..a375344 100644 --- a/lib/pageFetcher.ts +++ b/lib/pageFetcher.ts @@ -3,10 +3,10 @@ import { Notifier } from "./utils"; import { extractRelations, Relation } from "./page"; import debug from "debug"; import { SimpleRelation } from "./relation"; -import { RdfStore } from "rdf-stores"; +import { RdfStore, RdfStoreIndexNestedMapQuoted, TermDictionaryNumberRecordFullTerms } from "rdf-stores"; import { DataFactory } from "rdf-data-factory"; const log = debug("fetcher"); -const { namedNode } = new DataFactory(); +const df = new DataFactory(); /** * target: url to fetch @@ -124,22 +124,27 @@ export class Fetcher { logger("Cache for %s %o", node.target, cache); - const data = RdfStore.createDefault(); - let quadCount = 0; + const data = new RdfStore({ + indexCombinations: [ + [ 'graph', 'object', 'predicate', 'subject' ], + [ 'graph', 'subject', 'predicate', 'object' ], + [ 'graph', 'predicate', 'object', 'subject' ], + [ 'graph', 'object', 'subject', 'predicate' ], + ], + indexConstructor: subOptions => new RdfStoreIndexNestedMapQuoted(subOptions), + dictionary: new TermDictionaryNumberRecordFullTerms(), + dataFactory: df, + termsCardinalitySets: ['graph'] //enable quick overview of graphs + }); + logger("Start loading " + node.target + "into store"); await new Promise((resolve, reject) => { - resp.data - .on("data", (quad) => { - data.addQuad(quad); - quadCount++; - }) - .on("end", resolve) + data.import(resp.data).on("end", resolve) .on("error", reject); }); - - logger("Got data %s (%d quads)", node.target, quadCount); + logger("Imported data %s (%d quads)", node.target, data.size); for (let rel of extractRelations( data, - namedNode(resp.url), + df.namedNode(resp.url), this.loose, this.after, this.before, diff --git a/package-lock.json b/package-lock.json index aeea3a9..ebae1ed 100644 --- a/package-lock.json +++ b/package-lock.json @@ -18,7 +18,7 @@ "n3": "^1.17.3", "rdf-data-factory": "^1.1.2", "rdf-dereference": "^2.2.0", - "rdf-stores": "^1.0.0" + "rdf-stores": "github:pietercolpaert/rdf-stores.js#feature-termcardinalities" }, "bin": { "ldes-client": "dist/bin/cli.js" diff --git a/package.json b/package.json index f836f5c..20481f5 100644 --- a/package.json +++ b/package.json @@ -32,7 +32,7 @@ "n3": "^1.17.3", "rdf-data-factory": "^1.1.2", "rdf-dereference": "^2.2.0", - "rdf-stores": "^1.0.0" + "rdf-stores": "github:pietercolpaert/rdf-stores.js#feature-termcardinalities" }, "devDependencies": { "@ajuvercr/js-runner": "^0.1.20",