|
| 1 | +IMPORT TextSearch.Common; |
| 2 | + |
| 3 | +// Short local aliases for the TextSearch.Common definitions used in this file |
| 4 | +FileName_Info := Common.FileName_Info; |
| 5 | +FileNames := Common.FileNames; |
| 6 | +Types := Common.Types; |
| 7 | +TermDictionaryEntry := Common.Layouts.TermDictionaryEntry; |
| 8 | +TagDictionaryEntry := Common.Layouts.TagDictionaryEntry; |
| 9 | +TermPosting := Common.Layouts.TermPosting; |
| 10 | +TagPosting := Common.Layouts.TagPosting; |
| 11 | +PhrasePosting := Common.Layouts.PhrasePosting; |
| 12 | +DocIndex := Common.Layouts.DocIndex; |
| 13 | +DeletedDoc := Common.Layouts.DeletedDoc; |
| 14 | +// Default streams: one empty dataset per layout, used as the default dataset argument of the index definitions in Keys |
| 15 | +emptyDict := DATASET([], TermDictionaryEntry); |
| 16 | +emptyTagD := DATASET([], TagDictionaryEntry); |
| 17 | +emptyTerm := DATASET([], TermPosting); |
| 18 | +emptyTagP := DATASET([], TagPosting); |
| 19 | +emptyPhrs := DATASET([], PhrasePosting); |
| 20 | +emtpyDocs := DATASET([], DocIndex); // NOTE(review): misspelling of emptyDocs; DocumentIndex and IdentIndex reference it under this name, so rename all three together |
| 21 | +emptyDelx := DATASET([], DeletedDoc); |
| 22 | +// Keys: index definitions over the TextSearch.Common layouts; each index file name is taken from FileNames(info) at level lvl (default 0) |
| 23 | +EXPORT Keys(FileName_Info info, UNSIGNED1 lvl=0) := MODULE |
| 24 | + // Term dictionary: keyed on typTerm, kw20 (first 20 characters of kw) and termNominal; payload is termFreq, docFreq, kw, term |
| 25 | + EXPORT TermDictionary(DATASET(TermDictionaryEntry) d=emptyDict) |
| 26 | + := INDEX(d, {typTerm, UNICODE20 kw20:=kw[1..20], termNominal}, |
| 27 | + {termFreq, docFreq, kw, term}, |
| 28 | + FileNames(info).TermDictionary(lvl), SORTED); |
| 29 | + |
| 30 | + // Tag dictionary: keyed on tag20 (first 20 characters of tagName), typData, tagNominal and pathLen |
| 31 | + EXPORT TagDictionary(DATASET(TagDictionaryEntry) d=emptyTagD) |
| 32 | + := INDEX(d, {UNICODE20 tag20:=tagName[1..20], typData, tagNominal, |
| 33 | + pathLen}, |
| 34 | + {pathNominal, tagName, pathString}, |
| 35 | + FileNames(info).TagDictionary(lvl), SORTED); |
| 36 | + // Term inversion: TermPosting rows keyed on typTerm, termNominal, id and the key fields that follow them |
| 37 | + EXPORT TermIndex(DATASET(TermPosting) d=emptyTerm) |
| 38 | + := INDEX(d, {typTerm, termNominal, id, kwpBegin, start, kwpEnd, stop, |
| 39 | + pathNominal, parentNominal, preorder, parentOrd}, |
| 40 | + {depth, lp, typData, kw, term}, |
| 41 | + FileNames(info).TermIndex(lvl), SORTED); |
| 42 | + |
| 43 | + // Element inversion: only TagPosting rows whose typData is in Types.ElementDTypes; key starts with tagNominal, id |
| 44 | + EXPORT ElementIndex(DATASET(TagPosting) d=emptyTagP) |
| 45 | + := INDEX(d(typData IN Types.ElementDTypes), |
| 46 | + {tagNominal, id, kwpBegin, start, kwpEnd, stop, pathNominal, |
| 47 | + parentNominal, parentOrd, depth, preorder, typData}, |
| 48 | + {lenText, kwsText, lastOrd, tagName}, |
| 49 | + FileNames(info).ElementIndex(lvl), SORTED); |
| 50 | + |
| 51 | + // Phrase index: PhrasePosting rows; key starts with the nominal pair (nominal1, nominal2) and id |
| 52 | + EXPORT PhraseIndex(DATASET(PhrasePosting) d=emptyPhrs) |
| 53 | + := INDEX(d, {nominal1, nominal2, id, kwpBegin, start, kwpEnd, stop, |
| 54 | + pathNominal, parentNominal, preorder, parentOrd}, |
| 55 | + {kw1, lp1, term1, kw2, lp2, term2}, |
| 56 | + FileNames(info).PhraseIndex(lvl), SORTED); |
| 57 | + |
| 58 | + // Attribute index: only TagPosting rows whose typData is in Types.AttribDTypes; v10 (first 10 characters of tagValue) is the second key field |
| 59 | + EXPORT AttributeIndex(DATASET(TagPosting) d=emptyTagP) |
| 60 | + := INDEX(d(typData IN Types.AttribDTypes), |
| 61 | + {tagNominal, UNICODE10 v10:=tagValue[1..10], parentNominal, id, |
| 62 | + kwpBegin, start, kwpEnd, stop, pathNominal, preorder, parentOrd}, |
| 63 | + {typData, tagName, tagValue, pathString}, |
| 64 | + FileNames(info).AttributeIndex(lvl), SORTED); |
| 65 | + |
| 66 | + // Attribute range index: same typData filter and payload as AttributeIndex, but v10 is the last key field instead of the second |
| 67 | + EXPORT RangeIndex(DATASET(TagPosting) d=emptyTagP) |
| 68 | + := INDEX(d(typData IN Types.AttribDTypes), |
| 69 | + {tagNominal, parentNominal, id, kwpBegin, start, kwpEnd, stop, |
| 70 | + pathNominal, preorder, parentOrd, UNICODE10 v10:=tagValue[1..10]}, |
| 71 | + {typData, tagName, tagValue, pathString}, |
| 72 | + FileNames(info).RangeIndex(lvl), SORTED); |
| 73 | + |
| 74 | + // Document index: DocIndex rows keyed on id, keywords, docLength and seqKey; declared with OPT, unlike the indexes above |
| 75 | + EXPORT DocumentIndex(DATASET(DocIndex) d=emtpyDocs) |
| 76 | + := INDEX(d, {id, keywords, docLength, seqKey}, {identifier, slugLine, wunit}, |
| 77 | + FileNames(info).DocumentIndex(lvl), SORTED, OPT); |
| 78 | + |
| 79 | + // Deleted document index: DeletedDoc rows keyed on id with identifier as payload; declared with OPT |
| 80 | + EXPORT DeleteIndex(DATASET(DeletedDoc) d=emptyDelx) |
| 81 | + := INDEX(d, {id}, {identifier}, FileNames(info).DeleteIndex(lvl), SORTED, OPT); |
| 82 | + |
| 83 | + // Document ident index: DocIndex rows keyed on nominal (HASH32 of identifier, typed Types.Nominal) and id; declared with OPT |
| 84 | + EXPORT IdentIndex(DATASET(DocIndex) d=emtpyDocs) |
| 85 | + := INDEX(d, {Types.Nominal nominal:=HASH32(identifier), id}, |
| 86 | + {identifier}, |
| 87 | + FileNames(info).IdentIndx(lvl), SORTED, OPT); // NOTE(review): IdentIndx, not IdentIndex; confirm this matches the attribute name in Common.FileNames |
| 88 | +END; |
0 commit comments