diff --git a/montysolr/build.gradle.kts b/montysolr/build.gradle.kts index e27ee67a4..751b90934 100644 --- a/montysolr/build.gradle.kts +++ b/montysolr/build.gradle.kts @@ -1,6 +1,7 @@ plugins { java antlr + kotlin("jvm") version "1.9.22" } repositories { @@ -23,10 +24,16 @@ dependencies { implementation("com.anyascii:anyascii:0.3.2") implementation("org.python:jython-standalone:2.7.3") + implementation("me.lemire.integercompression:JavaFastPFOR:0.1.12") + implementation("it.unimi.dsi:fastutil-core:8.5.12") + testImplementation("junit:junit:4.13.2") testImplementation("org.antlr:stringtemplate:3.2.1") testImplementation("org.apache.solr:solr-test-framework:7.7.3") testImplementation("org.apache.lucene:lucene-test-framework:7.7.3") + + testImplementation("org.jetbrains.kotlinx:kotlinx-coroutines-core:1.8.0-RC2") + testImplementation(kotlin("stdlib-jdk8")) } java { diff --git a/montysolr/src/main/java/org/apache/solr/handler/batch/BatchProviderDumpCitationCache.java b/montysolr/src/main/java/org/apache/solr/handler/batch/BatchProviderDumpCitationCache.java index bc1d859ae..acaf0b337 100644 --- a/montysolr/src/main/java/org/apache/solr/handler/batch/BatchProviderDumpCitationCache.java +++ b/montysolr/src/main/java/org/apache/solr/handler/batch/BatchProviderDumpCitationCache.java @@ -7,6 +7,8 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.search.CitationCache; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.RefCounted; import java.io.BufferedWriter; import java.io.File; @@ -50,39 +52,52 @@ public void run(SolrQueryRequest req, BatchHandlerRequestQueue queue) throws Exc if (!returnDocids) { - SortedDocValues uniqueValueCache = req.getSearcher().getSlowAtomicReader().getSortedDocValues(uniqueField); - int paperid = 0; - while (it.hasNext()) { - int[][] data = it.next(); - int[] references = data[0]; - //TODO:rca - have a feeling this has become too convoluted - // and there must be a better way to un-invert; especially - // with docvalues - if (references != null && references.length > 0) { - if (uniqueValueCache.advanceExact(paperid)) { - ret = uniqueValueCache.binaryValue(); - out.write(ret.utf8ToString()); - out.write("\t"); - first = true; - for (int luceneDocId : references) { - if (luceneDocId == -1) - continue; - - uniqueValueCache.advanceExact(luceneDocId); + RefCounted searcherRef = req.getCore().getRealtimeSearcher(); + try { + SortedDocValues uniqueValueCache = searcherRef.get() + .getSlowAtomicReader().getSortedDocValues(uniqueField); + + int paperid = 0; + while (it.hasNext()) { + int[][] data = it.next(); + int[] references = data[0]; + //TODO:rca - have a feeling this has become too convoluted + // and there must be a better way to un-invert; especially + // with docvalues + if (references != null && references.length > 0) { + if (uniqueValueCache.advanceExact(paperid)) { ret = uniqueValueCache.binaryValue(); - - if (ret.length > 0) { - if (!first) { - out.write("\t"); + out.write(ret.utf8ToString()); + out.write("\t"); + first = true; + + // It's not possible to reset the SortedDocValues iterator, so create a new one and + // seek to the appropriate point for each referenced document. + SortedDocValues referenceValueCache = searcherRef.get() + .getSlowAtomicReader().getSortedDocValues(uniqueField); + for (int luceneDocId : references) { + if (luceneDocId == -1) + continue; + + if (referenceValueCache.advanceExact(luceneDocId)) { + ret = referenceValueCache.binaryValue(); + + if (ret.length > 0) { + if (!first) { + out.write("\t"); + } + out.write(ret.utf8ToString()); + first = false; + } } - out.write(ret.utf8ToString()); - first = false; } + out.write("\n"); } - out.write("\n"); } + paperid++; } - paperid++; + } finally { + searcherRef.decref(); } } else { int paperid = 0; diff --git a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java index c92cd4134..31c143a99 100644 --- a/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java +++ b/montysolr/src/main/java/org/apache/solr/search/CitationLRUCache.java @@ -17,6 +17,9 @@ package org.apache.solr.search; +import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; +import it.unimi.dsi.fastutil.ints.IntArrayList; import org.apache.commons.lang.NotImplementedException; import org.apache.lucene.index.*; import org.apache.lucene.util.Bits; @@ -26,9 +29,9 @@ import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.SimpleOrderedMap; -import org.apache.solr.schema.*; -import org.apache.solr.uninverting.UninvertingReader; -import org.apache.solr.uninverting.UninvertingReader.Type; +import org.apache.solr.schema.FieldType; +import org.apache.solr.schema.IndexSchema; +import org.apache.solr.schema.SchemaField; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,7 +125,7 @@ public Object init(Map args, Object persistence, CacheRegenerator regenerator) { citationFields = ((String) args.get("citationFields")).split(","); } - Float sizeInPercent = null; + float sizeInPercent = 1.0f; String str = (String) args.get("size"); if (str != null && str.endsWith("%")) { @@ -136,7 +139,7 @@ public Object init(Map args, Object persistence, CacheRegenerator regenerator) { final int initialSize = Math.min(str == null ? 1024 : Integer.parseInt(str), limit); description = generateDescription(limit, initialSize); - relationships = new RelationshipLinkedHashMap(initialSize, 0.75f, true, limit, sizeInPercent); + relationships = new RelationshipLinkedHashMap(initialSize, 0.75f, true, limit); if (persistence == null) { // must be the first time a cache of this type is being created @@ -220,11 +223,11 @@ public int getCitationsIteratorSize() { } public void insertCitation(int sourceDocid, int targetDocid) { - ((CitationLRUCache.RelationshipLinkedHashMap) relationships).addCitation(sourceDocid, targetDocid); + ((CitationLRUCache.RelationshipLinkedHashMap) relationships).addCitation(sourceDocid, targetDocid); } public void insertReference(int sourceDocid, int targetDocid) { - ((CitationLRUCache.RelationshipLinkedHashMap) relationships).addReference(sourceDocid, targetDocid); + ((CitationLRUCache.RelationshipLinkedHashMap) relationships).addReference(sourceDocid, targetDocid); } public int[] getCitations(K key) { @@ -349,6 +352,7 @@ public void warm(SolrIndexSearcher searcher, SolrCache old) { log.error("Failed loading persisted cache " + name(), e); } } else { + //noinspection AutoBoxing log.info("Will not load the cache {} current index generation differs; dump:{} != index:{}", name(), CitationCacheReaderWriter.getCacheGeneration(getCacheStorageDir(searcher)), CitationCacheReaderWriter.getIndexGeneration(searcher)); } @@ -428,7 +432,7 @@ public void set(int docbase, int docid, Object value) { if (treatIdentifiersAsText && value instanceof Integer) { value = Integer.toString((Integer) value); } - put((K) value, (V) (Integer) (docbase + docid)); + put((K) value, (V) Integer.valueOf(docbase + docid)); } }); @@ -472,37 +476,11 @@ private void warmIncrementally(SolrIndexSearcher searcher, SolrCache old) // collect ids of documents that need to be reloaded/regenerated during this // warmup run - // System.out.println("searcher: " + searcher.toString()); - // System.out.println("maxDoc: " + searcher.getIndexReader().maxDoc()); FixedBitSet toRefresh = new FixedBitSet(searcher.getIndexReader().maxDoc()); - - // System.out.println("version=" + searcher.getIndexReader().getVersion()); - // try { - // System.out.println("commit=" + searcher.getIndexReader().getIndexCommit()); - // } catch (IOException e2) { - // TODO Auto-generated catch block - // e2.printStackTrace(); - // } - - // for (IndexReaderContext c : searcher.getTopReaderContext().children()) { - // //System.out.println("context=" + c.reader().getCombinedCoreAndDeletesKey()); - // } - - // for (IndexReaderContext l : searcher.getIndexReader().leaves()) { - // //System.out.println(l); - // } - Bits liveDocs = searcher.getSlowAtomicReader().getLiveDocs(); - // System.out.println(liveDocs == null ? "liveDocs=" + null : "liveDocs=" + - // liveDocs.length()); - // System.out.println("numDeletes=" + - // searcher.getAtomicReader().numDeletedDocs()); if (liveDocs == null) { // everything is new, this could be fresh index or merged/optimized index too - // searcher.getAtomicReader().getContext().children().size() - - // other.map.clear(); // force regeneration toRefresh.set(0, toRefresh.length()); // Build the mapping from indexed values into lucene ids @@ -512,21 +490,11 @@ private void warmIncrementally(SolrIndexSearcher searcher, SolrCache old) @SuppressWarnings("unchecked") @Override public void set(int docbase, int docid, Object value) { - put((K) value, (V) (Integer) (docbase + docid)); + put((K) value, (V) Integer.valueOf(docbase + docid)); } }); - } else if (liveDocs != null) { - - Integer luceneId; - for (V v : other.relationships.values()) { - luceneId = ((Integer) v); - if (luceneId <= liveDocs.length() && !liveDocs.get(luceneId)) { // doc was either deleted or updated - // System.out.println("Found deleted: " + luceneId); - // retrieve all citations/references for this luceneId and mark these docs to be - // refreshed - } - } + } else { for (int i = 0; i < toRefresh.length(); i++) { if (liveDocs.get(i)) { @@ -584,7 +552,7 @@ public void set(int docbase, int docid, Object value) { private List getFields(SolrIndexSearcher searcher, String[] listOfFields) { - List out = new ArrayList(); + List out = new ArrayList<>(); IndexSchema schema = searcher.getCore().getLatestSchema(); if (schema.getUniqueKeyField() == null) { @@ -675,32 +643,9 @@ private void unInvertedTheDamnThing(SolrIndexSearcher searcher, List fie continue; } - SchemaField fSchema = schema.getField(field); DocValuesType fType = fi.getDocValuesType(); - Map mapping = new HashMap(); - final LeafReader unReader; - - if (fType.equals(DocValuesType.NONE)) { - Class c = fType.getClass(); - if (c.isAssignableFrom(TextField.class) || c.isAssignableFrom(StrField.class)) { - if (fSchema.multiValued()) { - mapping.put(field, Type.SORTED); - } else { - mapping.put(field, Type.BINARY); - } - } else if (c.isAssignableFrom(TrieIntField.class)) { - if (fSchema.multiValued()) { - mapping.put(field, Type.SORTED_SET_INTEGER); - } else { - mapping.put(field, Type.INTEGER_POINT); - } - } else { - continue; - } - unReader = UninvertingReader.wrap(lr, mapping::get); - } else { - unReader = lr; - } + final LeafReader unReader = lr; + switch (fType) { case NUMERIC: @@ -735,12 +680,9 @@ public void process(int docBase, int docId) throws IOException { case SORTED_SET: transformer = new Transformer() { final SortedSetDocValues dv = unReader.getSortedSetDocValues(field); - final int errs = 0; @Override public void process(int docBase, int docId) throws IOException { - if (errs > 5) - return; if (dv.advanceExact(docId)) { for (long ord = dv.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.nextOrd()) { final BytesRef value = dv.lookupOrd(ord); @@ -800,7 +742,7 @@ public String getSource() { return "$URL: http://svn.apache.org/repos/asf/lucene/dev/branches/lucene_solr_4_0/solr/core/src/java/org/apache/solr/search/LRUCache.java $"; } - @SuppressWarnings({"rawtypes", "unchecked"}) + @SuppressWarnings({"rawtypes", "unchecked", "AutoBoxing"}) public NamedList getStatistics() { NamedList lst = new SimpleOrderedMap(); synchronized (relationships) { @@ -861,77 +803,6 @@ public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, } } - /** - * Efficient resizable auto-expanding list holding int elements; - * implemented with arrays. - */ - private static final class ArrayIntList { - - private int[] elements; - private int size = 0; - - public ArrayIntList(int initialCapacity) { - elements = new int[initialCapacity]; - } - - public void add(int elem) { - if (size == elements.length) - ensureCapacity(size + 1); - elements[size++] = elem; - } - - public int[] getElements() { - int[] out = new int[size]; - System.arraycopy(elements, 0, out, 0, size); - return out; - } - - public int get(int index) { - if (index >= size) - throwIndex(index); - return elements[index]; - } - - public int size() { - return size; - } - - private void ensureCapacity(int minCapacity) { - int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1); - int[] newElements = new int[newCapacity]; - System.arraycopy(elements, 0, newElements, 0, size); - elements = newElements; - } - - private void throwIndex(int index) { - throw new IndexOutOfBoundsException("index: " + index + ", size: " + size); - } - - public String toString() { - return Arrays.toString(elements); - } - - /** - * returns the first few positions (without offsets); debug only - */ - @SuppressWarnings("unused") - public String toString(int stride) { - int s = size() / stride; - int len = Math.min(10, s); // avoid printing huge lists - StringBuilder buf = new StringBuilder(4 * len); - buf.append("["); - for (int i = 0; i < len; i++) { - buf.append(get(i * stride)); - if (i < len - 1) - buf.append(", "); - } - if (len != s) - buf.append(", ..."); // and some more... - buf.append("]"); - return buf.toString(); - } - } - /* * The main datastructure holding information about the lucene documents. * @@ -944,18 +815,18 @@ public String toString(int stride) { * maxdoc size, so that no evictions happen */ @SuppressWarnings("hiding") - public class RelationshipLinkedHashMap extends LinkedHashMap { + public static class RelationshipLinkedHashMap extends LinkedHashMap { private static final long serialVersionUID = -356203002886265188L; int slimit; - List references; - List citations; + int maxDocSize; + Int2ObjectMap references; + Int2ObjectMap citations; - public RelationshipLinkedHashMap(int initialSize, float ratio, boolean accessOrder, int limit, - Float sizeInPercent) { + public RelationshipLinkedHashMap(int initialSize, float ratio, boolean accessOrder, int limit) { super(initialSize, ratio, accessOrder); slimit = limit; - references = new ArrayList(0); // just to prevent NPE - normally, is - citations = new ArrayList(0); // initialized in initializeCitationCache + references = new Int2ObjectOpenHashMap<>(0); // just to prevent NPE - normally, is + citations = new Int2ObjectOpenHashMap<>(0); // initialized in initializeCitationCache } @SuppressWarnings("rawtypes") @@ -971,10 +842,10 @@ protected boolean removeEldestEntry(Map.Entry eldest) { } public int[] getReferences(int docid) { - if (docid < references.size() && references.get(docid) != null) { - ArrayIntList c = references.get(docid); + if (references.containsKey(docid)) { + IntArrayList c = references.get(docid); if (c != null) - return c.getElements(); + return c.toIntArray(); } return null; } @@ -984,29 +855,26 @@ public Iterator getRelationshipsIterator() { } public int relationshipsDataSize() { - return citations.size(); + return maxDocSize; } public int[] getCitations(int docid) { - if (docid < citations.size() && citations.get(docid) != null) { - ArrayIntList c = citations.get(docid); + if (citations.containsKey(docid)) { + IntArrayList c = citations.get(docid); if (c != null) - return c.getElements(); + return c.toIntArray(); } return null; } public void initializeCitationCache(int maxDocSize) { - references = new ArrayList(maxDocSize); - citations = new ArrayList(maxDocSize); - - // i was hoping thi sis not necessary, but set(index, value) - // throws errors otherwise - for (int i = 0; i < maxDocSize; i++) { - references.add(null); - citations.add(null); - } + references = new Int2ObjectOpenHashMap<>(maxDocSize); + citations = new Int2ObjectOpenHashMap<>(maxDocSize); + + references.defaultReturnValue(null); + citations.defaultReturnValue(null); + this.maxDocSize = maxDocSize; } public void addReference(int sourceDocid, Object value) { @@ -1022,6 +890,10 @@ public void addReference(int sourceDocid, Integer targetDocid) { _add(references, sourceDocid, targetDocid); } + public void addReference(int sourceDocId, int targetDocId) { + _add(references, sourceDocId, targetDocId); + } + public void addCitation(int sourceDocid, Object value) { // System.out.println("addCitation(" + sourceDocid + ", " + value + ")"); if (this.containsKey(value)) { @@ -1036,45 +908,51 @@ public void addCitation(int sourceDocid, Integer targetDocid) { _add(citations, sourceDocid, targetDocid); } - private void _add(List target, int sourceDocid, int targetDocid) { + public void addCitation(int sourceDocId, int targetDocId) { + _add(citations, sourceDocId, targetDocId); + } + + private void _add(Int2ObjectMap target, int sourceDocid, int targetDocid) { // System.out.println("_add(" + sourceDocid + "," + targetDocid+")"); if (target.get(sourceDocid) == null) { - ArrayIntList pointer = new ArrayIntList(1); + IntArrayList pointer = new IntArrayList(1); pointer.add(targetDocid); - target.set(sourceDocid, pointer); + target.put(sourceDocid, pointer); } else { target.get(sourceDocid).add(targetDocid); } } public void inferCitationsFromReferences() { - int i = -1; - for (ArrayIntList refs : references) { - i += 1; + for (Int2ObjectMap.Entry entry : references.int2ObjectEntrySet()) { + int i = entry.getIntKey(); + IntArrayList refs = entry.getValue(); if (refs == null) { continue; } + for (int j = 0; j < refs.size(); j++) { - if (refs.get(j) == -1) + if (refs.getInt(j) == -1) continue; - addCitation(refs.get(j), i); + addCitation(refs.getInt(j), i); } } } public void inferReferencesFromCitations() { - int i = -1; - for (ArrayIntList refs : citations) { - i += 1; + for (Int2ObjectMap.Entry entry : citations.int2ObjectEntrySet()) { + int i = entry.getIntKey(); + IntArrayList refs = entry.getValue(); if (refs == null) { continue; } + for (int j = 0; j < refs.size(); j++) { - if (refs.get(j) == -1) + if (refs.getInt(j) == -1) continue; - addReference(refs.get(j), i); + addReference(refs.getInt(j), i); } } } @@ -1083,20 +961,20 @@ private class CitationDataIterator implements Iterator { int cursor = 0; // index of next element to return public boolean hasNext() { - return cursor != citations.size(); + return cursor < maxDocSize; } public int[][] next() { int i = cursor; - if (i >= citations.size()) + if (i >= maxDocSize) throw new NoSuchElementException(); int[][] out = new int[2][]; - ArrayIntList v1 = references.get(cursor); - ArrayIntList v2 = citations.get(cursor); + IntArrayList v1 = references.get(cursor); + IntArrayList v2 = citations.get(cursor); - out[0] = v1 != null ? v1.getElements() : new int[0]; - out[1] = v2 != null ? v2.getElements() : new int[0]; + out[0] = v1 != null ? v1.toIntArray() : new int[0]; + out[1] = v2 != null ? v2.toIntArray() : new int[0]; cursor = i + 1; return out; @@ -1111,7 +989,7 @@ public void remove() { @Override public void initializeCitationCache(int maxDocs) { - relationships = new RelationshipLinkedHashMap(maxDocs, 0.75f, true, 1024, 100f); + relationships = new RelationshipLinkedHashMap(maxDocs, 0.75f, true, 1024); ((RelationshipLinkedHashMap) relationships).initializeCitationCache(maxDocs); if (stats == null) stats = new CumulativeStats(); diff --git a/montysolr/src/test/java/org/apache/lucene/search/TestCitationsSearch.java b/montysolr/src/test/java/org/apache/lucene/search/TestCitationsSearch.java index a05926920..49e3d2348 100644 --- a/montysolr/src/test/java/org/apache/lucene/search/TestCitationsSearch.java +++ b/montysolr/src/test/java/org/apache/lucene/search/TestCitationsSearch.java @@ -150,8 +150,9 @@ public void testCitesCollector() throws Exception { int docid = es.getKey(); int[] docids = es.getValue(); for (int reference : docids) { - List a = Arrays.stream(citations.get(reference)).boxed().collect(Collectors.toList()); - List b = Arrays.stream(citationsWrapper.getLuceneDocIds(reference)).boxed().collect(Collectors.toList()); + List a = Arrays.stream(citations.get(reference)).boxed().sorted().collect(Collectors.toList()); + List b = Arrays.stream(citationsWrapper.getLuceneDocIds(reference)).boxed().sorted().collect(Collectors.toList()); + assertTrue(a.contains(docid)); assertTrue(b.contains(docid)); assertEquals(a, b); @@ -162,10 +163,9 @@ public void testCitesCollector() throws Exception { int docid = es.getKey(); int[] docids = es.getValue(); for (int reference : docids) { - List a = Arrays.stream(references.get(reference)).boxed().collect(Collectors.toList()); - List b = Arrays.stream(referencesWrapper.getLuceneDocIds(reference)).boxed().collect(Collectors.toList()); - Collections.sort(a); - Collections.sort(b); + List a = Arrays.stream(references.get(reference)).boxed().sorted().collect(Collectors.toList()); + List b = Arrays.stream(referencesWrapper.getLuceneDocIds(reference)).boxed().sorted().collect(Collectors.toList()); + assertTrue(a.contains(docid)); assertTrue(b.contains(docid)); assertEquals(docid + " produced diff cache results", a, b); diff --git a/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java b/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java index b8ccac5f1..fe921b854 100644 --- a/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java +++ b/montysolr/src/test/java/org/apache/solr/search/TestCitationCacheSolr.java @@ -16,22 +16,24 @@ */ package org.apache.solr.search; +import it.unimi.dsi.fastutil.ints.*; import monty.solr.util.MontySolrAbstractTestCase; -import monty.solr.util.MontySolrSetup; import monty.solr.util.SolrTestSetup; import org.apache.solr.request.SolrQueryRequest; import org.junit.BeforeClass; import org.junit.Test; -import java.io.IOException; import java.lang.ref.SoftReference; import java.lang.ref.WeakReference; import java.util.Arrays; import java.util.Iterator; +import java.util.Random; @SuppressWarnings({"rawtypes", "unchecked"}) public class TestCitationCacheSolr extends MontySolrAbstractTestCase { + private Random random; + @BeforeClass public static void beforeClass() throws Exception { schemaString = "solr/collection1/conf/schema-citations-transformer.xml"; @@ -114,6 +116,7 @@ public void createIndex() throws Exception { public void setUp() throws Exception { super.setUp(); createIndex(); + random = new Random(0L); } @Override @@ -615,17 +618,17 @@ public void test() throws Exception { if (cacheName.contains("from-references")) { int[][][] expected = new int[][][]{ - new int[][]{new int[]{3, 4, 2}, new int[0]}, + new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 10, 11}}, new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 11}}, new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11}}, - new int[][]{new int[]{3, 4, 2}, new int[0]}, + new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[0], new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, - new int[][]{new int[]{4, 2, 2}, new int[0]}, + new int[][]{new int[]{2, 2, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, }; @@ -634,9 +637,9 @@ public void test() throws Exception { int[][][] expected = new int[][][]{ new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, - new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 9, 3, 4, 5, 6, 7, 10, 11, 2}}, - new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 9, 3, 4, 5, 6, 7, 11, 2}}, - new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 9, 3, 4, 5, 6, 7, 10, 11, 2}}, + new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11}}, + new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 11}}, + new int[][]{new int[]{2, 3, 4}, new int[]{0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11}}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, new int[][]{new int[]{2, 3, 4}, new int[0]}, @@ -656,6 +659,91 @@ public void test() throws Exception { } + @Test + public void testRelationshipMap() throws Exception { + CitationLRUCache.RelationshipLinkedHashMap map = + new CitationLRUCache.RelationshipLinkedHashMap<>(11, 0.75f, false, 1000); + map.initializeCitationCache(10); + + for (int i = 0; i < 10; i++) { + map.put("a"+i, i); + } + + assertEquals(10, map.size()); + + // Test citation map construction & recall + Int2ObjectMap citationMap = new Int2ObjectOpenHashMap<>(); + + for (int i = 0; i < 10; i++) { + int citations = random.nextInt(9); + IntSet set = new IntOpenHashSet(); + + for (int j = 0; j < citations; j++) { + int citedDoc = random.nextInt(10); + while (set.contains(citedDoc) || citedDoc == i) { + citedDoc = random.nextInt(10); + } + set.add(citedDoc); + + map.addCitation(i, citedDoc); + } + + citationMap.put(i, set); + } + + for (int i = 0; i < 10; i++) { + int[] citations = map.getCitations(i); + if (citations == null) + citations = new int[0]; + + int[] expected = citationMap.get(i).toIntArray(); + + Arrays.sort(citations != null ? citations : new int[0]); + Arrays.sort(expected != null ? expected : new int[0]); + + assertArrayEquals(expected, citations); + } + + // Test reference inference from citations + Int2ObjectMap referenceMap = new Int2ObjectOpenHashMap<>(); + + for (int i = 0; i < 10; i++) { + for (int j : citationMap.getOrDefault(i, new IntOpenHashSet())) { + int finalI = i; + referenceMap.compute(j, (k, v) -> { + if (v == null) { + v = new IntOpenHashSet(); + } + v.add(finalI); + return v; + }); + } + } + + map.inferReferencesFromCitations(); + + for (int i = 0; i < 10; i++) { + int[] references = map.getReferences(i); + if (references == null) + references = new int[0]; + + int[] expected = referenceMap.get(i).toIntArray(); + + Arrays.sort(references); + Arrays.sort(expected != null ? expected : new int[0]); + + assertArrayEquals(expected, references); + } + + // Test iterator + Iterator it = map.getRelationshipsIterator(); + for (int i = 0; i < 10; i++) { + int[][] data = it.next(); + assertEquals(referenceMap.getOrDefault(i, new IntArraySet()).size(), data[0].length); + assertEquals(citationMap.getOrDefault(i, new IntArraySet()).size(), data[1].length); + } + } + private int[][][] getCache(CitationLRUCache cache) { int[][][] results = new int[cache.getCitationsIteratorSize()][2][]; Iterator it = cache.getCitationGraph(); @@ -663,6 +751,9 @@ private int[][][] getCache(CitationLRUCache cache) { int j = 0; while (it.hasNext()) { int[][] data = it.next(); + Arrays.sort(data[0]); + Arrays.sort(data[1]); + results[j] = data; j += 1; }