diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbRecordIterator.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbRecordIterator.java index 68c5352a73..6f9005e73f 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbRecordIterator.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbRecordIterator.java @@ -10,17 +10,19 @@ *******************************************************************************/ package org.eclipse.rdf4j.sail.lmdb; -import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.E; -import static org.lwjgl.util.lmdb.LMDB.MDB_NEXT; +import static org.lwjgl.util.lmdb.LMDB.MDB_FIRST; +import static org.lwjgl.util.lmdb.LMDB.MDB_FIRST_DUP; +import static org.lwjgl.util.lmdb.LMDB.MDB_GET_BOTH_RANGE; +import static org.lwjgl.util.lmdb.LMDB.MDB_NEXT_DUP; +import static org.lwjgl.util.lmdb.LMDB.MDB_NEXT_NODUP; import static org.lwjgl.util.lmdb.LMDB.MDB_NOTFOUND; -import static org.lwjgl.util.lmdb.LMDB.MDB_SET; import static org.lwjgl.util.lmdb.LMDB.MDB_SET_RANGE; import static org.lwjgl.util.lmdb.LMDB.MDB_SUCCESS; import static org.lwjgl.util.lmdb.LMDB.mdb_cmp; import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_close; import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_get; -import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_open; import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_renew; +import static org.lwjgl.util.lmdb.LMDB.mdb_dcmp; import java.io.IOException; import java.nio.ByteBuffer; @@ -29,9 +31,8 @@ import org.eclipse.rdf4j.sail.SailException; import org.eclipse.rdf4j.sail.lmdb.TripleStore.TripleIndex; import org.eclipse.rdf4j.sail.lmdb.TxnManager.Txn; -import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher; -import org.lwjgl.PointerBuffer; -import org.lwjgl.system.MemoryStack; +import org.eclipse.rdf4j.sail.lmdb.util.EntryMatcher; +import org.lwjgl.system.MemoryUtil; import org.lwjgl.util.lmdb.MDBVal; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,102 +41,119 @@ * A record iterator that wraps a native LMDB iterator. */ class LmdbRecordIterator implements RecordIterator { + private static final Logger log = LoggerFactory.getLogger(LmdbRecordIterator.class); - private final Pool pool; - private final TripleIndex index; + static class State { - private final long subj; - private final long pred; - private final long obj; - private final long context; + private TripleIndex index; - private final long cursor; + private long cursor; - private final MDBVal maxKey; + private final MDBVal maxKey = MDBVal.malloc(); + private final MDBVal maxValue = MDBVal.malloc(); - private final boolean matchValues; - private GroupMatcher groupMatcher; + private boolean matchValues; + private EntryMatcher matcher; - private final Txn txnRef; + private Txn txnRef; - private long txnRefVersion; + private long txnRefVersion; - private final long txn; + private long txn; - private final int dbi; + private int dbi; - private volatile boolean closed = false; + private final MDBVal keyData = MDBVal.malloc(); - private final MDBVal keyData; + private final MDBVal valueData = MDBVal.malloc(); - private final MDBVal valueData; + private ByteBuffer valueBuf; - private ByteBuffer minKeyBuf; + private final ByteBuffer minKeyBuf = MemoryUtil.memAlloc((Long.BYTES + 1) * 4); - private ByteBuffer maxKeyBuf; + private final ByteBuffer minValueBuf = MemoryUtil.memAlloc((Long.BYTES + 1) * 4); - private int lastResult; + private final ByteBuffer maxKeyBuf = MemoryUtil.memAlloc((Long.BYTES + 1) * 4); - private final long[] quad; - private final long[] originalQuad; + private final ByteBuffer maxValueBuf = MemoryUtil.memAlloc((Long.BYTES + 1) * 4); - private boolean fetchNext = false; + private long[] quad; + private long[] patternQuad; + + private StampedLongAdderLockManager txnLockManager; + + private int indexScore; - private final StampedLongAdderLockManager txnLockManager; + void close() { + if (cursor != 0) { + mdb_cursor_close(cursor); + cursor = 0; + } + keyData.close(); + valueData.close(); + MemoryUtil.memFree(minKeyBuf); + MemoryUtil.memFree(minValueBuf); + MemoryUtil.memFree(maxKeyBuf); + maxKey.close(); + MemoryUtil.memFree(maxValueBuf); + maxValue.close(); + } + } private final Thread ownerThread = Thread.currentThread(); + private final State state; + private final boolean keyELementsFixed; + private volatile boolean closed = false; + private boolean fetchNext = false; - LmdbRecordIterator(TripleIndex index, boolean rangeSearch, long subj, long pred, long obj, + LmdbRecordIterator(TripleIndex index, int indexScore, long subj, long pred, long obj, long context, boolean explicit, Txn txnRef) throws IOException { - this.subj = subj; - this.pred = pred; - this.obj = obj; - this.context = context; - this.originalQuad = new long[] { subj, pred, obj, context }; - this.quad = new long[] { subj, pred, obj, context }; - this.pool = Pool.get(); - this.keyData = pool.getVal(); - this.valueData = pool.getVal(); - this.index = index; - if (rangeSearch) { - minKeyBuf = pool.getKeyBuffer(); - index.getMinKey(minKeyBuf, subj, pred, obj, context); - minKeyBuf.flip(); - - this.maxKey = pool.getVal(); - this.maxKeyBuf = pool.getKeyBuffer(); - index.getMaxKey(maxKeyBuf, subj, pred, obj, context); - maxKeyBuf.flip(); - this.maxKey.mv_data(maxKeyBuf); - } else { - minKeyBuf = null; - this.maxKey = null; + this.state = Pool.get().getState(); + this.state.patternQuad = new long[] { subj, pred, obj, context }; + this.state.quad = new long[] { subj, pred, obj, context }; + this.state.index = index; + this.state.indexScore = indexScore; + this.keyELementsFixed = indexScore >= index.getIndexSplitPosition(); + + // prepare min and max keys if index can be used + // otherwise, leave as null to indicate full scan + if (indexScore > 0) { + state.minKeyBuf.clear(); + state.minValueBuf.clear(); + index.getMinEntry(state.minKeyBuf, state.minValueBuf, subj, pred, obj, context); + state.minKeyBuf.flip(); + state.minValueBuf.flip(); + + state.maxKeyBuf.clear(); + state.maxValueBuf.clear(); + index.getMaxEntry(state.maxKeyBuf, state.maxValueBuf, subj, pred, obj, context); + state.maxKeyBuf.flip(); + state.maxValueBuf.flip(); + state.maxKey.mv_data(state.maxKeyBuf); + state.maxValue.mv_data(state.maxValueBuf); } - this.matchValues = subj > 0 || pred > 0 || obj > 0 || context >= 0; + state.matchValues = subj > 0 || pred > 0 || obj > 0 || context >= 0; + state.matcher = null; - this.dbi = index.getDB(explicit); - this.txnRef = txnRef; - this.txnLockManager = txnRef.lockManager(); + var dbi = index.getDB(explicit); long readStamp; try { - readStamp = txnLockManager.readLock(); + readStamp = txnRef.lockManager().readLock(); } catch (InterruptedException e) { throw new SailException(e); } try { - this.txnRefVersion = txnRef.version(); - this.txn = txnRef.get(); - - try (MemoryStack stack = MemoryStack.stackPush()) { - PointerBuffer pp = stack.mallocPointer(1); - E(mdb_cursor_open(txn, dbi, pp)); - cursor = pp.get(0); - } + state.dbi = dbi; + state.txnRef = txnRef; + state.txnLockManager = txnRef.lockManager(); + state.txnRefVersion = txnRef.version(); + state.txn = txnRef.get(); + state.cursor = txnRef.getCursor(dbi); } finally { - txnLockManager.unlockRead(readStamp); + txnRef.lockManager().unlockRead(readStamp); } } @@ -143,33 +161,38 @@ class LmdbRecordIterator implements RecordIterator { public long[] next() { long readStamp; try { - readStamp = txnLockManager.readLock(); + readStamp = state.txnLockManager.readLock(); } catch (InterruptedException e) { throw new SailException(e); } + int lastResult; try { if (closed) { log.debug("Calling next() on an LmdbRecordIterator that is already closed, returning null"); return null; } - if (txnRefVersion != txnRef.version()) { + if (state.txnRefVersion != state.txnRef.version()) { // TODO: None of the tests in the LMDB Store cover this case! // cursor must be renewed - mdb_cursor_renew(txn, cursor); + mdb_cursor_renew(state.txn, state.cursor); if (fetchNext) { // cursor must be positioned on last item, reuse minKeyBuf if available - if (minKeyBuf == null) { - minKeyBuf = pool.getKeyBuffer(); - } - minKeyBuf.clear(); - index.toKey(minKeyBuf, quad[0], quad[1], quad[2], quad[3]); - minKeyBuf.flip(); - keyData.mv_data(minKeyBuf); - lastResult = mdb_cursor_get(cursor, keyData, valueData, MDB_SET); - if (lastResult != MDB_SUCCESS) { - // use MDB_SET_RANGE if key was deleted - lastResult = mdb_cursor_get(cursor, keyData, valueData, MDB_SET_RANGE); + state.minKeyBuf.clear(); + state.minValueBuf.clear(); + state.index.toEntry(state.minKeyBuf, state.minValueBuf, state.quad[0], state.quad[1], state.quad[2], + state.quad[3]); + state.minKeyBuf.flip(); + state.minValueBuf.flip(); + state.keyData.mv_data(state.minKeyBuf); + // use set range if entry was deleted + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_SET_RANGE); + if (lastResult == MDB_SUCCESS) { + state.valueData.mv_data(state.minValueBuf); + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_GET_BOTH_RANGE); + if (lastResult != MDB_SUCCESS) { + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_FIRST_DUP); + } } if (lastResult != MDB_SUCCESS) { closeInternal(false); @@ -177,52 +200,112 @@ public long[] next() { } } // update version of txn ref - this.txnRefVersion = txnRef.version(); + state.txnRefVersion = state.txnRef.version(); } + boolean isDupValue = false; if (fetchNext) { - lastResult = mdb_cursor_get(cursor, keyData, valueData, MDB_NEXT); + if (!state.valueBuf.hasRemaining()) { + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_NEXT_DUP); + if (lastResult != MDB_SUCCESS) { + // no more duplicates, move to next key + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_NEXT_NODUP); + } else { + isDupValue = true; + } + if (lastResult == MDB_SUCCESS) { + state.valueBuf = state.valueData.mv_data(); + } + } else { + lastResult = MDB_SUCCESS; + } fetchNext = false; } else { - if (minKeyBuf != null) { + if (state.indexScore > 0) { // set cursor to min key - keyData.mv_data(minKeyBuf); - lastResult = mdb_cursor_get(cursor, keyData, valueData, MDB_SET_RANGE); + state.keyData.mv_data(state.minKeyBuf); + // set range on key is only required if less than the first two key elements are fixed + lastResult = keyELementsFixed ? MDB_SUCCESS + : mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_SET_RANGE); + if (lastResult == MDB_SUCCESS) { + state.valueData.mv_data(state.minValueBuf); + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_GET_BOTH_RANGE); + if (lastResult != MDB_SUCCESS) { + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_FIRST_DUP); + } else { + isDupValue = keyELementsFixed; + } + } } else { // set cursor to first item - lastResult = mdb_cursor_get(cursor, keyData, valueData, MDB_NEXT); + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_FIRST); + } + if (lastResult == MDB_SUCCESS) { + state.valueBuf = state.valueData.mv_data(); } } while (lastResult == MDB_SUCCESS) { - // if (maxKey != null && TripleStore.COMPARATOR.compare(keyData.mv_data(), maxKey.mv_data()) > 0) { - if (maxKey != null && mdb_cmp(txn, dbi, keyData, maxKey) > 0) { - lastResult = MDB_NOTFOUND; - } else if (matches()) { - // value doesn't match search key/mask, fetch next value - lastResult = mdb_cursor_get(cursor, keyData, valueData, MDB_NEXT); - } else { - // Matching value found - index.keyToQuad(keyData.mv_data(), originalQuad, quad); - // fetch next value - fetchNext = true; - return quad; + if (state.indexScore > 0) { + int keyDiff = isDupValue ? 0 : mdb_cmp(state.txn, state.dbi, state.keyData, state.maxKey); + if (keyDiff > 0) { + break; + } + int valueDiff = TripleStore.compareRegion(state.valueBuf, state.valueBuf.position(), + state.maxValueBuf, 0, + Math.min(state.valueBuf.remaining(), state.maxValueBuf.remaining())); + if (valueDiff > 0) { + break; + } + } + + // value doesn't match search key/mask, fetch next value + if (!state.valueBuf.hasRemaining()) { + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_NEXT_DUP); + if (lastResult != MDB_SUCCESS) { + // no more duplicates, move to next key + lastResult = mdb_cursor_get(state.cursor, state.keyData, state.valueData, MDB_NEXT_NODUP); + isDupValue = false; + } + if (lastResult == MDB_SUCCESS) { + state.valueBuf = state.valueData.mv_data(); + } else { + break; + } + } + + int valueBufPos = state.valueBuf.position(); + if (notMatches(isDupValue, isDupValue ? null : state.keyData.mv_data(), state.valueBuf)) { + state.valueBuf.position(valueBufPos); + int skip = 4 - state.index.getIndexSplitPosition(); + for (int i = 0; i < skip; i++) { + TripleStore.skipVarint(state.valueBuf); + } + continue; } + state.valueBuf.position(valueBufPos); + + // Matching value found + state.index.entryToQuad(state.keyData.mv_data(), state.valueBuf, state.patternQuad, state.quad); + // fetch next value + fetchNext = true; + return state.quad; } closeInternal(false); return null; } finally { - txnLockManager.unlockRead(readStamp); + state.txnLockManager.unlockRead(readStamp); } } - private boolean matches() { - - if (groupMatcher != null) { - return !this.groupMatcher.matches(keyData.mv_data()); - } else if (matchValues) { - this.groupMatcher = index.createMatcher(subj, pred, obj, context); - return !this.groupMatcher.matches(keyData.mv_data()); + private boolean notMatches(boolean testValueOnly, ByteBuffer keyBuf, ByteBuffer valueBuf) { + if (state.matcher != null) { + return testValueOnly ? !state.matcher.matchesValue(valueBuf) : !state.matcher.matches(keyBuf, valueBuf); + } else if (state.matchValues) { + // lazy init of group matcher + state.matcher = state.index.createMatcher(state.patternQuad[0], state.patternQuad[1], state.patternQuad[2], + state.patternQuad[3]); + return testValueOnly ? !state.matcher.matchesValue(valueBuf) : !state.matcher.matches(keyBuf, valueBuf); } else { return false; } @@ -234,7 +317,7 @@ private void closeInternal(boolean maybeCalledAsync) { boolean writeLocked = false; if (maybeCalledAsync && ownerThread != Thread.currentThread()) { try { - writeStamp = txnLockManager.writeLock(); + writeStamp = state.txnLockManager.writeLock(); writeLocked = true; } catch (InterruptedException e) { throw new SailException(e); @@ -242,21 +325,14 @@ private void closeInternal(boolean maybeCalledAsync) { } try { if (!closed) { - mdb_cursor_close(cursor); - pool.free(keyData); - pool.free(valueData); - if (minKeyBuf != null) { - pool.free(minKeyBuf); - } - if (maxKey != null) { - pool.free(maxKeyBuf); - pool.free(maxKey); - } + state.txnRef.returnCursor(state.dbi, state.cursor); + state.cursor = 0; + Pool.get().free(state); } } finally { closed = true; if (writeLocked) { - txnLockManager.unlockWrite(writeStamp); + state.txnLockManager.unlockWrite(writeStamp); } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Pool.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Pool.java index ebb9952582..8493c7b6b7 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Pool.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Pool.java @@ -22,12 +22,14 @@ class Pool { // thread-local pool instance private static final ThreadLocal threadlocal = ThreadLocal.withInitial(Pool::new); - private final MDBVal[] valPool = new MDBVal[1024]; - private final ByteBuffer[] keyPool = new ByteBuffer[1024]; + private final MDBVal[] valPool = new MDBVal[2048]; + private final ByteBuffer[] keyPool = new ByteBuffer[2048]; private final Statistics[] statisticsPool = new Statistics[512]; + private final LmdbRecordIterator.State[] statePool = new LmdbRecordIterator.State[128]; private int valPoolIndex = -1; private int keyPoolIndex = -1; private int statisticsPoolIndex = -1; + private int statePoolIndex = -1; final MDBVal getVal() { if (valPoolIndex >= 0) { @@ -36,6 +38,13 @@ final MDBVal getVal() { return MDBVal.malloc(); } + final LmdbRecordIterator.State getState() { + if (statePoolIndex >= 0) { + return statePool[statePoolIndex--]; + } + return new LmdbRecordIterator.State(); + } + final ByteBuffer getKeyBuffer() { if (keyPoolIndex >= 0) { ByteBuffer bb = keyPool[keyPoolIndex--]; @@ -74,7 +83,18 @@ final void free(Statistics statistics) { } } + final void free(LmdbRecordIterator.State state) { + if (statePoolIndex < statePool.length - 1) { + statePool[++statePoolIndex] = state; + } else { + state.close(); + } + } + final void close() { + while (statePoolIndex >= 0) { + statePool[statePoolIndex--].close(); + } while (valPoolIndex >= 0) { valPool[valPoolIndex--].close(); } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java index 059bb51e66..d71153859e 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java @@ -14,28 +14,35 @@ import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.openDatabase; import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.readTransaction; import static org.eclipse.rdf4j.sail.lmdb.LmdbUtil.transaction; -import static org.eclipse.rdf4j.sail.lmdb.Varint.readQuadUnsigned; +import static org.eclipse.rdf4j.sail.lmdb.Varint.firstToLength; import static org.lwjgl.system.MemoryStack.stackPush; import static org.lwjgl.system.MemoryUtil.NULL; +import static org.lwjgl.util.lmdb.LMDB.MDB_APPENDDUP; import static org.lwjgl.util.lmdb.LMDB.MDB_CREATE; +import static org.lwjgl.util.lmdb.LMDB.MDB_DUPSORT; import static org.lwjgl.util.lmdb.LMDB.MDB_FIRST; +import static org.lwjgl.util.lmdb.LMDB.MDB_GET_BOTH_RANGE; import static org.lwjgl.util.lmdb.LMDB.MDB_KEYEXIST; import static org.lwjgl.util.lmdb.LMDB.MDB_LAST; +import static org.lwjgl.util.lmdb.LMDB.MDB_LAST_DUP; import static org.lwjgl.util.lmdb.LMDB.MDB_NEXT; import static org.lwjgl.util.lmdb.LMDB.MDB_NOMETASYNC; -import static org.lwjgl.util.lmdb.LMDB.MDB_NOOVERWRITE; import static org.lwjgl.util.lmdb.LMDB.MDB_NOSYNC; import static org.lwjgl.util.lmdb.LMDB.MDB_NOTFOUND; import static org.lwjgl.util.lmdb.LMDB.MDB_NOTLS; import static org.lwjgl.util.lmdb.LMDB.MDB_PREV; +import static org.lwjgl.util.lmdb.LMDB.MDB_SET; import static org.lwjgl.util.lmdb.LMDB.MDB_SET_RANGE; import static org.lwjgl.util.lmdb.LMDB.MDB_SUCCESS; import static org.lwjgl.util.lmdb.LMDB.mdb_cmp; import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_close; +import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_del; import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_get; import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_open; +import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_put; import static org.lwjgl.util.lmdb.LMDB.mdb_dbi_close; import static org.lwjgl.util.lmdb.LMDB.mdb_dbi_open; +import static org.lwjgl.util.lmdb.LMDB.mdb_dcmp; import static org.lwjgl.util.lmdb.LMDB.mdb_del; import static org.lwjgl.util.lmdb.LMDB.mdb_drop; import static org.lwjgl.util.lmdb.LMDB.mdb_env_close; @@ -65,7 +72,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -85,8 +91,8 @@ import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.Record; import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.RecordCacheIterator; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; -import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher; -import org.eclipse.rdf4j.sail.lmdb.util.IndexKeyWriters; +import org.eclipse.rdf4j.sail.lmdb.util.EntryMatcher; +import org.eclipse.rdf4j.sail.lmdb.util.IndexEntryWriters; import org.lwjgl.PointerBuffer; import org.lwjgl.system.MemoryStack; import org.lwjgl.util.lmdb.MDBEnvInfo; @@ -160,6 +166,7 @@ class TripleStore implements Closeable { * The list of triple indexes that are used to store and retrieve triples. */ private final List indexes = new ArrayList<>(); + private final int splitPosition = 1; private final ValueStore valueStore; private long env; @@ -173,27 +180,6 @@ class TripleStore implements Closeable { private TxnRecordCache recordCache = null; - static final Comparator COMPARATOR = new Comparator() { - @Override - public int compare(ByteBuffer b1, ByteBuffer b2) { - int b1Len = b1.remaining(); - int b2Len = b2.remaining(); - int diff = compareRegion(b1, b1.position(), b2, b2.position(), Math.min(b1Len, b2Len)); - if (diff != 0) { - return diff; - } - return b1Len > b2Len ? 1 : -1; - } - - public int compareRegion(ByteBuffer array1, int startIdx1, ByteBuffer array2, int startIdx2, int length) { - int result = 0; - for (int i = 0; result == 0 && i < length; i++) { - result = (array1.get(startIdx1 + i) & 0xff) - (array2.get(startIdx2 + i) & 0xff); - } - return result; - } - }; - TripleStore(File dir, LmdbStoreConfig config, ValueStore valueStore) throws IOException, SailException { this.dir = dir; this.forceSync = config.getForceSync(); @@ -344,7 +330,7 @@ private Set parseIndexSpecList(String indexSpecStr) throws SailException private void initIndexes(Set indexSpecs, long tripleDbSize) throws IOException { for (String fieldSeq : indexSpecs) { logger.trace("Initializing index '{}'...", fieldSeq); - indexes.add(new TripleIndex(fieldSeq)); + indexes.add(new TripleIndex(fieldSeq, splitPosition)); } // initialize page size and set map size for env @@ -392,22 +378,26 @@ private void reindex(Set currentIndexSpecs, Set newIndexSpecs) t ByteBuffer keyBuf = stack.malloc(MAX_KEY_LENGTH); keyValue.mv_data(keyBuf); MDBVal dataValue = MDBVal.callocStack(stack); + ByteBuffer dataBuf = stack.malloc(MAX_KEY_LENGTH); + dataValue.mv_data(dataBuf); for (String fieldSeq : addedIndexSpecs) { logger.debug("Initializing new index '{}'...", fieldSeq); - TripleIndex addedIndex = new TripleIndex(fieldSeq); + TripleIndex addedIndex = new TripleIndex(fieldSeq, splitPosition); RecordIterator[] sourceIter = { null }; try { - sourceIter[0] = new LmdbRecordIterator(sourceIndex, false, -1, -1, -1, -1, + sourceIter[0] = new LmdbRecordIterator(sourceIndex, 0, -1, -1, -1, -1, explicit, txnManager.createTxn(txn)); RecordIterator it = sourceIter[0]; long[] quad; while ((quad = it.next()) != null) { keyBuf.clear(); - addedIndex.toKey(keyBuf, quad[SUBJ_IDX], quad[PRED_IDX], quad[OBJ_IDX], + dataBuf.clear(); + addedIndex.toEntry(keyBuf, dataBuf, quad[SUBJ_IDX], quad[PRED_IDX], quad[OBJ_IDX], quad[CONTEXT_IDX]); keyBuf.flip(); + dataBuf.flip(); E(mdb_put(txn, addedIndex.getDB(explicit), keyValue, dataValue, 0)); } @@ -502,7 +492,7 @@ public RecordIterator getAllTriplesSortedByContext(Txn txn) throws IOException { for (TripleIndex index : indexes) { if (index.getFieldSeq()[0] == 'c') { // found a context-first index - return getTriplesUsingIndex(txn, -1, -1, -1, -1, true, index, false); + return getTriplesUsingIndex(txn, -1, -1, -1, -1, true, index, 0); } } return null; @@ -512,8 +502,8 @@ public RecordIterator getTriples(Txn txn, long subj, long pred, long obj, long c throws IOException { TripleIndex index = getBestIndex(subj, pred, obj, context); // System.out.println("get triples: " + Arrays.asList(subj, pred, obj,context)); - boolean doRangeSearch = index.getPatternScore(subj, pred, obj, context) > 0; - return getTriplesUsingIndex(txn, subj, pred, obj, context, explicit, index, doRangeSearch); + int indexScore = index.getPatternScore(subj, pred, obj, context); + return getTriplesUsingIndex(txn, subj, pred, obj, context, explicit, index, indexScore); } boolean hasTriples(boolean explicit) throws IOException { @@ -526,8 +516,8 @@ boolean hasTriples(boolean explicit) throws IOException { } private RecordIterator getTriplesUsingIndex(Txn txn, long subj, long pred, long obj, long context, - boolean explicit, TripleIndex index, boolean rangeSearch) throws IOException { - return new LmdbRecordIterator(index, rangeSearch, subj, pred, obj, context, explicit, txn); + boolean explicit, TripleIndex index, int indexScore) throws IOException { + return new LmdbRecordIterator(index, indexScore, subj, pred, obj, context, explicit, txn); } /** @@ -562,10 +552,13 @@ protected void filterUsedIds(Collection ids) throws IOException { readTransaction(env, (stack, txn) -> { MDBVal maxKey = MDBVal.malloc(stack); ByteBuffer maxKeyBuf = stack.malloc(TripleStore.MAX_KEY_LENGTH); + MDBVal maxValue = MDBVal.malloc(stack); + ByteBuffer maxValueBuf = stack.malloc(TripleStore.MAX_KEY_LENGTH); MDBVal keyData = MDBVal.malloc(stack); ByteBuffer keyBuf = stack.malloc(TripleStore.MAX_KEY_LENGTH); MDBVal valueData = MDBVal.mallocStack(stack); + ByteBuffer valueBuf = stack.malloc(TripleStore.MAX_KEY_LENGTH); PointerBuffer pp = stack.mallocPointer(1); @@ -607,7 +600,7 @@ protected void filterUsedIds(Collection ids) throws IOException { long[] quad = new long[4]; int rc = mdb_cursor_get(cursor, keyData, valueData, MDB_FIRST); while (rc == MDB_SUCCESS && !ids.isEmpty()) { - index.keyToQuad(keyData.mv_data(), quad); + index.entryToQuad(keyData.mv_data(), valueData.mv_data(), quad); ids.remove(quad[0]); ids.remove(quad[1]); ids.remove(quad[2]); @@ -630,26 +623,33 @@ protected void filterUsedIds(Collection ids) throws IOException { long subj = c == 0 ? id : -1, pred = c == 1 ? id : -1, obj = c == 2 ? id : -1, context = c == 3 ? id : -1; - GroupMatcher matcher = index.createMatcher(subj, pred, obj, context); + EntryMatcher matcher = index.createMatcher(subj, pred, obj, context); maxKeyBuf.clear(); - index.getMaxKey(maxKeyBuf, subj, pred, obj, context); + maxValueBuf.clear(); + index.getMaxEntry(maxKeyBuf, maxValueBuf, subj, pred, obj, context); maxKeyBuf.flip(); maxKey.mv_data(maxKeyBuf); + maxKeyBuf.flip(); + maxValue.mv_data(maxValueBuf); keyBuf.clear(); - index.getMinKey(keyBuf, subj, pred, obj, context); + valueBuf.clear(); + index.getMinEntry(keyBuf, valueBuf, subj, pred, obj, context); keyBuf.flip(); + valueBuf.flip(); // set cursor to min key keyData.mv_data(keyBuf); + valueData.mv_data(valueBuf); int rc = mdb_cursor_get(cursor, keyData, valueData, MDB_SET_RANGE); boolean exists = false; while (!exists && rc == MDB_SUCCESS) { - if (mdb_cmp(txn, dbi, keyData, maxKey) > 0) { + int keyDiff = mdb_cmp(txn, dbi, keyData, maxKey); + if (keyDiff > 0 || (keyDiff == 0 && mdb_dcmp(txn, dbi, valueData, maxValue) > 0)) { // id was not found break; - } else if (!matcher.matches(keyData.mv_data())) { + } else if (!matcher.matches(keyData.mv_data(), valueData.mv_data())) { // value doesn't match search key/mask, fetch next value rc = mdb_cursor_get(cursor, keyData, valueData, MDB_NEXT); } else { @@ -697,15 +697,20 @@ protected double cardinality(long subj, long pred, long obj, long context) throw try { MDBVal maxKey = MDBVal.malloc(stack); ByteBuffer maxKeyBuf = stack.malloc(TripleStore.MAX_KEY_LENGTH); - index.getMaxKey(maxKeyBuf, subj, pred, obj, context); + MDBVal maxValue = MDBVal.malloc(stack); + ByteBuffer maxValueBuf = stack.malloc(TripleStore.MAX_KEY_LENGTH); + index.getMaxEntry(maxKeyBuf, maxValueBuf, subj, pred, obj, context); maxKeyBuf.flip(); maxKey.mv_data(maxKeyBuf); + maxValueBuf.flip(); + maxValue.mv_data(maxValueBuf); PointerBuffer pp = stack.mallocPointer(1); MDBVal keyData = MDBVal.mallocStack(stack); ByteBuffer keyBuf = stack.malloc(TripleStore.MAX_KEY_LENGTH); MDBVal valueData = MDBVal.mallocStack(stack); + ByteBuffer valueBuf = stack.malloc(TripleStore.MAX_KEY_LENGTH); double cardinality = 0; for (boolean explicit : new boolean[] { true, false }) { @@ -713,14 +718,15 @@ protected double cardinality(long subj, long pred, long obj, long context) throw Arrays.fill(s.avgRowsPerValueCounts, 0); keyBuf.clear(); - index.getMinKey(keyBuf, subj, pred, obj, context); + valueBuf.clear(); + index.getMinEntry(keyBuf, valueBuf, subj, pred, obj, context); keyBuf.flip(); + valueBuf.flip(); int dbi = index.getDB(explicit); - int pos = 0; + int pos; long cursor = 0; - try { E(mdb_cursor_open(txn, dbi, pp)); cursor = pp.get(0); @@ -728,10 +734,21 @@ protected double cardinality(long subj, long pred, long obj, long context) throw // set cursor to min key keyData.mv_data(keyBuf); int rc = mdb_cursor_get(cursor, keyData, valueData, MDB_SET_RANGE); - if (rc != MDB_SUCCESS || mdb_cmp(txn, dbi, keyData, maxKey) >= 0) { + if (rc == MDB_SUCCESS) { + valueData.mv_data(valueBuf); + rc = mdb_cursor_get(cursor, keyData, valueData, MDB_GET_BOTH_RANGE); + if (rc != MDB_SUCCESS) { + rc = mdb_cursor_get(cursor, keyData, valueData, MDB_LAST_DUP); + } + } + int keyDiff; + if (rc != MDB_SUCCESS || + (keyDiff = mdb_cmp(txn, dbi, keyData, maxKey)) >= 0 && + (keyDiff > 0 || mdb_dcmp(txn, dbi, valueData, maxValue) >= 0)) { break; } else { - Varint.readListUnsigned(keyData.mv_data(), s.minValues); + IndexEntryWriters.read(keyData.mv_data(), valueData.mv_data(), index.indexSplitPosition, + s.minValues); } // set cursor to max key @@ -741,13 +758,22 @@ protected double cardinality(long subj, long pred, long obj, long context) throw // directly go to last value rc = mdb_cursor_get(cursor, keyData, valueData, MDB_LAST); } else { - // go to previous value of selected key - rc = mdb_cursor_get(cursor, keyData, valueData, MDB_PREV); + valueData.mv_data(maxValueBuf); + rc = mdb_cursor_get(cursor, keyData, valueData, MDB_GET_BOTH_RANGE); + if (rc != MDB_SUCCESS) { + rc = mdb_cursor_get(cursor, keyData, valueData, MDB_LAST_DUP); + } else { + // TODO check if this is correct + // if (rc == MDB_SUCCESS) { + // go to previous value of selected key + rc = mdb_cursor_get(cursor, keyData, valueData, MDB_PREV); + } } if (rc == MDB_SUCCESS) { - Varint.readListUnsigned(keyData.mv_data(), s.maxValues); + IndexEntryWriters.read(keyData.mv_data(), valueData.mv_data(), index.indexSplitPosition, + s.maxValues); // this is required to correctly estimate the range size at a later point - s.startValues[s.MAX_BUCKETS] = s.maxValues; + s.startValues[Statistics.MAX_BUCKETS] = s.maxValues; } else { break; } @@ -755,20 +781,32 @@ protected double cardinality(long subj, long pred, long obj, long context) throw long allSamplesCount = 0; int bucket = 0; boolean endOfRange = false; - for (; bucket < s.MAX_BUCKETS && !endOfRange; bucket++) { + for (; bucket < Statistics.MAX_BUCKETS && !endOfRange; bucket++) { if (bucket != 0) { - bucketStart((double) bucket / s.MAX_BUCKETS, s.minValues, s.maxValues, s.values); + bucketStart((double) bucket / Statistics.MAX_BUCKETS, s.minValues, s.maxValues, + s.values); keyBuf.clear(); - Varint.writeListUnsigned(keyBuf, s.values); + valueBuf.clear(); + IndexEntryWriters.write(keyBuf, valueBuf, index.indexSplitPosition, + s.values[0], s.values[1], s.values[2], s.values[3]); keyBuf.flip(); + valueBuf.flip(); } // this is the min key for the first iteration keyData.mv_data(keyBuf); int currentSamplesCount = 0; rc = mdb_cursor_get(cursor, keyData, valueData, MDB_SET_RANGE); - while (rc == MDB_SUCCESS && currentSamplesCount < s.MAX_SAMPLES_PER_BUCKET) { - if (mdb_cmp(txn, dbi, keyData, maxKey) >= 0) { + if (rc == MDB_SUCCESS) { + valueData.mv_data(valueBuf); + rc = mdb_cursor_get(cursor, keyData, valueData, MDB_GET_BOTH_RANGE); + if (rc != MDB_SUCCESS) { + rc = mdb_cursor_get(cursor, keyData, valueData, MDB_LAST_DUP); + } + } + while (rc == MDB_SUCCESS && currentSamplesCount < Statistics.MAX_SAMPLES_PER_BUCKET) { + keyDiff = mdb_cmp(txn, dbi, keyData, maxKey); + if (keyDiff > 0 || keyDiff == 0 && mdb_dcmp(txn, dbi, valueData, maxValue) >= 0) { endOfRange = true; break; } else { @@ -776,7 +814,8 @@ protected double cardinality(long subj, long pred, long obj, long context) throw currentSamplesCount++; System.arraycopy(s.values, 0, s.lastValues[bucket], 0, s.values.length); - Varint.readListUnsigned(keyData.mv_data(), s.values); + IndexEntryWriters.read(keyData.mv_data(), valueData.mv_data(), + index.indexSplitPosition, s.values); if (currentSamplesCount == 1) { Arrays.fill(s.counts, 1); @@ -862,6 +901,113 @@ private boolean requiresResize() { } } + private int merge(long cursor, int elements, MDBVal keyVal, MDBVal dataVal, ByteBuffer newValueBuf, + ByteBuffer target) throws IOException { + int rc = mdb_cursor_get(cursor, keyVal, dataVal, MDB_SET); + int newValuesSize = 0; + if (rc == MDB_SUCCESS) { + rc = E(mdb_cursor_get(cursor, keyVal, dataVal, MDB_GET_BOTH_RANGE)); + if (rc == MDB_SUCCESS) { + ByteBuffer existing = dataVal.mv_data(); + + newValuesSize = newValueBuf.remaining(); + int existingValuesSize = existing.remaining(); + int diff = -1; + while (existing.hasRemaining() && + (diff = compareRegion(newValueBuf, 0, existing, existing.position(), + Math.min(newValuesSize, existing.remaining()))) > 0) { + for (int i = 0; i < elements; i++) { + skipVarint(existing); + } + } + if (diff == 0) { + return MDB_KEYEXIST; + } + int insertPos = existing.position(); + if (insertPos > 0) { + // copy existing elements and insert new elements in between + target.clear(); + target.put(existing.duplicate().flip()); + target.put(newValueBuf); + if (existing.hasRemaining()) { + target.put(existing); + } + // delete existing entry + E(mdb_cursor_del(cursor, 0)); + // store one or more new entries + int totalSize = target.position(); + target.flip(); + if (insertPos + newValuesSize > 500) { + target.limit(insertPos); + dataVal.mv_data(target); + E(mdb_cursor_put(cursor, keyVal, dataVal, 0)); + target.position(insertPos); + target.limit(totalSize); + } + if (target.remaining() > 500) { + target.limit(insertPos + newValuesSize); + dataVal.mv_data(target); + E(mdb_cursor_put(cursor, keyVal, dataVal, 0)); + target.position(insertPos + newValuesSize); + target.limit(totalSize); + } + if (target.hasRemaining()) { + dataVal.mv_data(target); + E(mdb_cursor_put(cursor, keyVal, dataVal, 0)); + } + } else { + // prepend to value + if (existingValuesSize + newValuesSize <= 500) { + target.clear(); + target.put(newValueBuf); + target.put(existing); + target.flip(); + dataVal.mv_data(target); + E(mdb_cursor_del(cursor, 0)); + E(mdb_cursor_put(cursor, keyVal, dataVal, 0)); + } else { + dataVal.mv_data(newValueBuf); + E(mdb_cursor_put(cursor, keyVal, dataVal, 0)); + } + } + return MDB_SUCCESS; + } else { + E(mdb_cursor_get(cursor, keyVal, dataVal, MDB_LAST_DUP)); + if (dataVal.mv_data().remaining() + newValuesSize <= 500) { + // append to last existing value + ByteBuffer existing = dataVal.mv_data(); + target.clear(); + target.put(existing); + target.put(newValueBuf); + target.flip(); + E(mdb_cursor_del(cursor, 0)); + dataVal.mv_data(target); + E(mdb_cursor_put(cursor, keyVal, dataVal, MDB_APPENDDUP)); + return MDB_SUCCESS; + } + } + } + dataVal.mv_data(newValueBuf); + E(mdb_cursor_put(cursor, keyVal, dataVal, 0)); + return MDB_SUCCESS; + } + + static int compareRegion(ByteBuffer bb1, int startIdx1, ByteBuffer bb2, int startIdx2, int length) { + int result = 0; + for (int i = 0; result == 0 && i < length; i++) { + result = (bb1.get(startIdx1 + i) & 0xff) - (bb2.get(startIdx2 + i) & 0xff); + } + return result; + } + + static void skipVarint(ByteBuffer other) { + int i = firstToLength(other.get()) - 1; + assert i >= 0; + if (i > 0) { + other.position(i + other.position()); + } + } + public boolean storeTriple(long subj, long pred, long obj, long context, boolean explicit) throws IOException { TripleIndex mainIndex = indexes.get(0); boolean stAdded; @@ -870,9 +1016,14 @@ public boolean storeTriple(long subj, long pred, long obj, long context, boolean // use calloc to get an empty data value MDBVal dataVal = MDBVal.calloc(stack); ByteBuffer keyBuf = stack.malloc(MAX_KEY_LENGTH); - mainIndex.toKey(keyBuf, subj, pred, obj, context); + ByteBuffer valueBuf = stack.malloc(MAX_KEY_LENGTH); + ByteBuffer mergedBuf = stack.malloc(500 + MAX_KEY_LENGTH); + mainIndex.toEntry(keyBuf, valueBuf, subj, pred, obj, context); keyBuf.flip(); keyVal.mv_data(keyBuf); + valueBuf.flip(); + dataVal.mv_data(valueBuf); + PointerBuffer pCursor = stack.mallocPointer(1); if (recordCache == null) { if (requiresResize()) { @@ -883,7 +1034,7 @@ public boolean storeTriple(long subj, long pred, long obj, long context, boolean } if (recordCache != null) { - long quad[] = new long[] { subj, pred, obj, context }; + long[] quad = new long[] { subj, pred, obj, context }; if (explicit) { // remove implicit statement recordCache.removeRecord(quad, false); @@ -892,36 +1043,50 @@ public boolean storeTriple(long subj, long pred, long obj, long context, boolean return recordCache.storeRecord(quad, explicit); } - int rc = mdb_put(writeTxn, mainIndex.getDB(explicit), keyVal, dataVal, MDB_NOOVERWRITE); - if (rc != MDB_SUCCESS && rc != MDB_KEYEXIST) { - throw new IOException(mdb_strerror(rc)); + mdb_cursor_open(writeTxn, mainIndex.getDB(explicit), pCursor); + long cursor = pCursor.get(0); + try { + int rc = merge(cursor, 4 - mainIndex.indexSplitPosition, keyVal, dataVal, valueBuf, mergedBuf); + if (rc != MDB_SUCCESS && rc != MDB_KEYEXIST) { + throw new IOException(mdb_strerror(rc)); + } + stAdded = rc == MDB_SUCCESS; + } finally { + mdb_cursor_close(cursor); } - stAdded = rc == MDB_SUCCESS; + boolean foundImplicit = false; if (explicit && stAdded) { - foundImplicit = mdb_del(writeTxn, mainIndex.getDB(false), keyVal, dataVal) == MDB_SUCCESS; + // foundImplicit = mdb_del(writeTxn, mainIndex.getDB(false), keyVal, dataVal) == MDB_SUCCESS; } if (stAdded) { for (int i = 1; i < indexes.size(); i++) { - TripleIndex index = indexes.get(i); keyBuf.clear(); - index.toKey(keyBuf, subj, pred, obj, context); + valueBuf.clear(); + index.toEntry(keyBuf, valueBuf, subj, pred, obj, context); keyBuf.flip(); + valueBuf.flip(); // update buffer positions in MDBVal keyVal.mv_data(keyBuf); + dataVal.mv_data(valueBuf); if (foundImplicit) { - E(mdb_del(writeTxn, mainIndex.getDB(false), keyVal, dataVal)); + // E(mdb_del(writeTxn, index.getDB(false), keyVal, dataVal)); } - E(mdb_put(writeTxn, index.getDB(explicit), keyVal, dataVal, 0)); - } - if (stAdded) { - incrementContext(stack, context); + mdb_cursor_open(writeTxn, index.getDB(explicit), pCursor); + cursor = pCursor.get(0); + try { + merge(cursor, 4 - index.indexSplitPosition, keyVal, dataVal, valueBuf, mergedBuf); + } finally { + mdb_cursor_close(cursor); + } } + + incrementContext(stack, context); } } @@ -1004,9 +1169,12 @@ public void removeTriples(RecordIterator it, boolean explicit, Consumer try (it; MemoryStack stack = MemoryStack.stackPush()) { MDBVal keyValue = MDBVal.callocStack(stack); ByteBuffer keyBuf = stack.malloc(MAX_KEY_LENGTH); + MDBVal dataValue = MDBVal.callocStack(stack); + ByteBuffer valueBuf = stack.malloc(MAX_KEY_LENGTH); - long[] quad; - while ((quad = it.next()) != null) { + long[] quad = it.next(); + long[] toDelete = new long[4]; + while (quad != null) { if (recordCache == null) { if (requiresResize()) { // map is full, resize required @@ -1018,21 +1186,30 @@ public void removeTriples(RecordIterator it, boolean explicit, Consumer if (recordCache != null) { recordCache.removeRecord(quad, explicit); handler.accept(quad); + quad = it.next(); continue; } + // copy quad that is going to be deleted and go to next one + // this is required as the it.next() relies on the key and value buffers of the current quad + System.arraycopy(quad, 0, toDelete, 0, 4); + quad = it.next(); for (TripleIndex index : indexes) { keyBuf.clear(); - index.toKey(keyBuf, quad[SUBJ_IDX], quad[PRED_IDX], quad[OBJ_IDX], quad[CONTEXT_IDX]); + valueBuf.clear(); + index.toEntry(keyBuf, valueBuf, toDelete[SUBJ_IDX], toDelete[PRED_IDX], toDelete[OBJ_IDX], + toDelete[CONTEXT_IDX]); keyBuf.flip(); + valueBuf.flip(); // update buffer positions in MDBVal keyValue.mv_data(keyBuf); + dataValue.mv_data(valueBuf); - E(mdb_del(writeTxn, index.getDB(explicit), keyValue, null)); + E(mdb_del(writeTxn, index.getDB(explicit), keyValue, dataValue)); } - decrementContext(stack, quad[CONTEXT_IDX]); - handler.accept(quad); + decrementContext(stack, toDelete[CONTEXT_IDX]); + handler.accept(toDelete); } } } @@ -1047,6 +1224,7 @@ protected void updateFromCache() throws IOException { // use calloc to get an empty data value MDBVal dataVal = MDBVal.callocStack(stack); ByteBuffer keyBuf = stack.malloc(MAX_KEY_LENGTH); + ByteBuffer dataBuf = stack.malloc(MAX_KEY_LENGTH); Record r; while ((r = it.next()) != null) { @@ -1063,15 +1241,18 @@ protected void updateFromCache() throws IOException { for (int i = 0; i < indexes.size(); i++) { TripleIndex index = indexes.get(i); keyBuf.clear(); - index.toKey(keyBuf, r.quad[0], r.quad[1], r.quad[2], r.quad[3]); + dataBuf.clear(); + index.toEntry(keyBuf, dataBuf, r.quad[0], r.quad[1], r.quad[2], r.quad[3]); keyBuf.flip(); + dataBuf.flip(); // update buffer positions in MDBVal keyVal.mv_data(keyBuf); + dataVal.mv_data(dataBuf); if (r.add) { E(mdb_put(writeTxn, index.getDB(explicit), keyVal, dataVal, 0)); } else { - E(mdb_del(writeTxn, index.getDB(explicit), keyVal, null)); + E(mdb_del(writeTxn, index.getDB(explicit), keyVal, dataVal)); } } } @@ -1179,20 +1360,24 @@ private void storeProperties(File propFile) throws IOException { class TripleIndex { + private final int indexSplitPosition; private final char[] fieldSeq; - private final IndexKeyWriters.KeyWriter keyWriter; - private final IndexKeyWriters.MatcherFactory matcherFactory; + private final IndexEntryWriters.EntryWriter entryWriter; + private final IndexEntryWriters.MatcherFactory matcherFactory; private final int dbiExplicit, dbiInferred; private final int[] indexMap; - public TripleIndex(String fieldSeq) throws IOException { + public TripleIndex(String fieldSeq, int indexSplitPosition) throws IOException { this.fieldSeq = fieldSeq.toCharArray(); - this.keyWriter = IndexKeyWriters.forFieldSeq(fieldSeq); - this.matcherFactory = IndexKeyWriters.matcherFactory(fieldSeq); + // adjust split position for indexes starting with context + this.indexSplitPosition = fieldSeq.startsWith("c") ? Math.min(indexSplitPosition + 1, 4) + : indexSplitPosition; + this.entryWriter = IndexEntryWriters.forFieldSeq(fieldSeq); + this.matcherFactory = IndexEntryWriters.matcherFactory(fieldSeq); this.indexMap = getIndexes(this.fieldSeq); // open database and use native sort order without comparator - dbiExplicit = openDatabase(env, fieldSeq, MDB_CREATE, null); - dbiInferred = openDatabase(env, fieldSeq + "-inf", MDB_CREATE, null); + dbiExplicit = openDatabase(env, fieldSeq, MDB_CREATE | MDB_DUPSORT /* | MDB_DUPFIXED */, null); + dbiInferred = openDatabase(env, fieldSeq + "-inf", MDB_CREATE | MDB_DUPSORT /* | MDB_DUPFIXED */, null); } public char[] getFieldSeq() { @@ -1277,53 +1462,37 @@ public int getPatternScore(long subj, long pred, long obj, long context) { return score; } - void getMinKey(ByteBuffer bb, long subj, long pred, long obj, long context) { + void getMinEntry(ByteBuffer key, ByteBuffer value, long subj, long pred, long obj, long context) { subj = subj <= 0 ? 0 : subj; pred = pred <= 0 ? 0 : pred; obj = obj <= 0 ? 0 : obj; context = context <= 0 ? 0 : context; - toKey(bb, subj, pred, obj, context); + toEntry(key, value, subj, pred, obj, context); } - void getMaxKey(ByteBuffer bb, long subj, long pred, long obj, long context) { + void getMaxEntry(ByteBuffer key, ByteBuffer value, long subj, long pred, long obj, long context) { subj = subj <= 0 ? Long.MAX_VALUE : subj; pred = pred <= 0 ? Long.MAX_VALUE : pred; obj = obj <= 0 ? Long.MAX_VALUE : obj; context = context < 0 ? Long.MAX_VALUE : context; - toKey(bb, subj, pred, obj, context); + toEntry(key, value, subj, pred, obj, context); } - GroupMatcher createMatcher(long subj, long pred, long obj, long context) { - int length = getLength(subj, pred, obj, context); - - ByteBuffer bb = ByteBuffer.allocate(length); - toKey(bb, subj == -1 ? 0 : subj, pred == -1 ? 0 : pred, obj == -1 ? 0 : obj, context == -1 ? 0 : context); - bb.flip(); - - return new GroupMatcher(bb.array(), matcherFactory.create(subj, pred, obj, context)); + EntryMatcher createMatcher(long subj, long pred, long obj, long context) { + ByteBuffer key = ByteBuffer.allocate(Math.max(1, indexSplitPosition) * (Long.BYTES + 1)); + ByteBuffer value = ByteBuffer.allocate((4 - indexSplitPosition) * (Long.BYTES + 1)); + toEntry(key, value, subj == -1 ? 0 : subj, pred == -1 ? 0 : pred, obj == -1 ? 0 : obj, + context == -1 ? 0 : context); + return new EntryMatcher(indexSplitPosition, key.array(), value.array(), + matcherFactory.create(subj, pred, obj, context)); } - private int getLength(long subj, long pred, long obj, long context) { - int length = 4; - if (subj > 240) { - length += 8; - } - if (pred > 240) { - length += 8; - - } - if (obj > 240) { - length += 8; - - } - if (context > 240) { - length += 8; - - } - return length; + public int getIndexSplitPosition() { + return indexSplitPosition; } class KeyStats { + long subj; long pred; long obj; @@ -1359,7 +1528,6 @@ public int hashCode() { public void print() { if (count.sum() % 1000000 == 0) { - try { System.out.println("Key " + new String(getFieldSeq()) + " " + Arrays.asList(valueStore.getValue(subj), valueStore.getValue(pred), @@ -1373,51 +1541,161 @@ public void print() { } } - void toKey(ByteBuffer bb, long subj, long pred, long obj, long context) { - - boolean shouldCache = threeOfFourAreZeroOrMax(subj, pred, obj, context); - if (shouldCache) { - long sum = subj + pred + obj + context; - if (sum == 0 && subj == pred && obj == context) { - bb.put(Varint.ALL_ZERO_QUAD); - return; - } - - if (sum < 241) { // keys with sum < 241 only need 4 bytes to write and don't need caching - shouldCache = false; - } - - } - - // Pass through to the keyWriter with caching hint - keyWriter.write(bb, subj, pred, obj, context, shouldCache); - } - - void keyToQuad(ByteBuffer key, long[] quad) { - readQuadUnsigned(key, indexMap, quad); + void toEntry(ByteBuffer key, ByteBuffer value, long subj, long pred, long obj, long context) { + entryWriter.write(key, value, indexSplitPosition, subj, pred, obj, context); } - void keyToQuad(ByteBuffer key, long[] originalQuad, long[] quad) { - // directly use index map to read values in to correct positions - if (originalQuad[indexMap[0]] != -1) { - Varint.skipUnsigned(key); - } else { + void entryToQuad(ByteBuffer key, ByteBuffer value, long[] quad) { + switch (indexSplitPosition) { + case 0: + quad[indexMap[0]] = Varint.readUnsigned(value); + quad[indexMap[1]] = Varint.readUnsigned(value); + quad[indexMap[2]] = Varint.readUnsigned(value); + quad[indexMap[3]] = Varint.readUnsigned(value); + break; + case 1: + quad[indexMap[0]] = Varint.readUnsigned(key); + quad[indexMap[1]] = Varint.readUnsigned(value); + quad[indexMap[2]] = Varint.readUnsigned(value); + quad[indexMap[3]] = Varint.readUnsigned(value); + break; + case 2: + quad[indexMap[0]] = Varint.readUnsigned(key); + quad[indexMap[1]] = Varint.readUnsigned(key); + quad[indexMap[2]] = Varint.readUnsigned(value); + quad[indexMap[3]] = Varint.readUnsigned(value); + break; + case 3: + quad[indexMap[0]] = Varint.readUnsigned(key); + quad[indexMap[1]] = Varint.readUnsigned(key); + quad[indexMap[2]] = Varint.readUnsigned(key); + quad[indexMap[3]] = Varint.readUnsigned(value); + break; + case 4: quad[indexMap[0]] = Varint.readUnsigned(key); - } - if (originalQuad[indexMap[1]] != -1) { - Varint.skipUnsigned(key); - } else { quad[indexMap[1]] = Varint.readUnsigned(key); - } - if (originalQuad[indexMap[2]] != -1) { - Varint.skipUnsigned(key); - } else { quad[indexMap[2]] = Varint.readUnsigned(key); - } - if (originalQuad[indexMap[3]] != -1) { - Varint.skipUnsigned(key); - } else { quad[indexMap[3]] = Varint.readUnsigned(key); + break; + } + } + + void entryToQuad(ByteBuffer key, ByteBuffer value, long[] originalQuad, long[] quad) { + switch (indexSplitPosition) { + case 0: + // directly use index map to read values in to correct positions + if (originalQuad[indexMap[0]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[0]] = Varint.readUnsigned(value); + } + if (originalQuad[indexMap[1]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[1]] = Varint.readUnsigned(value); + } + if (originalQuad[indexMap[2]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[2]] = Varint.readUnsigned(value); + } + if (originalQuad[indexMap[3]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[3]] = Varint.readUnsigned(value); + } + break; + case 1: + // directly use index map to read values in to correct positions + if (originalQuad[indexMap[0]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[0]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[1]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[1]] = Varint.readUnsigned(value); + } + if (originalQuad[indexMap[2]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[2]] = Varint.readUnsigned(value); + } + if (originalQuad[indexMap[3]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[3]] = Varint.readUnsigned(value); + } + break; + case 2: + // directly use index map to read values in to correct positions + if (originalQuad[indexMap[0]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[0]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[1]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[1]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[2]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[2]] = Varint.readUnsigned(value); + } + if (originalQuad[indexMap[3]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[3]] = Varint.readUnsigned(value); + } + break; + case 3: + // directly use index map to read values in to correct positions + if (originalQuad[indexMap[0]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[0]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[1]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[1]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[2]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[2]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[3]] != -1) { + Varint.skipUnsigned(value); + } else { + quad[indexMap[3]] = Varint.readUnsigned(value); + } + break; + case 4: + // directly use index map to read values in to correct positions + if (originalQuad[indexMap[0]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[0]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[1]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[1]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[2]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[2]] = Varint.readUnsigned(key); + } + if (originalQuad[indexMap[3]] != -1) { + Varint.skipUnsigned(key); + } else { + quad[indexMap[3]] = Varint.readUnsigned(key); + } } } @@ -1441,20 +1719,4 @@ void destroy(long txn) { mdb_drop(txn, dbiInferred, true); } } - - static boolean threeOfFourAreZeroOrMax(long subj, long pred, long obj, long context) { - // Precompute the 8 equalities once (cheapest operations here) - boolean zS = subj == 0L, zP = pred == 0L, zO = obj == 0L, zC = context == 0L; - boolean mS = subj == Long.MAX_VALUE, mP = pred == Long.MAX_VALUE, mO = obj == Long.MAX_VALUE, - mC = context == Long.MAX_VALUE; - - // ≥3-of-4 ≡ ab(c∨d) ∨ cd(a∨b). Apply once for zeros and once for maxes. - // Using '&' and '|' (not &&/||) keeps it branchless and predictable. - - return (((zS & zP & (zO | zC)) | (zO & zC & (zS | zP)))// ≥3 zeros - | ((mS & mP & (mO | mC)) | (mO & mC & (mS | mP))));// ≥3 Long.MAX_VALUE -// & !(zS & zP & zO & zC) // not all zeros -// & !(mS & mP & mO & mC); // not all max - } - } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnManager.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnManager.java index cf3d486b6f..77c1d5ea32 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnManager.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnManager.java @@ -14,6 +14,9 @@ import static org.lwjgl.system.MemoryStack.stackPush; import static org.lwjgl.system.MemoryUtil.NULL; import static org.lwjgl.util.lmdb.LMDB.MDB_RDONLY; +import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_close; +import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_open; +import static org.lwjgl.util.lmdb.LMDB.mdb_cursor_renew; import static org.lwjgl.util.lmdb.LMDB.mdb_txn_abort; import static org.lwjgl.util.lmdb.LMDB.mdb_txn_begin; import static org.lwjgl.util.lmdb.LMDB.mdb_txn_renew; @@ -64,7 +67,7 @@ private long startReadTxn() throws IOException { * @return the txn reference object */ Txn createTxn(long txn) { - return new Txn(txn) { + return new Txn(txn, false) { @Override public void close() { // do nothing @@ -79,7 +82,7 @@ public void close() { * @throws IOException if the transaction cannot be started for some reason */ Txn createReadTxn() throws IOException { - Txn txnRef = new Txn(createReadTxnInternal()); + Txn txnRef = new Txn(createReadTxnInternal(), true); synchronized (active) { active.put(txnRef, Boolean.TRUE); } @@ -161,12 +164,24 @@ enum Mode { } class Txn implements Closeable, AutoCloseable { + private static final int MAX_DBI = 16; - private long txn; + private final long txn; private long version; + private final boolean poolCursors; + private final long[][] cursorPool; + private final int[] cursorPoolIndex; - Txn(long txn) { + Txn(long txn, boolean poolCursors) { this.txn = txn; + this.poolCursors = poolCursors; + if (poolCursors) { + cursorPool = new long[MAX_DBI][64]; + cursorPoolIndex = new int[MAX_DBI]; + } else { + cursorPool = new long[0][]; + cursorPoolIndex = new int[0]; + } } long get() { @@ -197,8 +212,48 @@ private void free(long txn) { } } + long getCursor(int dbi) throws IOException { + if (poolCursors) { + synchronized (cursorPool[dbi]) { + if (cursorPoolIndex[dbi] > 0) { + return cursorPool[dbi][--cursorPoolIndex[dbi]]; + } + } + } + try (MemoryStack stack = MemoryStack.stackPush()) { + PointerBuffer pp = stack.mallocPointer(1); + E(mdb_cursor_open(txn, dbi, pp)); + return pp.get(0); + } + } + + void returnCursor(int dbi, long cursor) { + if (poolCursors) { + synchronized (cursorPool[dbi]) { + if (cursorPoolIndex[dbi] < cursorPool[dbi].length) { + cursorPool[dbi][cursorPoolIndex[dbi]++] = cursor; + } else { + mdb_cursor_close(cursor); + } + } + } else { + mdb_cursor_close(cursor); + } + } + + void closeCursors() { + for (int i = 0; i < cursorPool.length; i++) { + synchronized (cursorPool[i]) { + while (cursorPoolIndex[i] > 0) { + mdb_cursor_close(cursorPool[i][--cursorPoolIndex[i]]); + } + } + } + } + @Override public void close() { + closeCursors(); synchronized (active) { active.remove(this); } @@ -212,6 +267,7 @@ void reset() throws IOException { mdb_txn_reset(txn); E(mdb_txn_renew(txn)); version++; + closeCursors(); } /** diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnRecordCache.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnRecordCache.java index 9974f6a2df..1803ba1ed2 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnRecordCache.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TxnRecordCache.java @@ -197,7 +197,11 @@ protected RecordCacheIterator(int dbi) throws IOException { public Record next() { if (mdb_cursor_get(cursor, keyData, valueData, MDB_NEXT) == MDB_SUCCESS) { - Varint.readListUnsigned(keyData.mv_data(), quad); + ByteBuffer keyBuf = keyData.mv_data(); + quad[0] = Varint.readUnsigned(keyBuf); + quad[1] = Varint.readUnsigned(keyBuf); + quad[2] = Varint.readUnsigned(keyBuf); + quad[3] = Varint.readUnsigned(keyBuf); byte op = valueData.mv_data().get(0); Record r = new Record(); r.quad = quad; diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java index af6632804d..63c9817b88 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java @@ -13,6 +13,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import org.eclipse.rdf4j.sail.lmdb.util.EntryMatcher; import org.eclipse.rdf4j.sail.lmdb.util.SignificantBytesBE; /** @@ -42,6 +43,7 @@ public final class Varint { }; static final byte[] ALL_ZERO_QUAD = new byte[] { 0, 0, 0, 0 }; + static final byte[] ALL_ZERO_PAIR = new byte[] { 0, 0 }; private Varint() { } @@ -552,7 +554,7 @@ private static int compareRegion(ByteBuffer bb1, int startIdx1, ByteBuffer bb2, } /** - * Use of this class is deprecated, use {@link org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher} instead. + * Use of this class is deprecated, use {@link EntryMatcher} instead. */ @Deprecated(forRemoval = true) public static class GroupMatcher { diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/EntryMatcher.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/EntryMatcher.java new file mode 100644 index 0000000000..7b3e6669fb --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/EntryMatcher.java @@ -0,0 +1,66 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.lmdb.util; + +import java.nio.ByteBuffer; + +/** + * A matcher for entries consisting of a key and a value, each matched by their respective matcher. + */ +public class EntryMatcher { + private final VarintMatcher keyMatcher; + private final VarintMatcher valueMatcher; + + public EntryMatcher(int split, byte[] keyArray, byte[] valueArray, boolean[] shouldMatch) { + assert shouldMatch.length == 4; + boolean[] keyShouldMatch; + boolean[] valueShouldMatch; + switch (split) { + case 0: + keyShouldMatch = new boolean[] {}; + valueShouldMatch = new boolean[] { shouldMatch[0], shouldMatch[1], shouldMatch[2], shouldMatch[3] }; + break; + case 1: + keyShouldMatch = new boolean[] { shouldMatch[0] }; + valueShouldMatch = new boolean[] { shouldMatch[1], shouldMatch[2], shouldMatch[3] }; + break; + case 2: + keyShouldMatch = new boolean[] { shouldMatch[0], shouldMatch[1] }; + valueShouldMatch = new boolean[] { shouldMatch[2], shouldMatch[3] }; + break; + case 3: + keyShouldMatch = new boolean[] { shouldMatch[0], shouldMatch[1], shouldMatch[2] }; + valueShouldMatch = new boolean[] { shouldMatch[3] }; + break; + case 4: + keyShouldMatch = new boolean[] { shouldMatch[0], shouldMatch[1], shouldMatch[2], shouldMatch[3] }; + valueShouldMatch = new boolean[] {}; + break; + default: + throw new IllegalArgumentException("Split must be between 0 and 4 inclusive"); + } + this.keyMatcher = new VarintMatcher(keyArray, keyShouldMatch); + this.valueMatcher = new VarintMatcher(valueArray, valueShouldMatch); + } + + public boolean matches(ByteBuffer key, ByteBuffer value) { + return keyMatcher.matches(key) && valueMatcher.matches(value); + } + + public boolean matchesKey(ByteBuffer key) { + return keyMatcher.matches(key); + } + + public boolean matchesValue(ByteBuffer value) { + return valueMatcher.matches(value); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/IndexEntryWriters.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/IndexEntryWriters.java new file mode 100644 index 0000000000..0b405ac6fb --- /dev/null +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/IndexEntryWriters.java @@ -0,0 +1,449 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb.util; + +import java.nio.ByteBuffer; + +import org.eclipse.rdf4j.sail.lmdb.Varint; + +public final class IndexEntryWriters { + + private IndexEntryWriters() { + } + + @FunctionalInterface + public interface EntryWriter { + void write(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context); + } + + @FunctionalInterface + public interface MatcherFactory { + boolean[] create(long subj, long pred, long obj, long context); + } + + public static EntryWriter forFieldSeq(String fieldSeq) { + final EntryWriter writer; + switch (fieldSeq) { + case "spoc": + writer = IndexEntryWriters::spoc; + break; + case "spco": + writer = IndexEntryWriters::spco; + break; + case "sopc": + writer = IndexEntryWriters::sopc; + break; + case "socp": + writer = IndexEntryWriters::socp; + break; + case "scpo": + writer = IndexEntryWriters::scpo; + break; + case "scop": + writer = IndexEntryWriters::scop; + break; + case "psoc": + writer = IndexEntryWriters::psoc; + break; + case "psco": + writer = IndexEntryWriters::psco; + break; + case "posc": + writer = IndexEntryWriters::posc; + break; + case "pocs": + writer = IndexEntryWriters::pocs; + break; + case "pcso": + writer = IndexEntryWriters::pcso; + break; + case "pcos": + writer = IndexEntryWriters::pcos; + break; + case "ospc": + writer = IndexEntryWriters::ospc; + break; + case "oscp": + writer = IndexEntryWriters::oscp; + break; + case "opsc": + writer = IndexEntryWriters::opsc; + break; + case "opcs": + writer = IndexEntryWriters::opcs; + break; + case "ocsp": + writer = IndexEntryWriters::ocsp; + break; + case "ocps": + writer = IndexEntryWriters::ocps; + break; + case "cspo": + writer = IndexEntryWriters::cspo; + break; + case "csop": + writer = IndexEntryWriters::csop; + break; + case "cpso": + writer = IndexEntryWriters::cpso; + break; + case "cpos": + writer = IndexEntryWriters::cpos; + break; + case "cosp": + writer = IndexEntryWriters::cosp; + break; + case "cops": + writer = IndexEntryWriters::cops; + break; + default: + throw new IllegalArgumentException("Unsupported field sequence: " + fieldSeq); + } + return writer; + } + + public static IndexEntryWriters.MatcherFactory matcherFactory(String fieldSeq) { + switch (fieldSeq) { + case "spoc": + return IndexEntryWriters::spocShouldMatch; + case "spco": + return IndexEntryWriters::spcoShouldMatch; + case "sopc": + return IndexEntryWriters::sopcShouldMatch; + case "socp": + return IndexEntryWriters::socpShouldMatch; + case "scpo": + return IndexEntryWriters::scpoShouldMatch; + case "scop": + return IndexEntryWriters::scopShouldMatch; + case "psoc": + return IndexEntryWriters::psocShouldMatch; + case "psco": + return IndexEntryWriters::pscoShouldMatch; + case "posc": + return IndexEntryWriters::poscShouldMatch; + case "pocs": + return IndexEntryWriters::pocsShouldMatch; + case "pcso": + return IndexEntryWriters::pcsoShouldMatch; + case "pcos": + return IndexEntryWriters::pcosShouldMatch; + case "ospc": + return IndexEntryWriters::ospcShouldMatch; + case "oscp": + return IndexEntryWriters::oscpShouldMatch; + case "opsc": + return IndexEntryWriters::opscShouldMatch; + case "opcs": + return IndexEntryWriters::opcsShouldMatch; + case "ocsp": + return IndexEntryWriters::ocspShouldMatch; + case "ocps": + return IndexEntryWriters::ocpsShouldMatch; + case "cspo": + return IndexEntryWriters::cspoShouldMatch; + case "csop": + return IndexEntryWriters::csopShouldMatch; + case "cpso": + return IndexEntryWriters::cpsoShouldMatch; + case "cpos": + return IndexEntryWriters::cposShouldMatch; + case "cosp": + return IndexEntryWriters::cospShouldMatch; + case "cops": + return IndexEntryWriters::copsShouldMatch; + default: + throw new IllegalArgumentException("Unsupported field sequence: " + fieldSeq); + } + } + + static final byte[] ZERO_BYTES = new byte[4 * (Long.BYTES + 1)]; + + static void fill(ByteBuffer buffer, int length) { + buffer.put(ZERO_BYTES, 0, length); + } + + public static void read(ByteBuffer key, ByteBuffer value, int indexSplitPosition, long[] values) { + switch (indexSplitPosition) { + case 0: + values[0] = Varint.readUnsigned(value); + values[1] = Varint.readUnsigned(value); + values[2] = Varint.readUnsigned(value); + values[3] = Varint.readUnsigned(value); + break; + case 1: + values[0] = Varint.readUnsigned(key); + values[1] = Varint.readUnsigned(value); + values[2] = Varint.readUnsigned(value); + values[3] = Varint.readUnsigned(value); + break; + case 2: + values[0] = Varint.readUnsigned(key); + values[1] = Varint.readUnsigned(key); + values[2] = Varint.readUnsigned(value); + values[3] = Varint.readUnsigned(value); + break; + case 3: + values[0] = Varint.readUnsigned(key); + values[1] = Varint.readUnsigned(key); + values[2] = Varint.readUnsigned(key); + values[3] = Varint.readUnsigned(value); + break; + case 4: + values[0] = Varint.readUnsigned(key); + values[1] = Varint.readUnsigned(key); + values[2] = Varint.readUnsigned(key); + values[3] = Varint.readUnsigned(key); + break; + } + } + + public static void write(ByteBuffer key, ByteBuffer value, int splitIndex, long first, long second, long third, + long fourth) { + // int valuePos = value.position(); + switch (splitIndex) { + case 0: + key.put((byte) 1); // to ensure key is non-empty + Varint.writeUnsigned(value, first); + Varint.writeUnsigned(value, second); + Varint.writeUnsigned(value, third); + Varint.writeUnsigned(value, fourth); + // used to pad the value to a fixed length if needed + // fill(value, 4 * (Long.BYTES + 1) - (value.position() - valuePos)); + break; + case 1: + Varint.writeUnsigned(key, first); + Varint.writeUnsigned(value, second); + Varint.writeUnsigned(value, third); + Varint.writeUnsigned(value, fourth); + // used to pad the value to a fixed length if needed + // fill(value, 3 * (Long.BYTES + 1) - (value.position() - valuePos)); + break; + case 2: + Varint.writeUnsigned(key, first); + Varint.writeUnsigned(key, second); + Varint.writeUnsigned(value, third); + Varint.writeUnsigned(value, fourth); + // used to pad the value to a fixed length if needed + // fill(value, 2 * (Long.BYTES + 1) - (value.position() - valuePos)); + break; + case 3: + Varint.writeUnsigned(key, first); + Varint.writeUnsigned(key, second); + Varint.writeUnsigned(key, third); + Varint.writeUnsigned(value, fourth); + // used to pad the value to a fixed length if needed + // fill(value, (Long.BYTES + 1) - (value.position() - valuePos)); + break; + case 4: + Varint.writeUnsigned(key, first); + Varint.writeUnsigned(key, second); + Varint.writeUnsigned(key, third); + Varint.writeUnsigned(key, fourth); + break; + default: + throw new IllegalArgumentException("Split index must be between 0 and 4 inclusive"); + } + + } + + static void spoc(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, subj, pred, obj, context); + } + + static void spco(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, subj, pred, context, obj); + } + + static void sopc(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, subj, obj, pred, context); + } + + static void socp(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, subj, obj, context, pred); + } + + static void scpo(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, subj, context, pred, obj); + } + + static void scop(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, subj, context, obj, pred); + } + + static void psoc(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, pred, subj, obj, context); + } + + static void psco(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, pred, subj, context, obj); + } + + static void posc(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, pred, obj, subj, context); + } + + static void pocs(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, pred, obj, context, subj); + } + + static void pcso(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, pred, context, subj, obj); + } + + static void pcos(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, pred, context, obj, subj); + } + + static void ospc(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, obj, subj, pred, context); + } + + static void oscp(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, obj, subj, context, pred); + } + + static void opsc(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, obj, pred, subj, context); + } + + static void opcs(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, obj, pred, context, subj); + } + + static void ocsp(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, obj, context, subj, pred); + } + + static void ocps(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, obj, context, pred, subj); + } + + static void cspo(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, context, subj, pred, obj); + } + + static void csop(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, context, subj, obj, pred); + } + + static void cpso(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, context, pred, subj, obj); + } + + static void cpos(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, context, pred, obj, subj); + } + + static void cosp(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, context, obj, subj, pred); + } + + static void cops(ByteBuffer key, ByteBuffer value, int splitIndex, long subj, long pred, long obj, long context) { + write(key, value, splitIndex, context, obj, pred, subj); + } + + static boolean[] spocShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { subj > 0, pred > 0, obj > 0, context >= 0 }; + } + + static boolean[] spcoShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { subj > 0, pred > 0, context >= 0, obj > 0 }; + } + + static boolean[] sopcShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { subj > 0, obj > 0, pred > 0, context >= 0 }; + } + + static boolean[] socpShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { subj > 0, obj > 0, context >= 0, pred > 0 }; + } + + static boolean[] scpoShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { subj > 0, context >= 0, pred > 0, obj > 0 }; + } + + static boolean[] scopShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { subj > 0, context >= 0, obj > 0, pred > 0 }; + } + + static boolean[] psocShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { pred > 0, subj > 0, obj > 0, context >= 0 }; + } + + static boolean[] pscoShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { pred > 0, subj > 0, context >= 0, obj > 0 }; + } + + static boolean[] poscShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { pred > 0, obj > 0, subj > 0, context >= 0 }; + } + + static boolean[] pocsShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { pred > 0, obj > 0, context >= 0, subj > 0 }; + } + + static boolean[] pcsoShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { pred > 0, context >= 0, subj > 0, obj > 0 }; + } + + static boolean[] pcosShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { pred > 0, context >= 0, obj > 0, subj > 0 }; + } + + static boolean[] ospcShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { obj > 0, subj > 0, pred > 0, context >= 0 }; + } + + static boolean[] oscpShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { obj > 0, subj > 0, context >= 0, pred > 0 }; + } + + static boolean[] opscShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { obj > 0, pred > 0, subj > 0, context >= 0 }; + } + + static boolean[] opcsShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { obj > 0, pred > 0, context >= 0, subj > 0 }; + } + + static boolean[] ocspShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { obj > 0, context >= 0, subj > 0, pred > 0 }; + } + + static boolean[] ocpsShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { obj > 0, context >= 0, pred > 0, subj > 0 }; + } + + static boolean[] cspoShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { context >= 0, subj > 0, pred > 0, obj > 0 }; + } + + static boolean[] csopShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { context >= 0, subj > 0, obj > 0, pred > 0 }; + } + + static boolean[] cpsoShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { context >= 0, pred > 0, subj > 0, obj > 0 }; + } + + static boolean[] cposShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { context >= 0, pred > 0, obj > 0, subj > 0 }; + } + + static boolean[] cospShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { context >= 0, obj > 0, subj > 0, pred > 0 }; + } + + static boolean[] copsShouldMatch(long subj, long pred, long obj, long context) { + return new boolean[] { context >= 0, obj > 0, pred > 0, subj > 0 }; + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/IndexKeyWriters.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/IndexKeyWriters.java deleted file mode 100644 index bfd81aae41..0000000000 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/IndexKeyWriters.java +++ /dev/null @@ -1,497 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2025 Eclipse RDF4J contributors. - * - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Distribution License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/org/documents/edl-v10.php. - * - * SPDX-License-Identifier: BSD-3-Clause - ******************************************************************************/ -package org.eclipse.rdf4j.sail.lmdb.util; - -import java.nio.ByteBuffer; - -import org.eclipse.rdf4j.sail.lmdb.Varint; - -public final class IndexKeyWriters { - - private static final int CACHE_SIZE = 1 << 12; // 4096 entries - private static final int MASK = CACHE_SIZE - 1; - - private IndexKeyWriters() { - } - - @FunctionalInterface - public interface KeyWriter { - void write(ByteBuffer bb, long subj, long pred, long obj, long context, boolean shouldCache); - } - - @FunctionalInterface - interface BasicWriter { - void write(ByteBuffer bb, long subj, long pred, long obj, long context); - } - - @FunctionalInterface - public interface MatcherFactory { - boolean[] create(long subj, long pred, long obj, long context); - } - - public static KeyWriter forFieldSeq(String fieldSeq) { - final BasicWriter basic; - switch (fieldSeq) { - case "spoc": - basic = IndexKeyWriters::spoc; - break; - case "spco": - basic = IndexKeyWriters::spco; - break; - case "sopc": - basic = IndexKeyWriters::sopc; - break; - case "socp": - basic = IndexKeyWriters::socp; - break; - case "scpo": - basic = IndexKeyWriters::scpo; - break; - case "scop": - basic = IndexKeyWriters::scop; - break; - case "psoc": - basic = IndexKeyWriters::psoc; - break; - case "psco": - basic = IndexKeyWriters::psco; - break; - case "posc": - basic = IndexKeyWriters::posc; - break; - case "pocs": - basic = IndexKeyWriters::pocs; - break; - case "pcso": - basic = IndexKeyWriters::pcso; - break; - case "pcos": - basic = IndexKeyWriters::pcos; - break; - case "ospc": - basic = IndexKeyWriters::ospc; - break; - case "oscp": - basic = IndexKeyWriters::oscp; - break; - case "opsc": - basic = IndexKeyWriters::opsc; - break; - case "opcs": - basic = IndexKeyWriters::opcs; - break; - case "ocsp": - basic = IndexKeyWriters::ocsp; - break; - case "ocps": - basic = IndexKeyWriters::ocps; - break; - case "cspo": - basic = IndexKeyWriters::cspo; - break; - case "csop": - basic = IndexKeyWriters::csop; - break; - case "cpso": - basic = IndexKeyWriters::cpso; - break; - case "cpos": - basic = IndexKeyWriters::cpos; - break; - case "cosp": - basic = IndexKeyWriters::cosp; - break; - case "cops": - basic = IndexKeyWriters::cops; - break; - default: - throw new IllegalArgumentException("Unsupported field sequence: " + fieldSeq); - } - // Wrap the basic writer with a caching KeyWriter implementation - return new CachingKeyWriter(basic); - } - - // Simple array-based cache keyed by a masked index computed from a hashCode. - private static final class CachingKeyWriter implements KeyWriter { - - private final CachingKeyWriter.Entry[] cache = new CachingKeyWriter.Entry[CACHE_SIZE]; - - private static final class Entry { - final long hashCode; - final long s, p, o, c; - final byte[] bytes; - final int length; - - Entry(long hashCode, long s, long p, long o, long c, byte[] bytes) { - this.hashCode = hashCode; - this.s = s; - this.p = p; - this.o = o; - this.c = c; - this.bytes = bytes; - this.length = bytes.length; - } - } - - private final BasicWriter basic; - // Races are acceptable; we overwrite slots without synchronization. - - CachingKeyWriter(BasicWriter basic) { - this.basic = basic; - } - - @Override - public void write(ByteBuffer bb, long subj, long pred, long obj, long context, boolean shouldCache) { - if (!shouldCache) { - basic.write(bb, subj, pred, obj, context); - return; - } - - long hashCode = subj - Long.MAX_VALUE + (pred - Long.MAX_VALUE) * 2 + (obj - Long.MAX_VALUE) * 3 - + (context - Long.MAX_VALUE) * 4; - int slot = (int) (hashCode & MASK); - - Entry e = cache[slot]; - - if (e != null && e.hashCode == hashCode && e.s == subj && e.p == pred && e.o == obj && e.c == context) { - bb.put(e.bytes, 0, e.length); - return; - } - - int len = Varint.calcListLengthUnsigned(subj, pred, obj, context); - byte[] bytes = new byte[len]; - ByteBuffer out = ByteBuffer.wrap(bytes); - basic.write(out, subj, pred, obj, context); - out.flip(); - bb.put(out); - cache[slot] = new Entry(hashCode, subj, pred, obj, context, bytes); - } - } - - public static MatcherFactory matcherFactory(String fieldSeq) { - switch (fieldSeq) { - case "spoc": - return IndexKeyWriters::spocShouldMatch; - case "spco": - return IndexKeyWriters::spcoShouldMatch; - case "sopc": - return IndexKeyWriters::sopcShouldMatch; - case "socp": - return IndexKeyWriters::socpShouldMatch; - case "scpo": - return IndexKeyWriters::scpoShouldMatch; - case "scop": - return IndexKeyWriters::scopShouldMatch; - case "psoc": - return IndexKeyWriters::psocShouldMatch; - case "psco": - return IndexKeyWriters::pscoShouldMatch; - case "posc": - return IndexKeyWriters::poscShouldMatch; - case "pocs": - return IndexKeyWriters::pocsShouldMatch; - case "pcso": - return IndexKeyWriters::pcsoShouldMatch; - case "pcos": - return IndexKeyWriters::pcosShouldMatch; - case "ospc": - return IndexKeyWriters::ospcShouldMatch; - case "oscp": - return IndexKeyWriters::oscpShouldMatch; - case "opsc": - return IndexKeyWriters::opscShouldMatch; - case "opcs": - return IndexKeyWriters::opcsShouldMatch; - case "ocsp": - return IndexKeyWriters::ocspShouldMatch; - case "ocps": - return IndexKeyWriters::ocpsShouldMatch; - case "cspo": - return IndexKeyWriters::cspoShouldMatch; - case "csop": - return IndexKeyWriters::csopShouldMatch; - case "cpso": - return IndexKeyWriters::cpsoShouldMatch; - case "cpos": - return IndexKeyWriters::cposShouldMatch; - case "cosp": - return IndexKeyWriters::cospShouldMatch; - case "cops": - return IndexKeyWriters::copsShouldMatch; - default: - throw new IllegalArgumentException("Unsupported field sequence: " + fieldSeq); - } - } - - static void spoc(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, context); - } - - static void spco(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, obj); - } - - static void sopc(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, context); - } - - static void socp(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, pred); - } - - static void scpo(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, obj); - } - - static void scop(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, pred); - } - - static void psoc(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, context); - } - - static void psco(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, obj); - } - - static void posc(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, context); - } - - static void pocs(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, subj); - } - - static void pcso(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, obj); - } - - static void pcos(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, subj); - } - - static void ospc(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, context); - } - - static void oscp(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, pred); - } - - static void opsc(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, context); - } - - static void opcs(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, subj); - } - - static void ocsp(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, pred); - } - - static void ocps(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, subj); - } - - static void cspo(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, obj); - } - - static void csop(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, pred); - } - - static void cpso(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, obj); - } - - static void cpos(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, subj); - } - - static void cosp(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, subj); - Varint.writeUnsigned(bb, pred); - } - - static void cops(ByteBuffer bb, long subj, long pred, long obj, long context) { - Varint.writeUnsigned(bb, context); - Varint.writeUnsigned(bb, obj); - Varint.writeUnsigned(bb, pred); - Varint.writeUnsigned(bb, subj); - } - - static boolean[] spocShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { subj > 0, pred > 0, obj > 0, context >= 0 }; - } - - static boolean[] spcoShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { subj > 0, pred > 0, context >= 0, obj > 0 }; - } - - static boolean[] sopcShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { subj > 0, obj > 0, pred > 0, context >= 0 }; - } - - static boolean[] socpShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { subj > 0, obj > 0, context >= 0, pred > 0 }; - } - - static boolean[] scpoShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { subj > 0, context >= 0, pred > 0, obj > 0 }; - } - - static boolean[] scopShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { subj > 0, context >= 0, obj > 0, pred > 0 }; - } - - static boolean[] psocShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { pred > 0, subj > 0, obj > 0, context >= 0 }; - } - - static boolean[] pscoShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { pred > 0, subj > 0, context >= 0, obj > 0 }; - } - - static boolean[] poscShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { pred > 0, obj > 0, subj > 0, context >= 0 }; - } - - static boolean[] pocsShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { pred > 0, obj > 0, context >= 0, subj > 0 }; - } - - static boolean[] pcsoShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { pred > 0, context >= 0, subj > 0, obj > 0 }; - } - - static boolean[] pcosShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { pred > 0, context >= 0, obj > 0, subj > 0 }; - } - - static boolean[] ospcShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { obj > 0, subj > 0, pred > 0, context >= 0 }; - } - - static boolean[] oscpShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { obj > 0, subj > 0, context >= 0, pred > 0 }; - } - - static boolean[] opscShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { obj > 0, pred > 0, subj > 0, context >= 0 }; - } - - static boolean[] opcsShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { obj > 0, pred > 0, context >= 0, subj > 0 }; - } - - static boolean[] ocspShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { obj > 0, context >= 0, subj > 0, pred > 0 }; - } - - static boolean[] ocpsShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { obj > 0, context >= 0, pred > 0, subj > 0 }; - } - - static boolean[] cspoShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { context >= 0, subj > 0, pred > 0, obj > 0 }; - } - - static boolean[] csopShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { context >= 0, subj > 0, obj > 0, pred > 0 }; - } - - static boolean[] cpsoShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { context >= 0, pred > 0, subj > 0, obj > 0 }; - } - - static boolean[] cposShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { context >= 0, pred > 0, obj > 0, subj > 0 }; - } - - static boolean[] cospShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { context >= 0, obj > 0, subj > 0, pred > 0 }; - } - - static boolean[] copsShouldMatch(long subj, long pred, long obj, long context) { - return new boolean[] { context >= 0, obj > 0, pred > 0, subj > 0 }; - } -} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/GroupMatcher.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/VarintMatcher.java similarity index 87% rename from core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/GroupMatcher.java rename to core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/VarintMatcher.java index 016090b949..19501bedf7 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/GroupMatcher.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/util/VarintMatcher.java @@ -7,8 +7,7 @@ * http://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause - ******************************************************************************/ - + *******************************************************************************/ package org.eclipse.rdf4j.sail.lmdb.util; import static org.eclipse.rdf4j.sail.lmdb.Varint.firstToLength; @@ -16,10 +15,9 @@ import java.nio.ByteBuffer; /** - * A matcher for partial equality tests of varint lists. + * Matcher for partial equality tests of varint lists in keys. */ -public class GroupMatcher { - +public class VarintMatcher { public static final Bytes.RegionComparator NULL_REGION_COMPARATOR = (a, b) -> true; private final int length0; private final int length1; @@ -35,14 +33,12 @@ public class GroupMatcher { private final byte firstByte3; private final MatchFn matcher; - public GroupMatcher(byte[] valueArray, boolean[] shouldMatch) { - assert shouldMatch.length == 4; - + public VarintMatcher(byte[] valueArray, boolean[] shouldMatch) { int baseOffset = 0; // Loop is unrolled for performance. Do not change back to a loop, do not extract into method, unless you // benchmark with QueryBenchmark first! - { + if (shouldMatch.length > 0) { byte fb = valueArray[0]; this.firstByte0 = fb; int len = firstToLength(fb); @@ -51,13 +47,15 @@ public GroupMatcher(byte[] valueArray, boolean[] shouldMatch) { this.cmp0 = Bytes.capturedComparator(valueArray, 0, len); } else { this.cmp0 = NULL_REGION_COMPARATOR; - ; } - baseOffset += len; + } else { + this.firstByte0 = 0; + this.length0 = 0; + this.cmp0 = NULL_REGION_COMPARATOR; } - { + if (shouldMatch.length > 1) { byte fb = valueArray[baseOffset]; this.firstByte1 = fb; int len = firstToLength(fb); @@ -70,8 +68,13 @@ public GroupMatcher(byte[] valueArray, boolean[] shouldMatch) { } baseOffset += len; + } else { + this.firstByte1 = 0; + this.length1 = 0; + this.cmp1 = NULL_REGION_COMPARATOR; } - { + + if (shouldMatch.length > 2) { byte fb = valueArray[baseOffset]; this.firstByte2 = fb; int len = firstToLength(fb); @@ -81,10 +84,14 @@ public GroupMatcher(byte[] valueArray, boolean[] shouldMatch) { } else { this.cmp2 = NULL_REGION_COMPARATOR; } - baseOffset += len; + } else { + this.firstByte2 = 0; + this.length2 = 0; + this.cmp2 = NULL_REGION_COMPARATOR; } - { + + if (shouldMatch.length > 3) { byte fb = valueArray[baseOffset]; this.firstByte3 = fb; int len = firstToLength(fb); @@ -95,6 +102,10 @@ public GroupMatcher(byte[] valueArray, boolean[] shouldMatch) { } else { this.cmp3 = NULL_REGION_COMPARATOR; } + } else { + this.firstByte3 = 0; + this.length3 = 0; + this.cmp3 = NULL_REGION_COMPARATOR; } this.matcher = selectMatcher(shouldMatch); @@ -112,16 +123,16 @@ private interface MatchFn { private MatchFn selectMatcher(boolean[] shouldMatch) { byte mask = 0; - if (shouldMatch[0]) { + if (shouldMatch.length > 0 && shouldMatch[0]) { mask |= 0b0001; } - if (shouldMatch[1]) { + if (shouldMatch.length > 1 && shouldMatch[1]) { mask |= 0b0010; } - if (shouldMatch[2]) { + if (shouldMatch.length > 2 && shouldMatch[2]) { mask |= 0b0100; } - if (shouldMatch[3]) { + if (shouldMatch.length > 3 && shouldMatch[3]) { mask |= 0b1000; } @@ -176,8 +187,7 @@ private boolean match0001(ByteBuffer other) { } private boolean match0010(ByteBuffer other) { - - skipAhead(other); + skipVarint(other); byte otherFirst1 = other.get(); if (firstByte1 == otherFirst1) { @@ -201,9 +211,8 @@ private boolean match0011(ByteBuffer other) { } private boolean match0100(ByteBuffer other) { - - skipAhead(other); - skipAhead(other); + skipVarint(other); + skipVarint(other); byte otherFirst2 = other.get(); if (firstByte2 == otherFirst2) { @@ -213,11 +222,10 @@ private boolean match0100(ByteBuffer other) { } private boolean match0101(ByteBuffer other) { - byte otherFirst0 = other.get(); if (firstByte0 == otherFirst0) { if (length0 == 1 || cmp0.equals(otherFirst0, other)) { - skipAhead(other); + skipVarint(other); byte otherFirst2 = other.get(); if (firstByte2 == otherFirst2) { @@ -229,8 +237,7 @@ private boolean match0101(ByteBuffer other) { } private boolean match0110(ByteBuffer other) { - - skipAhead(other); + skipVarint(other); byte otherFirst1 = other.get(); if (firstByte1 == otherFirst1) { @@ -244,7 +251,7 @@ private boolean match0110(ByteBuffer other) { return false; } - private void skipAhead(ByteBuffer other) { + private void skipVarint(ByteBuffer other) { int i = firstToLength(other.get()) - 1; assert i >= 0; if (i > 0) { @@ -253,7 +260,6 @@ private void skipAhead(ByteBuffer other) { } private boolean match0111(ByteBuffer other) { - byte otherFirst0 = other.get(); if (firstByte0 == otherFirst0) { if (length0 == 1 || cmp0.equals(otherFirst0, other)) { @@ -272,10 +278,9 @@ private boolean match0111(ByteBuffer other) { } private boolean match1000(ByteBuffer other) { - - skipAhead(other); - skipAhead(other); - skipAhead(other); + skipVarint(other); + skipVarint(other); + skipVarint(other); byte otherFirst3 = other.get(); if (firstByte3 == otherFirst3) { @@ -289,8 +294,8 @@ private boolean match1001(ByteBuffer other) { byte otherFirst0 = other.get(); if (firstByte0 == otherFirst0) { if (length0 == 1 || cmp0.equals(otherFirst0, other)) { - skipAhead(other); - skipAhead(other); + skipVarint(other); + skipVarint(other); byte otherFirst3 = other.get(); if (firstByte3 == otherFirst3) { @@ -302,12 +307,11 @@ private boolean match1001(ByteBuffer other) { } private boolean match1010(ByteBuffer other) { - - skipAhead(other); + skipVarint(other); byte otherFirst1 = other.get(); if (firstByte1 == otherFirst1) { if (length1 == 1 || cmp1.equals(otherFirst1, other)) { - skipAhead(other); + skipVarint(other); byte otherFirst3 = other.get(); if (firstByte3 == otherFirst3) { @@ -319,14 +323,13 @@ private boolean match1010(ByteBuffer other) { } private boolean match1011(ByteBuffer other) { - byte otherFirst0 = other.get(); if (firstByte0 == otherFirst0) { if (length0 == 1 || cmp0.equals(otherFirst0, other)) { byte otherFirst1 = other.get(); if (firstByte1 == otherFirst1) { if (length1 == 1 || cmp1.equals(otherFirst1, other)) { - skipAhead(other); + skipVarint(other); byte otherFirst3 = other.get(); if (firstByte3 == otherFirst3) { @@ -340,9 +343,8 @@ private boolean match1011(ByteBuffer other) { } private boolean match1100(ByteBuffer other) { - - skipAhead(other); - skipAhead(other); + skipVarint(other); + skipVarint(other); byte otherFirst2 = other.get(); if (firstByte2 == otherFirst2) { @@ -357,11 +359,10 @@ private boolean match1100(ByteBuffer other) { } private boolean match1101(ByteBuffer other) { - byte otherFirst0 = other.get(); if (firstByte0 == otherFirst0) { if (length0 == 1 || cmp0.equals(otherFirst0, other)) { - skipAhead(other); + skipVarint(other); byte otherFirst2 = other.get(); if (firstByte2 == otherFirst2) { @@ -378,8 +379,7 @@ private boolean match1101(ByteBuffer other) { } private boolean match1110(ByteBuffer other) { - - skipAhead(other); + skipVarint(other); byte otherFirst1 = other.get(); if (firstByte1 == otherFirst1) { @@ -421,4 +421,4 @@ private boolean match1111(ByteBuffer other) { return false; } -} +} \ No newline at end of file diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java index 685db90c83..3414dfa5a8 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java @@ -64,6 +64,7 @@ public void testCardinalities() throws Exception { tripleStore.storeTriple(resource, 2 + random.nextInt(1000) + 1, random.nextInt(100000) * (long) Math.pow(10, random.nextInt(4)), 1, true); } + tripleStore.storeTriple(resource, 2 + random.nextInt(1000) + 1, 3, 1, true); } tripleStore.commit(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/GroupMatcherTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/EntryMatcherTest.java similarity index 86% rename from core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/GroupMatcherTest.java rename to core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/EntryMatcherTest.java index a259b0edd5..6c4e692667 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/GroupMatcherTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/EntryMatcherTest.java @@ -19,10 +19,10 @@ import java.util.Arrays; import java.util.List; -import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher; +import org.eclipse.rdf4j.sail.lmdb.util.EntryMatcher; import org.junit.jupiter.api.Test; -class GroupMatcherTest { +class EntryMatcherTest { private static final int FIELD_COUNT = 4; private static final int MAX_LENGTH = 9; @@ -39,16 +39,18 @@ void coversEveryMatcherMaskAcrossAllLengthCombinations() { for (byte[] valueLengths : ALL_LENGTH_COMBINATIONS) { final byte[] lengthsRef = valueLengths; long[] referenceValues = valuesForLengths(valueLengths); - GroupMatcher matcher = new GroupMatcher(encodeBE(referenceValues).duplicate().array(), shouldMatch); + EntryMatcher matcher = new EntryMatcher(2, encodeKey(referenceValues).duplicate().array(), + encodeValue(referenceValues).duplicate().array(), shouldMatch); for (CandidateStrategy strategy : CANDIDATE_STRATEGIES) { final CandidateStrategy strategyRef = strategy; - long[] candidateValues = buildCandidateValues(referenceValues, valueLengths, shouldMatch, strategy); - final long[] candidateCopy = candidateValues; - ByteBuffer matchBuffer = encode(candidateCopy); + final long[] candidateValues = buildCandidateValues(referenceValues, valueLengths, shouldMatch, + strategy); + ByteBuffer matchKey = encodeKey(candidateValues); + ByteBuffer matchValue = encodeValue(candidateValues); - assertTrue(matcher.matches(nativeOrder(matchBuffer.duplicate())), - () -> failureMessage("expected match", maskBits, lengthsRef, strategyRef, candidateCopy, + assertTrue(matcher.matches(nativeOrder(matchKey.duplicate()), nativeOrder(matchValue.duplicate())), + () -> failureMessage("expected match", maskBits, lengthsRef, strategyRef, candidateValues, null)); if (hasMatch(shouldMatch)) { @@ -57,13 +59,17 @@ void coversEveryMatcherMaskAcrossAllLengthCombinations() { continue; } for (MismatchType mismatchType : MismatchType.values()) { - long[] mismatchValues = createMismatch(candidateCopy, lengthsRef, index, mismatchType); + long[] mismatchValues = createMismatch(candidateValues, lengthsRef, index, + mismatchType); if (mismatchValues == null) { continue; } final long[] mismatchCopy = mismatchValues; - ByteBuffer mismatchBuffer = encode(mismatchCopy); - assertFalse(matcher.matches(nativeOrder(mismatchBuffer.duplicate())), + ByteBuffer mismatchKey = encodeKey(mismatchCopy); + ByteBuffer mismatchValue = encodeValue(mismatchCopy); + assertFalse( + matcher.matches(nativeOrder(mismatchKey.duplicate()), + nativeOrder(mismatchValue.duplicate())), () -> failureMessage("expected mismatch", maskBits, lengthsRef, strategyRef, mismatchCopy, mismatchType)); } @@ -116,23 +122,23 @@ private static int selectLengthIndex(byte[] lengths, int position, CandidateStra } } - private static ByteBuffer encode(long[] values) { - ByteBuffer buffer = ByteBuffer - .allocate(Varint.calcListLengthUnsigned(values[0], values[1], values[2], values[3])); + private static ByteBuffer encodeKey(long[] values) { + ByteBuffer buffer = ByteBuffer.allocate(Varint.calcLengthUnsigned(values[0]) + + Varint.calcLengthUnsigned(values[1])); buffer.order(ByteOrder.nativeOrder()); - for (long value : values) { - Varint.writeUnsigned(buffer, value); + for (int i = 0; i < 2; i++) { + Varint.writeUnsigned(buffer, values[i]); } buffer.flip(); return buffer; } - private static ByteBuffer encodeBE(long[] values) { + private static ByteBuffer encodeValue(long[] values) { ByteBuffer buffer = ByteBuffer - .allocate(Varint.calcListLengthUnsigned(values[0], values[1], values[2], values[3])); + .allocate(Varint.calcLengthUnsigned(values[2]) + Varint.calcLengthUnsigned(values[3])); buffer.order(ByteOrder.nativeOrder()); - for (long value : values) { - Varint.writeUnsigned(buffer, value); + for (int i = 2; i < 4; i++) { + Varint.writeUnsigned(buffer, values[i]); } buffer.flip(); return buffer; diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/GroupMatcherTest2.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/GroupMatcherTest2.java deleted file mode 100644 index 00f58e384a..0000000000 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/GroupMatcherTest2.java +++ /dev/null @@ -1,312 +0,0 @@ -///******************************************************************************* -// * Copyright (c) 2025 Eclipse RDF4J contributors. -// * -// * All rights reserved. This program and the accompanying materials -// * are made available under the terms of the Eclipse Distribution License v1.0 -// * which accompanies this distribution, and is available at -// * http://www.eclipse.org/org/documents/edl-v10.php. -// * -// * SPDX-License-Identifier: BSD-3-Clause -// ******************************************************************************/ -//package org.eclipse.rdf4j.sail.lmdb; -// -//import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher; -//import org.junit.jupiter.api.DynamicTest; -//import org.junit.jupiter.api.TestFactory; -// -//import java.nio.ByteBuffer; -//import java.util.ArrayList; -//import java.util.Arrays; -//import java.util.List; -//import java.util.Optional; -//import java.util.stream.IntStream; -//import java.util.stream.Stream; -// -//import static org.junit.jupiter.api.Assertions.assertFalse; -//import static org.junit.jupiter.api.Assertions.assertTrue; -// -//class GroupMatcherTest2 { -// -// private static final int FIELD_COUNT = 4; -// private static final int MAX_LENGTH = 9; -// -// private static final ValueVariants[] VALUE_VARIANTS = buildValueVariants(); -// private static final List ALL_LENGTH_COMBINATIONS = buildAllLengthCombinations(); -// private static final CandidateStrategy[] CANDIDATE_STRATEGIES = CandidateStrategy.values(); -// -// @TestFactory -// Stream coversEveryMatcherMaskAcrossAllLengthCombinations() { -// return IntStream.range(0, 1 << FIELD_COUNT) -// .mapToObj(Integer::valueOf) -// .flatMap(this::dynamicTestsForMask); -// } -// -// private Stream dynamicTestsForMask(int maskBits) { -// boolean[] shouldMatch = maskToArray(maskBits); -// return ALL_LENGTH_COMBINATIONS.stream() -// .flatMap(lengths -> dynamicTestsForLengths(maskBits, shouldMatch, lengths)); -// } -// -// private Stream dynamicTestsForLengths(int maskBits, boolean[] shouldMatch, byte[] valueLengths) { -// return Arrays.stream(CANDIDATE_STRATEGIES) -// .flatMap(strategy -> dynamicTestsForStrategy(maskBits, shouldMatch, valueLengths, strategy)); -// } -// -// private Stream dynamicTestsForStrategy(int maskBits, boolean[] shouldMatch, byte[] valueLengths, -// CandidateStrategy strategy) { -// long[] referenceValues = valuesForLengths(valueLengths); -// Stream matchTest = Stream.of(createMatchTest(maskBits, shouldMatch, valueLengths, strategy, -// referenceValues)); -// Stream mismatchTests = hasMatch(shouldMatch) -// ? dynamicMismatchTests(maskBits, shouldMatch, valueLengths, referenceValues, strategy) -// : Stream.empty(); -// return Stream.concat(matchTest, mismatchTests); -// } -// -// private DynamicTest createMatchTest(int maskBits, boolean[] shouldMatch, byte[] valueLengths, -// CandidateStrategy strategy, long[] referenceValues) { -// String displayName = "match mask=" + toMask(maskBits) + ", valueLengths=" -// + Arrays.toString(toIntArray(valueLengths)) -// + ", strategy=" + strategy; -// return DynamicTest.dynamicTest(displayName, () -> { -// boolean[] shouldMatchCopy = Arrays.copyOf(shouldMatch, shouldMatch.length); -// GroupMatcher matcher = new GroupMatcher(encode(referenceValues).duplicate(), shouldMatchCopy); -// long[] candidateValues = buildCandidateValues(referenceValues, valueLengths, shouldMatchCopy, strategy); -// assertTrue(matcher.matches(encode(candidateValues).duplicate()), -// () -> failureMessage("expected match", maskBits, valueLengths, strategy, candidateValues, null)); -// }); -// } -// -// private Stream dynamicMismatchTests(int maskBits, boolean[] shouldMatch, byte[] valueLengths, -// long[] referenceValues, CandidateStrategy strategy) { -// return IntStream.range(0, FIELD_COUNT) -// .filter(index -> shouldMatch[index]) -// .mapToObj(Integer::valueOf) -// .flatMap(index -> Arrays.stream(MismatchType.values()) -// .map(mismatchType -> createMismatchTest(maskBits, shouldMatch, valueLengths, referenceValues, -// strategy, -// index, mismatchType)) -// .flatMap(Optional::stream)); -// } -// -// private Optional createMismatchTest(int maskBits, boolean[] shouldMatch, byte[] valueLengths, -// long[] referenceValues, CandidateStrategy strategy, int index, MismatchType mismatchType) { -// long[] candidateValues = buildCandidateValues(referenceValues, valueLengths, shouldMatch, strategy); -// long[] mismatchValues = createMismatch(candidateValues, valueLengths, index, mismatchType); -// if (mismatchValues == null) { -// return Optional.empty(); -// } -// String displayName = "mismatch mask=" + toMask(maskBits) + ", valueLengths=" -// + Arrays.toString(toIntArray(valueLengths)) + ", strategy=" + strategy + ", index=" + index + ", type=" -// + mismatchType; -// return Optional.of(DynamicTest.dynamicTest(displayName, () -> { -// boolean[] shouldMatchCopy = Arrays.copyOf(shouldMatch, shouldMatch.length); -// GroupMatcher matcher = new GroupMatcher(encode(referenceValues).duplicate(), shouldMatchCopy); -// assertFalse(matcher.matches(encode(mismatchValues).duplicate()), -// () -> failureMessage("expected mismatch", maskBits, valueLengths, strategy, mismatchValues, -// mismatchType)); -// })); -// } -// -// private static long[] valuesForLengths(byte[] lengthIndices) { -// long[] values = new long[FIELD_COUNT]; -// for (int i = 0; i < FIELD_COUNT; i++) { -// int lengthIndex = Byte.toUnsignedInt(lengthIndices[i]); -// values[i] = VALUE_VARIANTS[lengthIndex].base; -// } -// return values; -// } -// -// private static long[] buildCandidateValues(long[] referenceValues, byte[] valueLengths, boolean[] shouldMatch, -// CandidateStrategy strategy) { -// long[] candidateValues = new long[FIELD_COUNT]; -// for (int i = 0; i < FIELD_COUNT; i++) { -// if (shouldMatch[i]) { -// candidateValues[i] = referenceValues[i]; -// } else { -// int lengthIndex = selectLengthIndex(valueLengths, i, strategy); -// candidateValues[i] = VALUE_VARIANTS[lengthIndex].nonMatchingSameLength; -// } -// } -// return candidateValues; -// } -// -// private static int selectLengthIndex(byte[] lengths, int position, CandidateStrategy strategy) { -// int base = Byte.toUnsignedInt(lengths[position]); -// switch (strategy) { -// case SAME_LENGTHS: -// return base; -// case ROTATED_LENGTHS: -// return Byte.toUnsignedInt(lengths[(position + 1) % FIELD_COUNT]); -// case INCREMENTED_LENGTHS: -// return base == MAX_LENGTH ? 1 : base + 1; -// default: -// throw new IllegalStateException("Unsupported strategy: " + strategy); -// } -// } -// -// private static ByteBuffer encode(long[] values) { -// ByteBuffer buffer = ByteBuffer -// .allocate(Varint.calcListLengthUnsigned(values[0], values[1], values[2], values[3])); -// for (long value : values) { -// Varint.writeUnsigned(buffer, value); -// } -// buffer.flip(); -// return buffer; -// } -// -// private static boolean[] maskToArray(int mask) { -// boolean[] shouldMatch = new boolean[FIELD_COUNT]; -// for (int i = 0; i < FIELD_COUNT; i++) { -// shouldMatch[i] = (mask & (1 << i)) != 0; -// } -// return shouldMatch; -// } -// -// private static boolean hasMatch(boolean[] shouldMatch) { -// for (boolean flag : shouldMatch) { -// if (flag) { -// return true; -// } -// } -// return false; -// } -// -// private static int firstMatchedIndex(boolean[] shouldMatch) { -// for (int i = 0; i < FIELD_COUNT; i++) { -// if (shouldMatch[i]) { -// return i; -// } -// } -// return -1; -// } -// -// private static List buildAllLengthCombinations() { -// List combos = new ArrayList<>((int) Math.pow(MAX_LENGTH, FIELD_COUNT)); -// buildCombos(combos, new byte[FIELD_COUNT], 0); -// return combos; -// } -// -// private static void buildCombos(List combos, byte[] current, int index) { -// if (index == FIELD_COUNT) { -// combos.add(current.clone()); -// return; -// } -// for (int len = 1; len <= MAX_LENGTH; len++) { -// current[index] = (byte) len; -// buildCombos(combos, current, index + 1); -// } -// } -// -// private static String failureMessage(String expectation, int mask, byte[] valueLengths, CandidateStrategy strategy, -// long[] candidateValues, MismatchType mismatchType) { -// return expectation + " for mask " + toMask(mask) + ", valueLengths=" + Arrays.toString(toIntArray(valueLengths)) -// + ", strategy=" + strategy -// + (mismatchType == null ? "" : ", mismatchType=" + mismatchType) -// + ", candidate=" + Arrays.toString(candidateValues); -// } -// -// private static String toMask(int mask) { -// return String.format("%4s", Integer.toBinaryString(mask)).replace(' ', '0'); -// } -// -// private static int[] toIntArray(byte[] values) { -// int[] ints = new int[values.length]; -// for (int i = 0; i < values.length; i++) { -// ints[i] = Byte.toUnsignedInt(values[i]); -// } -// return ints; -// } -// -// private static long[] createMismatch(long[] baseCandidate, byte[] valueLengths, int index, -// MismatchType mismatchType) { -// int lengthIndex = Byte.toUnsignedInt(valueLengths[index]); -// ValueVariants variants = VALUE_VARIANTS[lengthIndex]; -// long replacement; -// switch (mismatchType) { -// case SAME_FIRST_BYTE: -// if (variants.sameFirstVariant == null) { -// return null; -// } -// replacement = variants.sameFirstVariant; -// break; -// case DIFFERENT_FIRST_BYTE: -// replacement = variants.differentFirstVariant; -// break; -// default: -// throw new IllegalStateException("Unsupported mismatch type: " + mismatchType); -// } -// if (replacement == baseCandidate[index]) { -// return null; -// } -// long[] mismatch = baseCandidate.clone(); -// mismatch[index] = replacement; -// return mismatch; -// } -// -// private static ValueVariants[] buildValueVariants() { -// ValueVariants[] variants = new ValueVariants[MAX_LENGTH + 1]; -// variants[1] = new ValueVariants(42L, 99L, null, 99L); -// variants[2] = new ValueVariants(241L, 330L, 330L, 600L); -// variants[3] = new ValueVariants(50_000L, 60_000L, 60_000L, 70_000L); -// variants[4] = new ValueVariants(1_048_576L, 1_048_577L, 1_048_577L, 16_777_216L); -// variants[5] = new ValueVariants(16_777_216L, 16_777_217L, 16_777_217L, 4_294_967_296L); -// variants[6] = new ValueVariants(4_294_967_296L, 4_294_967_297L, 4_294_967_297L, 1_099_511_627_776L); -// variants[7] = new ValueVariants(1_099_511_627_776L, 1_099_511_627_777L, 1_099_511_627_777L, -// 281_474_976_710_656L); -// variants[8] = new ValueVariants(281_474_976_710_656L, 281_474_976_710_657L, 281_474_976_710_657L, -// 72_057_594_037_927_936L); -// variants[9] = new ValueVariants(72_057_594_037_927_936L, 72_057_594_037_927_937L, -// 72_057_594_037_927_937L, 281_474_976_710_656L); -// -// for (int len = 1; len <= MAX_LENGTH; len++) { -// ValueVariants v = variants[len]; -// if (Varint.calcLengthUnsigned(v.base) != len) { -// throw new IllegalStateException("Unexpected length for base value " + v.base + " (len=" + len + ")"); -// } -// if (Varint.calcLengthUnsigned(v.nonMatchingSameLength) != len) { -// throw new IllegalStateException( -// "Unexpected length for same-length variant " + v.nonMatchingSameLength + " (len=" + len + ")"); -// } -// if (v.sameFirstVariant != null && firstByte(v.sameFirstVariant.longValue()) != firstByte(v.base)) { -// throw new IllegalStateException("Expected same-first variant to share header for length " + len); -// } -// if (firstByte(v.differentFirstVariant) == firstByte(v.base)) { -// throw new IllegalStateException("Expected different-first variant to differ for length " + len); -// } -// } -// -// return variants; -// } -// -// private static byte firstByte(long value) { -// ByteBuffer buffer = ByteBuffer.allocate(Varint.calcLengthUnsigned(value)); -// Varint.writeUnsigned(buffer, value); -// return buffer.array()[0]; -// } -// -// private static final class ValueVariants { -// final long base; -// final long nonMatchingSameLength; -// final Long sameFirstVariant; -// final long differentFirstVariant; -// -// ValueVariants(long base, long nonMatchingSameLength, Long sameFirstVariant, long differentFirstVariant) { -// this.base = base; -// this.nonMatchingSameLength = nonMatchingSameLength; -// this.sameFirstVariant = sameFirstVariant; -// this.differentFirstVariant = differentFirstVariant; -// } -// } -// -// private enum MismatchType { -// SAME_FIRST_BYTE, -// DIFFERENT_FIRST_BYTE -// } -// -// private enum CandidateStrategy { -// SAME_LENGTHS, -// ROTATED_LENGTHS, -// INCREMENTED_LENGTHS -// } -//} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java index 0e82f5246f..63bfecfc52 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java @@ -22,8 +22,8 @@ import org.junit.jupiter.api.io.TempDir; /** - * Focused tests that directly exercise TripleStore.TripleIndex#toKey to provide coverage for behavior-neutral - * optimizations such as internal key encoding caching. + * Focused tests that directly exercise TripleStore.TripleIndex#toEntry to provide coverage for behavior-neutral + * optimizations such as internal encoding caching. */ class TripleIndexToKeyCacheTest { @@ -50,22 +50,25 @@ void spoc_subjectBound_othersWildcard() throws Exception { long obj = Long.MAX_VALUE; long context = Long.MAX_VALUE; - TripleStore.TripleIndex index = tripleStore.new TripleIndex("spoc"); + TripleStore.TripleIndex index = tripleStore.new TripleIndex("spoc", 2); - int len = Varint.calcListLengthUnsigned(subj, pred, obj, context); - ByteBuffer actual = ByteBuffer.allocate(len); - index.toKey(actual, subj, pred, obj, context); - actual.flip(); + ByteBuffer actualKey = ByteBuffer.allocate(Varint.calcLengthUnsigned(subj) + Varint.calcLengthUnsigned(pred)); + ByteBuffer actualValue = ByteBuffer.allocate(2 * (Long.BYTES + 1)); + index.toEntry(actualKey, actualValue, subj, pred, obj, context); + actualKey.flip(); + actualValue.flip(); // Expected: varints in spoc order - ByteBuffer expected = ByteBuffer.allocate(len); - Varint.writeUnsigned(expected, subj); - Varint.writeUnsigned(expected, pred); - Varint.writeUnsigned(expected, obj); - Varint.writeUnsigned(expected, context); - expected.flip(); - - assertArrayEquals(expected.array(), actual.array()); + ByteBuffer expectedKey = ByteBuffer.allocate(actualKey.capacity()); + Varint.writeUnsigned(expectedKey, subj); + Varint.writeUnsigned(expectedKey, pred); + ByteBuffer expectedValue = ByteBuffer.allocate(actualValue.capacity()); + Varint.writeUnsigned(expectedValue, obj); + Varint.writeUnsigned(expectedValue, context); + expectedValue.position(actualValue.capacity()); + + assertArrayEquals(expectedKey.array(), actualKey.array()); + assertArrayEquals(expectedValue.array(), actualValue.array()); } @Test @@ -76,21 +79,24 @@ void posc_predicateBound_othersWildcard() throws Exception { long obj = Long.MAX_VALUE; long context = Long.MAX_VALUE; - TripleStore.TripleIndex index = tripleStore.new TripleIndex("posc"); + TripleStore.TripleIndex index = tripleStore.new TripleIndex("posc", 2); - int len = Varint.calcListLengthUnsigned(subj, pred, obj, context); - ByteBuffer actual = ByteBuffer.allocate(len); - index.toKey(actual, subj, pred, obj, context); - actual.flip(); + ByteBuffer actualKey = ByteBuffer.allocate(Varint.calcLengthUnsigned(subj) + Varint.calcLengthUnsigned(pred)); + ByteBuffer actualValue = ByteBuffer.allocate(2 * (Long.BYTES + 1)); + index.toEntry(actualKey, actualValue, subj, pred, obj, context); + actualKey.flip(); + actualValue.flip(); - // Expected: varints in posc order - ByteBuffer expected = ByteBuffer.allocate(len); - Varint.writeUnsigned(expected, pred); - Varint.writeUnsigned(expected, obj); - Varint.writeUnsigned(expected, subj); - Varint.writeUnsigned(expected, context); - expected.flip(); - - assertArrayEquals(expected.array(), actual.array()); + // Expected: varints in spoc order + ByteBuffer expectedKey = ByteBuffer.allocate(actualKey.capacity()); + Varint.writeUnsigned(expectedKey, pred); + Varint.writeUnsigned(expectedKey, obj); + ByteBuffer expectedValue = ByteBuffer.allocate(actualValue.capacity()); + Varint.writeUnsigned(expectedValue, subj); + Varint.writeUnsigned(expectedValue, context); + expectedValue.position(actualValue.capacity()); + + assertArrayEquals(expectedKey.array(), actualKey.array()); + assertArrayEquals(expectedValue.array(), actualValue.array()); } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/VarintTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/VarintTest.java index fef3bd6731..3b569d982d 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/VarintTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/VarintTest.java @@ -122,21 +122,6 @@ public void testVarintSequential() { } - @Test - public void testVarintList() { - ByteBuffer bb = ByteBuffer.allocate(2 + 4 * Long.BYTES).order(ByteOrder.nativeOrder()); - for (int i = 0; i < values.length - 4; i++) { - long[] expected = new long[4]; - System.arraycopy(values, 0, expected, 0, 4); - bb.clear(); - Varint.writeListUnsigned(bb, expected); - bb.flip(); - long[] actual = new long[4]; - Varint.readListUnsigned(bb, actual); - assertArrayEquals("Encoded and decoded value should be equal", expected, actual); - } - } - @Test public void testVarintReadUnsignedAtPositionThreeByteEncoding() { long value = 3000L;