diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeReader.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeReader.java index ec8f204b..de675776 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeReader.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeReader.java @@ -11,94 +11,68 @@ public class BTreeReader { private final MultimapFileLong file; private final BTreeContext ctx; + private final Logger logger = LoggerFactory.getLogger(BTreeReader.class); - private final long mask; - private final MultimapSearcher searcher; + + private final MultimapSearcher indexSearcher; + private final MultimapSearcher dataSearcher; public BTreeReader(MultimapFileLong file, BTreeContext ctx) { this.file = file; - this.searcher = file.createSearcher(); + this.indexSearcher = MultimapSearcher.forContext(file, ~0, 1); + this.dataSearcher = MultimapSearcher.forContext(file, ctx.equalityMask(), ctx.entrySize()); + this.ctx = ctx; - this.mask = ctx.equalityMask(); } - public long fileSize() { - return file.size(); + public BTreeHeader getHeader(long fileOffset) { + return new BTreeHeader(file.get(fileOffset), file.get(fileOffset+1), file.get(fileOffset+2)); } - public BTreeHeader getHeader(long offset) { - return new BTreeHeader(file.get(offset), file.get(offset+1), file.get(offset+2)); - } + /** + * + * @return file offset of entry matching keyRaw, negative if absent + */ + public long findEntry(BTreeHeader header, final long keyRaw) { + final long key = keyRaw & ctx.equalityMask(); - public long offsetForEntry(BTreeHeader header, final long keyRaw) { - final long key = keyRaw & mask; + final long dataAddress = header.dataOffsetLongs(); + final int entrySize = ctx.entrySize(); + final int blockSize = ctx.BLOCK_SIZE_WORDS(); - if (header.layers() == 0) { - return trivialSearch(header, key); + if (header.layers() == 0) { // For small data, we only have a data block + return dataSearcher.binarySearchUpperBound(key, dataAddress, header.numEntries()); } - long p = searchEntireTopLayer(header, key); - if (p < 0) return -1; + final long indexOffset = header.indexOffsetLongs(); - long cumOffset = p * ctx.BLOCK_SIZE_WORDS(); + // Search the top layer + long layerOffset = indexSearch(key, indexOffset, blockSize); + if (layerOffset < 0) return -1; + + // Search intermediary layers for (int i = header.layers() - 2; i >= 0; --i) { - long offsetBase = header.indexOffsetLongs() + header.relativeLayerOffset(ctx, i); - p = searchLayerBlock(key, offsetBase+cumOffset); - if (p < 0) + final long layerAddressBase = indexOffset + header.relativeIndexLayerOffset(ctx, i); + final long layerBlockOffset = layerAddressBase + layerOffset; + + final long nextLayerOffset = indexSearch(key, layerBlockOffset, blockSize); + if (nextLayerOffset < 0) return -1; - cumOffset = ctx.BLOCK_SIZE_WORDS()*(p + cumOffset); + + layerOffset = blockSize*(nextLayerOffset + layerOffset); } - long dataMax = header.dataOffsetLongs() + (long) header.numEntries() * ctx.entrySize(); - return searchDataBlock(key, - header.dataOffsetLongs() + ctx.entrySize()*cumOffset, - dataMax); + // Search the corresponding data block + final long searchStart = dataAddress + layerOffset * entrySize; + final long lastDataAddress = dataAddress + (long) header.numEntries() * entrySize; + final long lastItemInBlockAddress = searchStart + (long) blockSize * entrySize; + final long searchEnd = Math.min(lastItemInBlockAddress, lastDataAddress); + + return dataSearcher.binarySearchUpperBound(key, searchStart, (searchEnd - searchStart) / entrySize); } - - private long searchEntireTopLayer(BTreeHeader header, long key) { - long offset = header.indexOffsetLongs(); - - return searcher.binarySearchUpperBound(key, offset, offset + ctx.BLOCK_SIZE_WORDS()) - offset; - } - - private long searchLayerBlock(long key, long blockOffset) { - if (blockOffset < 0) - return blockOffset; - - return searcher.binarySearchUpperBound(key, blockOffset, blockOffset + ctx.BLOCK_SIZE_WORDS()) - blockOffset; - } - - - private long searchDataBlock(long key, long blockOffset, long dataMax) { - if (blockOffset < 0) - return blockOffset; - - long lastOffset = Math.min(blockOffset+ctx.BLOCK_SIZE_WORDS()*(long)ctx.entrySize(), dataMax); - int length = (int)(lastOffset - blockOffset); - - if (ctx.entrySize() == 1) { - if (mask == ~0L) return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, blockOffset+length); - return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, blockOffset+length, mask); - } - - return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, ctx.entrySize(), length/ctx.entrySize(), mask); - } - - private long trivialSearch(BTreeHeader header, long key) { - long offset = header.dataOffsetLongs(); - - if (ctx.entrySize() == 1) { - if (mask == ~0L) { - return searcher.binarySearchUpperBoundNoMiss(key, offset, offset+header.numEntries()); - } - else { - return searcher.binarySearchUpperBoundNoMiss(key, offset, offset+header.numEntries(), mask); - } - } - - return searcher.binarySearchUpperBoundNoMiss(key, offset, ctx.entrySize(), header.numEntries(), mask); - + private long indexSearch(long key, long start, long n) { + return indexSearcher.binarySearch(key, start, n) - start; } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeWriter.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeWriter.java index b43faca7..0c1f0789 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeWriter.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/BTreeWriter.java @@ -2,16 +2,12 @@ package nu.marginalia.util.btree; import nu.marginalia.util.btree.model.BTreeContext; import nu.marginalia.util.btree.model.BTreeHeader; -import nu.marginalia.util.multimap.MultimapFileLong; import nu.marginalia.util.multimap.MultimapFileLongSlice; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import java.io.IOException; public class BTreeWriter { - private final Logger logger = LoggerFactory.getLogger(BTreeWriter.class); private final BTreeContext ctx; private final MultimapFileLongSlice map; @@ -27,7 +23,7 @@ public class BTreeWriter { long size = 0; for (int layer = 0; layer < numLayers; layer++) { - size += ctx.layerSize(numWords, layer); + size += ctx.indexLayerSize(numWords, layer); } return size; } @@ -45,17 +41,17 @@ public class BTreeWriter { writeIndexCallback.write(map.atOffset(header.dataOffsetLongs())); - if (header.layers() < 1) { + if (header.layers() < 1) { // The data is too small to benefit from indexing + return ctx.calculateSize(numEntries); + } + else { + writeIndex(header); return ctx.calculateSize(numEntries); } - - writeIndex(header); - - return ctx.calculateSize(numEntries); } public static BTreeHeader makeHeader(BTreeContext ctx, long offset, int numEntries) { - final int numLayers = ctx.numLayers(numEntries); + final int numLayers = ctx.numIndexLayers(numEntries); final int padding = BTreeHeader.getPadding(ctx, offset, numLayers); @@ -71,46 +67,50 @@ public class BTreeWriter { private void writeIndex(BTreeHeader header) { - var layerOffsets = getRelativeLayerOffsets(header); + var layerOffsets = header.getRelativeLayerOffsets(ctx); - long stride = ctx.BLOCK_SIZE_WORDS(); + long indexedDataStepSize = ctx.BLOCK_SIZE_WORDS(); + + /* Index layer 0 indexes the data itself + Index layer 1 indexes layer 0 + Index layer 2 indexes layer 1 + And so on + */ for (int layer = 0; layer < header.layers(); layer++, - stride*=ctx.BLOCK_SIZE_WORDS()) { - long indexWord = 0; - long offsetBase = layerOffsets[layer] + header.indexOffsetLongs(); - long numEntries = header.numEntries(); - for (long idx = 0; idx < numEntries; idx += stride, indexWord++) { - long dataOffset = header.dataOffsetLongs() + (idx + (stride-1)) * ctx.entrySize(); - long val; + indexedDataStepSize*=ctx.BLOCK_SIZE_WORDS()) { - if (idx + (stride-1) < numEntries) { - val = map.get(dataOffset) & ctx.equalityMask(); - } - else { - val = Long.MAX_VALUE; - } - if (offsetBase + indexWord < 0) { - logger.error("bad put @ {}", offsetBase + indexWord); - logger.error("layer{}", layer); - logger.error("layer offsets {}", layerOffsets); - logger.error("offsetBase = {}", offsetBase); - logger.error("numEntries = {}", numEntries); - logger.error("indexWord = {}", indexWord); - } - map.put(offsetBase + indexWord, val); - } - for (; (indexWord % ctx.BLOCK_SIZE_WORDS()) != 0; indexWord++) { - map.put(offsetBase + indexWord, Long.MAX_VALUE); - } + writeIndexLayer(header, layerOffsets, indexedDataStepSize, layer); } } - private long[] getRelativeLayerOffsets(BTreeHeader header) { - long[] layerOffsets = new long[header.layers()]; - for (int i = 0; i < header.layers(); i++) { - layerOffsets[i] = header.relativeLayerOffset(ctx, i); + private void writeIndexLayer(BTreeHeader header, long[] layerOffsets, + final long indexedDataStepSize, + final int layer) { + + final long indexOffsetBase = layerOffsets[layer] + header.indexOffsetLongs(); + final long dataOffsetBase = header.dataOffsetLongs(); + + final long dataEntriesMax = header.numEntries(); + final int entrySize = ctx.entrySize(); + + final long lastDataEntryOffset = indexedDataStepSize - 1; + + long indexWord = 0; + + for (long dataPtr = 0; + dataPtr + lastDataEntryOffset < dataEntriesMax; + dataPtr += indexedDataStepSize) + { + long dataOffset = dataOffsetBase + (dataPtr + lastDataEntryOffset) * entrySize; + map.put(indexOffsetBase + indexWord++, map.get(dataOffset) & ctx.equalityMask()); } - return layerOffsets; + + // Fill the remaining block with LONG_MAX + map.setRange(indexOffsetBase+indexWord, + (int) (ctx.BLOCK_SIZE_WORDS() - (indexWord % ctx.BLOCK_SIZE_WORDS())), + Long.MAX_VALUE); } + + } diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeContext.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeContext.java index 4655946c..a7d6b22b 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeContext.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeContext.java @@ -10,7 +10,6 @@ public record BTreeContext(int MAX_LAYERS, public BTreeContext(int MAX_LAYERS, int entrySize, long equalityMask, int BLOCK_SIZE_BITS) { this(MAX_LAYERS, entrySize, equalityMask, BLOCK_SIZE_BITS, 1 << BLOCK_SIZE_BITS); - } public long calculateSize(int numEntries) { @@ -19,7 +18,7 @@ public record BTreeContext(int MAX_LAYERS, return header.dataOffsetLongs() + (long)numEntries * entrySize; } - public int numLayers(int numEntries) { + public int numIndexLayers(int numEntries) { if (numEntries <= BLOCK_SIZE_WORDS*2) { return 0; } @@ -36,11 +35,7 @@ public record BTreeContext(int MAX_LAYERS, return MAX_LAYERS; } - public long layerSize(int numEntries, int level) { - return BLOCK_SIZE_WORDS * numBlocks(numEntries, level); - } - - private long numBlocks(int numWords, int level) { + public long indexLayerSize(int numWords, int level) { long layerSize = 1L<<(BLOCK_SIZE_BITS*(level+1)); int numBlocks = 0; @@ -50,7 +45,7 @@ public record BTreeContext(int MAX_LAYERS, numBlocks++; } - return numBlocks; + return (long) BLOCK_SIZE_WORDS * numBlocks; } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeHeader.java b/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeHeader.java index 8d68b424..8cdcd355 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeHeader.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/btree/model/BTreeHeader.java @@ -1,6 +1,5 @@ package nu.marginalia.util.btree.model; -import nu.marginalia.util.multimap.MultimapFileLong; import nu.marginalia.util.multimap.MultimapFileLongSlice; public record BTreeHeader(int layers, int numEntries, long indexOffsetLongs, long dataOffsetLongs) { @@ -36,12 +35,20 @@ public record BTreeHeader(int layers, int numEntries, long indexOffsetLongs, lon } - public long relativeLayerOffset(BTreeContext ctx, int n) { + public long relativeIndexLayerOffset(BTreeContext ctx, int n) { long offset = 0; for (int i = n+1; i < layers; i++) { - offset += ctx.layerSize( numEntries, i); + offset += ctx.indexLayerSize( numEntries, i); } return offset; } + public long[] getRelativeLayerOffsets(BTreeContext ctx) { + long[] layerOffsets = new long[layers()]; + for (int i = 0; i < layers(); i++) { + layerOffsets[i] = relativeIndexLayerOffset(ctx, i); + } + return layerOffsets; + } + } diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java index e9a9b4fe..00ccd82c 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java @@ -97,8 +97,8 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice { readableSize(mapSizeBytes), readableSize(8L*bufferSizeWords), mode); } - public MultimapSearcher createSearcher() { - return new MultimapSearcher(this); + public MultimapSearcherBase createSearcher() { + return new MultimapSearcherBase(this); } public MultimapSorter createSorter(Path tmpFile, int internalSortLimit) { return new MultimapSorter(this, tmpFile, internalSortLimit); @@ -332,6 +332,34 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice { } + @Override + public void setRange(long idx, int n, long val) { + if (n == 0) return; + + if (idx+n >= mappedSize) { + grow(idx+n); + } + int iN = (int)((idx + n) / bufferSize); + + for (int i = 0; i < n; ) { + int i0 = (int)((idx + i) / bufferSize); + + int bufferOffset = (int) ((idx+i) % bufferSize); + var buffer = buffers.get(i0); + + final int l; + + if (i0 < iN) l = bufferSize - bufferOffset; + else l = Math.min(n - i, bufferSize - bufferOffset); + + for (int p = 0; p < l; p++) { + buffer.put(bufferOffset + p, val); + } + + i+=l; + } + } + @Override public void transferFromFileChannel(FileChannel sourceChannel, long destOffset, long sourceStart, long sourceEnd) throws IOException { diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLongOffsetSlice.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLongOffsetSlice.java index bd35bd9b..f379d1c6 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLongOffsetSlice.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLongOffsetSlice.java @@ -23,6 +23,11 @@ public class MultimapFileLongOffsetSlice implements MultimapFileLongSlice { map.put(off+idx, val); } + @Override + public void setRange(long idx, int n, long val) { + map.setRange(off+idx, n, val); + } + @Override public long get(long idx) { return map.get(off+idx); diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLongSlice.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLongSlice.java index 27d6ae06..29f9994d 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLongSlice.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLongSlice.java @@ -9,6 +9,8 @@ public interface MultimapFileLongSlice { void put(long idx, long val); + void setRange(long idx, int n, long val); + long get(long idx); void read(long[] vals, long idx); diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcher.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcher.java index 005888d8..886912c5 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcher.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcher.java @@ -1,128 +1,80 @@ package nu.marginalia.util.multimap; -import lombok.experimental.Delegate; +public interface MultimapSearcher { + long binarySearch(long key, long fromIndex, long n); + long binarySearchUpperBound(long key, long fromIndex, long n); -public class MultimapSearcher { - @Delegate - private final MultimapFileLongSlice mmf; - - public MultimapSearcher(MultimapFileLongSlice mmf) { - this.mmf = mmf; - } - - public boolean binarySearch(long key, long fromIndex, long toIndex) { - - long low = fromIndex; - long high = toIndex - 1; - - while (low <= high) { - long mid = (low + high) >>> 1; - long midVal = get(mid); - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return true; // key found + static MultimapSearcher forContext(MultimapFileLongSlice slice, long mask, int stepSize) { + if (mask == ~0L && stepSize == 1) { + return new SimpleMultimapSearcher(new MultimapSearcherBase(slice)); } - return false; // key not found. - } - - public long binarySearchUpperBound(long key, long fromIndex, long toIndex) { - - long low = fromIndex; - long high = toIndex - 1; - - while (low <= high) { - long mid = (low + high) >>> 1; - long midVal = get(mid); - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return mid; + else if (stepSize == 1) { + return new MaskedMultimapSearcher(new MultimapSearcherBase(slice), mask); } - return low; - } - - public long binarySearchUpperBound(long key, long fromIndex, long toIndex, long mask) { - - long low = fromIndex; - long high = toIndex - 1; - - while (low <= high) { - long mid = (low + high) >>> 1; - long midVal = get(mid) & mask; - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return mid; + else { + return new SteppingMaskedMultimapSearcher(new MultimapSearcherBase(slice), mask, stepSize); } - return low; - } - - public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long toIndex) { - - long low = fromIndex; - long high = toIndex - 1; - - while (low <= high) { - long mid = (low + high) >>> 1; - long midVal = get(mid); - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return mid; - } - return -1; - } - - - public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long toIndex, long mask) { - - long low = fromIndex; - long high = toIndex - 1; - - while (low <= high) { - long mid = (low + high) >>> 1; - long midVal = get(mid) & mask; - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return mid; - } - return -1; - } - - - public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long step, long steps, long mask) { - - long low = 0; - long high = steps - 1; - - while (low <= high) { - long mid = (low + high) >>> 1; - long midVal = get(fromIndex + mid*step) & mask; - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return fromIndex + mid*step; - } - return -1; } } + +class SimpleMultimapSearcher implements MultimapSearcher { + private final MultimapSearcherBase base; + + SimpleMultimapSearcher(MultimapSearcherBase base) { + this.base = base; + } + + @Override + public long binarySearch(long key, long fromIndex, long n) { + return base.binarySearchOffset(key, fromIndex, n); + } + + @Override + public long binarySearchUpperBound(long key, long fromIndex, long n) { + return base.binarySearchUpperBound(key, fromIndex, n); + } +} + + +class MaskedMultimapSearcher implements MultimapSearcher { + private final MultimapSearcherBase base; + private final long mask; + + MaskedMultimapSearcher(MultimapSearcherBase base, long mask) { + this.base = base; + this.mask = mask; + } + + @Override + public long binarySearch(long key, long fromIndex, long n) { + return base.binarySearchOffset(key, fromIndex, n, mask); + } + + @Override + public long binarySearchUpperBound(long key, long fromIndex, long n) { + return base.binarySearchUpperBound(key, fromIndex, n, mask); + } +} + + +class SteppingMaskedMultimapSearcher implements MultimapSearcher { + private final MultimapSearcherBase base; + private final long mask; + private final int step; + + SteppingMaskedMultimapSearcher(MultimapSearcherBase base, long mask, int step) { + this.base = base; + this.mask = mask; + this.step = step; + } + + @Override + public long binarySearch(long key, long fromIndex, long n) { + return base.binarySearchOffset(key, fromIndex, step, n, mask); + } + + @Override + public long binarySearchUpperBound(long key, long fromIndex, long n) { + return base.binarySearchUpperBound(key, fromIndex, step, n, mask); + } +} \ No newline at end of file diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcherBase.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcherBase.java new file mode 100644 index 00000000..2bd8c166 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapSearcherBase.java @@ -0,0 +1,143 @@ +package nu.marginalia.util.multimap; + +import lombok.experimental.Delegate; + +public class MultimapSearcherBase { + @Delegate + private final MultimapFileLongSlice mmf; + + public MultimapSearcherBase(MultimapFileLongSlice mmf) { + this.mmf = mmf; + } + + public boolean binarySearchTest(long key, long fromIndex, long n) { + + long low = 0; + long high = n - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(fromIndex + mid); + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return true; + } + return false; + } + + public long binarySearchOffset(long key, long fromIndex, long n) { + long low = 0; + long high = n - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(fromIndex + mid); + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return fromIndex + mid; + } + return fromIndex + low; + } + + + public long binarySearchOffset(long key, long fromIndex, long n, long mask) { + long low = 0; + long high = n - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(fromIndex + mid) & mask; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return fromIndex + mid; + } + return fromIndex + low; + } + + + public long binarySearchOffset(long key, long fromIndex, int step, long n, long mask) { + long low = 0; + long high = n - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(fromIndex + mid*step) & mask; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return fromIndex + mid*step; + } + return fromIndex + low; + } + + public long binarySearchUpperBound(long key, long fromIndex, long n) { + long low = 0; + long high = n - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(fromIndex + mid); + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return fromIndex + mid; + } + return -1; + } + + + public long binarySearchUpperBound(long key, long fromIndex, long n, long mask) { + long low = 0; + long high = n - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(fromIndex + mid) & mask; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return fromIndex + mid; + } + return -1; + } + + + public long binarySearchUpperBound(long key, long fromIndex, int step, long n, long mask) { + long low = 0; + long high = n - 1; + + while (low <= high) { + long mid = (low + high) >>> 1; + long midVal = get(fromIndex + mid*step) & mask; + + if (midVal < key) + low = mid + 1; + else if (midVal > key) + high = mid - 1; + else + return fromIndex + mid*step; + } + return -1; + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/IndexWordsTable.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/IndexWordsTable.java index 2bde1aa7..681e42ea 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/IndexWordsTable.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/IndexWordsTable.java @@ -45,12 +45,12 @@ public class IndexWordsTable implements AutoCloseable { private static MultimapFileLong openWordsFile(RandomAccessFile wordsFile) throws IOException { return new MultimapFileLong(wordsFile, - FileChannel.MapMode.READ_ONLY, wordsFile.length(), BUFFER_SIZE, false); + FileChannel.MapMode.READ_ONLY, wordsFile.length(), BUFFER_SIZE); } public long positionForWord(int wordId) { - long offset = reader.offsetForEntry(header, wordId); + long offset = reader.findEntry(header, wordId); if (offset < 0) { return -1L; } @@ -60,7 +60,7 @@ public class IndexWordsTable implements AutoCloseable { public int wordLength(int wordId) { - long offset = reader.offsetForEntry(header, wordId); + long offset = reader.findEntry(header, wordId); if (offset < 0) { return -1; } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java index 042f8f54..0ab4d80b 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java @@ -82,7 +82,7 @@ public class SearchIndex implements AutoCloseable { if (!range.isPresent()) return false; - return bTreeReader.offsetForEntry(bTreeReader.getHeader(range.dataOffset), url) >= 0; + return bTreeReader.findEntry(bTreeReader.getHeader(range.dataOffset), url) >= 0; } public class UrlIndexTree { diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTest.java b/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTest.java index 875cda37..73aa4dc3 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/util/btree/BTreeWriterTest.java @@ -48,9 +48,9 @@ class BTreeWriterTest { @Test void testLayerOffset() { int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS(); - System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 0)); - System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 1)); - System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 2)); + System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 0)); + System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 1)); + System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 2)); for (int i = 0; i < 1024; i++) { var header = writer.makeHeader(0, i); @@ -59,7 +59,7 @@ class BTreeWriterTest { printTreeLayout(i, header, ctx); if (header.layers() >= 1) { - assertEquals(1, ctx.layerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS()); + assertEquals(1, ctx.indexLayerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS()); } } } @@ -67,7 +67,7 @@ class BTreeWriterTest { private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) { StringJoiner sj = new StringJoiner(","); for (int l = 0; l < header.layers(); l++) { - sj.add(""+ctx.layerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS()); + sj.add(""+ctx.indexLayerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS()); } System.out.println(numEntries + ":" + sj); } @@ -86,7 +86,7 @@ class BTreeWriterTest { try { RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); - MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); + MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000); { var writer = new BTreeWriter(mmf, ctx); @@ -103,7 +103,7 @@ class BTreeWriterTest { var reader = new BTreeReader(mmf, ctx); var header = reader.getHeader(0); for (int i = 0; i < data.length; i++) { - long offset = reader.offsetForEntry(header, data[i]); + long offset = reader.findEntry(header, data[i]); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertEquals(i, mmf.get(offset+1)); } @@ -129,7 +129,7 @@ class BTreeWriterTest { try { RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); - MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); + MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000); { var writer = new BTreeWriter(mmf, ctx); @@ -146,7 +146,7 @@ class BTreeWriterTest { var reader = new BTreeReader(mmf, ctx); var header = reader.getHeader(0); for (int i = 0; i < data.length; i++) { - long offset = reader.offsetForEntry(header, data[i]); + long offset = reader.findEntry(header, data[i]); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertEquals(i, mmf.get(offset+1)); } @@ -154,7 +154,7 @@ class BTreeWriterTest { for (int i = 0; i < 500; i++) { long val = (long)(Long.MAX_VALUE * Math.random()); while (toPut.contains((int)val)) val = (long)(Long.MAX_VALUE * Math.random()); - assertEquals(-1, reader.offsetForEntry(header, val)); + assertEquals(-1, reader.findEntry(header, val)); } } } catch (Exception e) { @@ -197,7 +197,7 @@ class BTreeWriterTest { printTreeLayout(toPut.size(), header, ctx); for (int i = 0; i < data.length; i++) { - long offset = reader.offsetForEntry(header, data[i]); + long offset = reader.findEntry(header, data[i]); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertEquals(data[i], mmf.get(offset)); } @@ -205,7 +205,7 @@ class BTreeWriterTest { for (int i = 0; i < 500; i++) { long val = (long) (Long.MAX_VALUE * Math.random()); while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); - assertEquals(-1, reader.offsetForEntry(header, val)); + assertEquals(-1, reader.findEntry(header, val)); } } } catch (Exception e) { @@ -250,7 +250,7 @@ class BTreeWriterTest { printTreeLayout(toPut.size(), header, ctx); for (int i = 0; i < data.length; i++) { - long offset = reader.offsetForEntry(header, data[i] & mask); + long offset = reader.findEntry(header, data[i] & mask); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertEquals(data[i], mmf.get(offset)); } @@ -258,7 +258,7 @@ class BTreeWriterTest { for (int i = 0; i < 500; i++) { long val = (long) (Long.MAX_VALUE * Math.random()); while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); - assertEquals(-1, reader.offsetForEntry(header, val & mask)); + assertEquals(-1, reader.findEntry(header, val & mask)); } } } catch (Exception e) { @@ -304,7 +304,7 @@ class BTreeWriterTest { printTreeLayout(toPut.size(), header, ctx); for (int i = 0; i < data.length; i++) { - long offset = reader.offsetForEntry(header, data[i] & mask); + long offset = reader.findEntry(header, data[i] & mask); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertEquals(data[i], mmf.get(offset)); assertEquals(i, mmf.get(offset+1)); @@ -313,7 +313,7 @@ class BTreeWriterTest { for (int i = 0; i < 500; i++) { long val = (long) (Long.MAX_VALUE * Math.random()); while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); - assertEquals(-1, reader.offsetForEntry(header, val & mask)); + assertEquals(-1, reader.findEntry(header, val & mask)); } } } catch (Exception e) { diff --git a/marginalia_nu/src/test/java/nu/marginalia/util/hash/LongPairHashMapTest.java b/marginalia_nu/src/test/java/nu/marginalia/util/hash/LongPairHashMapTest.java index 9331a998..d2bec272 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/util/hash/LongPairHashMapTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/util/hash/LongPairHashMapTest.java @@ -26,7 +26,7 @@ class LongPairHashMapTest { try { RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); - MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); + MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000); var lphm = LongPairHashMap.createNew(mmf, 1024); toPut.forEach(i -> { lphm.put(new LongPairHashMap.CellData(i, i)); @@ -35,7 +35,7 @@ class LongPairHashMapTest { lphm.close(); RandomAccessFile raf2 = new RandomAccessFile(tempFile.toFile(), "rw"); - MultimapFileLong mmf2 = new MultimapFileLong(raf2, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); + MultimapFileLong mmf2 = new MultimapFileLong(raf2, FileChannel.MapMode.READ_WRITE, 10000, 1000); var lphm2 = LongPairHashMap.loadExisting(mmf2); toPut.forEach(i -> { Assertions.assertTrue(lphm2.get(i).isSet()); diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/MultimapFileTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/MultimapFileTest.java index 44e4207a..bb7b360e 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/MultimapFileTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/service/MultimapFileTest.java @@ -56,7 +56,7 @@ class MultimapFileTest { @SneakyThrows @Test void put() { - var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8); for (int i = 0; i < 32; i++) { file.put(i, i); } @@ -68,7 +68,7 @@ class MultimapFileTest { @SneakyThrows @Test void read() { - var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8); for (int i = 0; i < 32; i++) { file.put(i, i); } @@ -85,7 +85,7 @@ class MultimapFileTest { @Test void write() throws IOException { - var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8); for (int i = 0; i < 32-6; i++) { file.write(new long[] { 0,1,2,3,4,5}, i); @@ -98,7 +98,7 @@ class MultimapFileTest { @Test void sortInternal() throws IOException { - var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8); var sorter = file.createSorter(Path.of("/tmp"), 16); var searcher = file.createSearcher(); for (int i = 0; i < 32; i++) { @@ -109,13 +109,13 @@ class MultimapFileTest { for (int i = 2+1; i < 16; i++) { assertTrue(file.get(i) > file.get(i-1)); - assertTrue(searcher.binarySearch(file.get(i), 2, 18)); + assertTrue(searcher.binarySearchTest(file.get(i), 2, 16)); } } @Test void sortExternal() throws IOException { - var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); + var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8); var sorter = file.createSorter(Path.of("/tmp"), 2); var searcher = file.createSearcher(); @@ -128,7 +128,7 @@ class MultimapFileTest { for (int i = 2+1; i < 16; i++) { assertTrue(file.get(i) > file.get(i-1)); - assertTrue(searcher.binarySearch(file.get(i), 2, 18)); + assertTrue(searcher.binarySearchTest(file.get(i), 2, 16)); } }