Refactoring BTreeReader and binary search code

This commit is contained in:
vlofgren 2022-06-20 12:02:01 +02:00
parent f76af4ca79
commit 420b9bb7e0
14 changed files with 380 additions and 274 deletions

View File

@ -11,94 +11,68 @@ public class BTreeReader {
private final MultimapFileLong file; private final MultimapFileLong file;
private final BTreeContext ctx; private final BTreeContext ctx;
private final Logger logger = LoggerFactory.getLogger(BTreeReader.class); private final Logger logger = LoggerFactory.getLogger(BTreeReader.class);
private final long mask;
private final MultimapSearcher searcher; private final MultimapSearcher indexSearcher;
private final MultimapSearcher dataSearcher;
public BTreeReader(MultimapFileLong file, BTreeContext ctx) { public BTreeReader(MultimapFileLong file, BTreeContext ctx) {
this.file = file; this.file = file;
this.searcher = file.createSearcher(); this.indexSearcher = MultimapSearcher.forContext(file, ~0, 1);
this.dataSearcher = MultimapSearcher.forContext(file, ctx.equalityMask(), ctx.entrySize());
this.ctx = ctx; this.ctx = ctx;
this.mask = ctx.equalityMask();
} }
public long fileSize() { public BTreeHeader getHeader(long fileOffset) {
return file.size(); return new BTreeHeader(file.get(fileOffset), file.get(fileOffset+1), file.get(fileOffset+2));
} }
public BTreeHeader getHeader(long offset) { /**
return new BTreeHeader(file.get(offset), file.get(offset+1), file.get(offset+2)); *
} * @return file offset of entry matching keyRaw, negative if absent
*/
public long findEntry(BTreeHeader header, final long keyRaw) {
final long key = keyRaw & ctx.equalityMask();
public long offsetForEntry(BTreeHeader header, final long keyRaw) { final long dataAddress = header.dataOffsetLongs();
final long key = keyRaw & mask; final int entrySize = ctx.entrySize();
final int blockSize = ctx.BLOCK_SIZE_WORDS();
if (header.layers() == 0) { if (header.layers() == 0) { // For small data, we only have a data block
return trivialSearch(header, key); return dataSearcher.binarySearchUpperBound(key, dataAddress, header.numEntries());
} }
long p = searchEntireTopLayer(header, key); final long indexOffset = header.indexOffsetLongs();
if (p < 0) return -1;
long cumOffset = p * ctx.BLOCK_SIZE_WORDS(); // Search the top layer
long layerOffset = indexSearch(key, indexOffset, blockSize);
if (layerOffset < 0) return -1;
// Search intermediary layers
for (int i = header.layers() - 2; i >= 0; --i) { for (int i = header.layers() - 2; i >= 0; --i) {
long offsetBase = header.indexOffsetLongs() + header.relativeLayerOffset(ctx, i); final long layerAddressBase = indexOffset + header.relativeIndexLayerOffset(ctx, i);
p = searchLayerBlock(key, offsetBase+cumOffset); final long layerBlockOffset = layerAddressBase + layerOffset;
if (p < 0)
final long nextLayerOffset = indexSearch(key, layerBlockOffset, blockSize);
if (nextLayerOffset < 0)
return -1; return -1;
cumOffset = ctx.BLOCK_SIZE_WORDS()*(p + cumOffset);
layerOffset = blockSize*(nextLayerOffset + layerOffset);
} }
long dataMax = header.dataOffsetLongs() + (long) header.numEntries() * ctx.entrySize(); // Search the corresponding data block
return searchDataBlock(key, final long searchStart = dataAddress + layerOffset * entrySize;
header.dataOffsetLongs() + ctx.entrySize()*cumOffset, final long lastDataAddress = dataAddress + (long) header.numEntries() * entrySize;
dataMax); final long lastItemInBlockAddress = searchStart + (long) blockSize * entrySize;
final long searchEnd = Math.min(lastItemInBlockAddress, lastDataAddress);
return dataSearcher.binarySearchUpperBound(key, searchStart, (searchEnd - searchStart) / entrySize);
} }
private long indexSearch(long key, long start, long n) {
private long searchEntireTopLayer(BTreeHeader header, long key) { return indexSearcher.binarySearch(key, start, n) - start;
long offset = header.indexOffsetLongs();
return searcher.binarySearchUpperBound(key, offset, offset + ctx.BLOCK_SIZE_WORDS()) - offset;
}
private long searchLayerBlock(long key, long blockOffset) {
if (blockOffset < 0)
return blockOffset;
return searcher.binarySearchUpperBound(key, blockOffset, blockOffset + ctx.BLOCK_SIZE_WORDS()) - blockOffset;
}
private long searchDataBlock(long key, long blockOffset, long dataMax) {
if (blockOffset < 0)
return blockOffset;
long lastOffset = Math.min(blockOffset+ctx.BLOCK_SIZE_WORDS()*(long)ctx.entrySize(), dataMax);
int length = (int)(lastOffset - blockOffset);
if (ctx.entrySize() == 1) {
if (mask == ~0L) return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, blockOffset+length);
return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, blockOffset+length, mask);
}
return searcher.binarySearchUpperBoundNoMiss(key, blockOffset, ctx.entrySize(), length/ctx.entrySize(), mask);
}
private long trivialSearch(BTreeHeader header, long key) {
long offset = header.dataOffsetLongs();
if (ctx.entrySize() == 1) {
if (mask == ~0L) {
return searcher.binarySearchUpperBoundNoMiss(key, offset, offset+header.numEntries());
}
else {
return searcher.binarySearchUpperBoundNoMiss(key, offset, offset+header.numEntries(), mask);
}
}
return searcher.binarySearchUpperBoundNoMiss(key, offset, ctx.entrySize(), header.numEntries(), mask);
} }
} }

View File

@ -2,16 +2,12 @@ package nu.marginalia.util.btree;
import nu.marginalia.util.btree.model.BTreeContext; import nu.marginalia.util.btree.model.BTreeContext;
import nu.marginalia.util.btree.model.BTreeHeader; import nu.marginalia.util.btree.model.BTreeHeader;
import nu.marginalia.util.multimap.MultimapFileLong;
import nu.marginalia.util.multimap.MultimapFileLongSlice; import nu.marginalia.util.multimap.MultimapFileLongSlice;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
public class BTreeWriter { public class BTreeWriter {
private final Logger logger = LoggerFactory.getLogger(BTreeWriter.class);
private final BTreeContext ctx; private final BTreeContext ctx;
private final MultimapFileLongSlice map; private final MultimapFileLongSlice map;
@ -27,7 +23,7 @@ public class BTreeWriter {
long size = 0; long size = 0;
for (int layer = 0; layer < numLayers; layer++) { for (int layer = 0; layer < numLayers; layer++) {
size += ctx.layerSize(numWords, layer); size += ctx.indexLayerSize(numWords, layer);
} }
return size; return size;
} }
@ -45,17 +41,17 @@ public class BTreeWriter {
writeIndexCallback.write(map.atOffset(header.dataOffsetLongs())); writeIndexCallback.write(map.atOffset(header.dataOffsetLongs()));
if (header.layers() < 1) { if (header.layers() < 1) { // The data is too small to benefit from indexing
return ctx.calculateSize(numEntries);
}
else {
writeIndex(header);
return ctx.calculateSize(numEntries); return ctx.calculateSize(numEntries);
} }
writeIndex(header);
return ctx.calculateSize(numEntries);
} }
public static BTreeHeader makeHeader(BTreeContext ctx, long offset, int numEntries) { public static BTreeHeader makeHeader(BTreeContext ctx, long offset, int numEntries) {
final int numLayers = ctx.numLayers(numEntries); final int numLayers = ctx.numIndexLayers(numEntries);
final int padding = BTreeHeader.getPadding(ctx, offset, numLayers); final int padding = BTreeHeader.getPadding(ctx, offset, numLayers);
@ -71,46 +67,50 @@ public class BTreeWriter {
private void writeIndex(BTreeHeader header) { private void writeIndex(BTreeHeader header) {
var layerOffsets = getRelativeLayerOffsets(header); var layerOffsets = header.getRelativeLayerOffsets(ctx);
long stride = ctx.BLOCK_SIZE_WORDS(); long indexedDataStepSize = ctx.BLOCK_SIZE_WORDS();
/* Index layer 0 indexes the data itself
Index layer 1 indexes layer 0
Index layer 2 indexes layer 1
And so on
*/
for (int layer = 0; layer < header.layers(); layer++, for (int layer = 0; layer < header.layers(); layer++,
stride*=ctx.BLOCK_SIZE_WORDS()) { indexedDataStepSize*=ctx.BLOCK_SIZE_WORDS()) {
long indexWord = 0;
long offsetBase = layerOffsets[layer] + header.indexOffsetLongs();
long numEntries = header.numEntries();
for (long idx = 0; idx < numEntries; idx += stride, indexWord++) {
long dataOffset = header.dataOffsetLongs() + (idx + (stride-1)) * ctx.entrySize();
long val;
if (idx + (stride-1) < numEntries) { writeIndexLayer(header, layerOffsets, indexedDataStepSize, layer);
val = map.get(dataOffset) & ctx.equalityMask();
}
else {
val = Long.MAX_VALUE;
}
if (offsetBase + indexWord < 0) {
logger.error("bad put @ {}", offsetBase + indexWord);
logger.error("layer{}", layer);
logger.error("layer offsets {}", layerOffsets);
logger.error("offsetBase = {}", offsetBase);
logger.error("numEntries = {}", numEntries);
logger.error("indexWord = {}", indexWord);
}
map.put(offsetBase + indexWord, val);
}
for (; (indexWord % ctx.BLOCK_SIZE_WORDS()) != 0; indexWord++) {
map.put(offsetBase + indexWord, Long.MAX_VALUE);
}
} }
} }
private long[] getRelativeLayerOffsets(BTreeHeader header) { private void writeIndexLayer(BTreeHeader header, long[] layerOffsets,
long[] layerOffsets = new long[header.layers()]; final long indexedDataStepSize,
for (int i = 0; i < header.layers(); i++) { final int layer) {
layerOffsets[i] = header.relativeLayerOffset(ctx, i);
final long indexOffsetBase = layerOffsets[layer] + header.indexOffsetLongs();
final long dataOffsetBase = header.dataOffsetLongs();
final long dataEntriesMax = header.numEntries();
final int entrySize = ctx.entrySize();
final long lastDataEntryOffset = indexedDataStepSize - 1;
long indexWord = 0;
for (long dataPtr = 0;
dataPtr + lastDataEntryOffset < dataEntriesMax;
dataPtr += indexedDataStepSize)
{
long dataOffset = dataOffsetBase + (dataPtr + lastDataEntryOffset) * entrySize;
map.put(indexOffsetBase + indexWord++, map.get(dataOffset) & ctx.equalityMask());
} }
return layerOffsets;
// Fill the remaining block with LONG_MAX
map.setRange(indexOffsetBase+indexWord,
(int) (ctx.BLOCK_SIZE_WORDS() - (indexWord % ctx.BLOCK_SIZE_WORDS())),
Long.MAX_VALUE);
} }
} }

View File

@ -10,7 +10,6 @@ public record BTreeContext(int MAX_LAYERS,
public BTreeContext(int MAX_LAYERS, int entrySize, long equalityMask, int BLOCK_SIZE_BITS) { public BTreeContext(int MAX_LAYERS, int entrySize, long equalityMask, int BLOCK_SIZE_BITS) {
this(MAX_LAYERS, entrySize, equalityMask, BLOCK_SIZE_BITS, 1 << BLOCK_SIZE_BITS); this(MAX_LAYERS, entrySize, equalityMask, BLOCK_SIZE_BITS, 1 << BLOCK_SIZE_BITS);
} }
public long calculateSize(int numEntries) { public long calculateSize(int numEntries) {
@ -19,7 +18,7 @@ public record BTreeContext(int MAX_LAYERS,
return header.dataOffsetLongs() + (long)numEntries * entrySize; return header.dataOffsetLongs() + (long)numEntries * entrySize;
} }
public int numLayers(int numEntries) { public int numIndexLayers(int numEntries) {
if (numEntries <= BLOCK_SIZE_WORDS*2) { if (numEntries <= BLOCK_SIZE_WORDS*2) {
return 0; return 0;
} }
@ -36,11 +35,7 @@ public record BTreeContext(int MAX_LAYERS,
return MAX_LAYERS; return MAX_LAYERS;
} }
public long layerSize(int numEntries, int level) { public long indexLayerSize(int numWords, int level) {
return BLOCK_SIZE_WORDS * numBlocks(numEntries, level);
}
private long numBlocks(int numWords, int level) {
long layerSize = 1L<<(BLOCK_SIZE_BITS*(level+1)); long layerSize = 1L<<(BLOCK_SIZE_BITS*(level+1));
int numBlocks = 0; int numBlocks = 0;
@ -50,7 +45,7 @@ public record BTreeContext(int MAX_LAYERS,
numBlocks++; numBlocks++;
} }
return numBlocks; return (long) BLOCK_SIZE_WORDS * numBlocks;
} }
} }

View File

@ -1,6 +1,5 @@
package nu.marginalia.util.btree.model; package nu.marginalia.util.btree.model;
import nu.marginalia.util.multimap.MultimapFileLong;
import nu.marginalia.util.multimap.MultimapFileLongSlice; import nu.marginalia.util.multimap.MultimapFileLongSlice;
public record BTreeHeader(int layers, int numEntries, long indexOffsetLongs, long dataOffsetLongs) { public record BTreeHeader(int layers, int numEntries, long indexOffsetLongs, long dataOffsetLongs) {
@ -36,12 +35,20 @@ public record BTreeHeader(int layers, int numEntries, long indexOffsetLongs, lon
} }
public long relativeLayerOffset(BTreeContext ctx, int n) { public long relativeIndexLayerOffset(BTreeContext ctx, int n) {
long offset = 0; long offset = 0;
for (int i = n+1; i < layers; i++) { for (int i = n+1; i < layers; i++) {
offset += ctx.layerSize( numEntries, i); offset += ctx.indexLayerSize( numEntries, i);
} }
return offset; return offset;
} }
/**
 * Collects the relative offset of every index layer of this header.
 *
 * @param ctx the b-tree context passed through to {@code relativeIndexLayerOffset}
 * @return an array of length {@code layers()} whose element {@code i} is
 *         {@code relativeIndexLayerOffset(ctx, i)}
 */
public long[] getRelativeLayerOffsets(BTreeContext ctx) {
    final int layerCount = layers();
    final long[] offsets = new long[layerCount];

    int layer = 0;
    while (layer < layerCount) {
        offsets[layer] = relativeIndexLayerOffset(ctx, layer);
        layer++;
    }

    return offsets;
}
} }

View File

@ -97,8 +97,8 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
readableSize(mapSizeBytes), readableSize(8L*bufferSizeWords), mode); readableSize(mapSizeBytes), readableSize(8L*bufferSizeWords), mode);
} }
public MultimapSearcher createSearcher() { public MultimapSearcherBase createSearcher() {
return new MultimapSearcher(this); return new MultimapSearcherBase(this);
} }
public MultimapSorter createSorter(Path tmpFile, int internalSortLimit) { public MultimapSorter createSorter(Path tmpFile, int internalSortLimit) {
return new MultimapSorter(this, tmpFile, internalSortLimit); return new MultimapSorter(this, tmpFile, internalSortLimit);
@ -332,6 +332,34 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
} }
/**
 * Writes {@code val} into the {@code n} consecutive positions starting at {@code idx}.
 *
 * The mapping is grown first when {@code idx + n} reaches or exceeds {@code mappedSize}.
 * The range is then filled one buffer at a time, since a range may straddle several
 * of the fixed-size buffers.
 *
 * @param idx first position to write
 * @param n   number of positions to write; nothing is written when this is 0
 * @param val value stored at every position in the range
 */
@Override
public void setRange(long idx, int n, long val) {
    if (n == 0) {
        return;
    }

    final long rangeEnd = idx + n;
    if (rangeEnd >= mappedSize) {
        grow(rangeEnd);
    }

    int written = 0;
    while (written < n) {
        final long position = idx + written;
        final var buffer = buffers.get((int) (position / bufferSize));
        final int startInBuffer = (int) (position % bufferSize);

        // Stop at whichever comes first: the end of this buffer or the end of the range.
        // While the range continues into a later buffer, the buffer remainder is always
        // the smaller of the two, so a single min() covers both situations.
        final int chunk = Math.min(n - written, bufferSize - startInBuffer);
        for (int slot = startInBuffer; slot < startInBuffer + chunk; slot++) {
            buffer.put(slot, val);
        }

        written += chunk;
    }
}
@Override @Override
public void transferFromFileChannel(FileChannel sourceChannel, long destOffset, long sourceStart, long sourceEnd) throws IOException { public void transferFromFileChannel(FileChannel sourceChannel, long destOffset, long sourceStart, long sourceEnd) throws IOException {

View File

@ -23,6 +23,11 @@ public class MultimapFileLongOffsetSlice implements MultimapFileLongSlice {
map.put(off+idx, val); map.put(off+idx, val);
} }
/**
 * Forwards to the backing map's {@code setRange}, with {@code idx} shifted by this
 * slice's offset {@code off}.
 *
 * @param idx slice-relative start position
 * @param n   number of positions, passed through unchanged
 * @param val value, passed through unchanged
 */
@Override
public void setRange(long idx, int n, long val) {
    final long absoluteIdx = off + idx;
    map.setRange(absoluteIdx, n, val);
}
@Override @Override
public long get(long idx) { public long get(long idx) {
return map.get(off+idx); return map.get(off+idx);

View File

@ -9,6 +9,8 @@ public interface MultimapFileLongSlice {
void put(long idx, long val); void put(long idx, long val);
void setRange(long idx, int n, long val);
long get(long idx); long get(long idx);
void read(long[] vals, long idx); void read(long[] vals, long idx);

View File

@ -1,128 +1,80 @@
package nu.marginalia.util.multimap; package nu.marginalia.util.multimap;
import lombok.experimental.Delegate; public interface MultimapSearcher {
long binarySearch(long key, long fromIndex, long n);
long binarySearchUpperBound(long key, long fromIndex, long n);
public class MultimapSearcher { static MultimapSearcher forContext(MultimapFileLongSlice slice, long mask, int stepSize) {
@Delegate if (mask == ~0L && stepSize == 1) {
private final MultimapFileLongSlice mmf; return new SimpleMultimapSearcher(new MultimapSearcherBase(slice));
public MultimapSearcher(MultimapFileLongSlice mmf) {
this.mmf = mmf;
}
public boolean binarySearch(long key, long fromIndex, long toIndex) {
long low = fromIndex;
long high = toIndex - 1;
while (low <= high) {
long mid = (low + high) >>> 1;
long midVal = get(mid);
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return true; // key found
} }
return false; // key not found. else if (stepSize == 1) {
} return new MaskedMultimapSearcher(new MultimapSearcherBase(slice), mask);
public long binarySearchUpperBound(long key, long fromIndex, long toIndex) {
long low = fromIndex;
long high = toIndex - 1;
while (low <= high) {
long mid = (low + high) >>> 1;
long midVal = get(mid);
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return mid;
} }
return low; else {
} return new SteppingMaskedMultimapSearcher(new MultimapSearcherBase(slice), mask, stepSize);
public long binarySearchUpperBound(long key, long fromIndex, long toIndex, long mask) {
long low = fromIndex;
long high = toIndex - 1;
while (low <= high) {
long mid = (low + high) >>> 1;
long midVal = get(mid) & mask;
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return mid;
} }
return low;
}
public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long toIndex) {
long low = fromIndex;
long high = toIndex - 1;
while (low <= high) {
long mid = (low + high) >>> 1;
long midVal = get(mid);
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return mid;
}
return -1;
}
public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long toIndex, long mask) {
long low = fromIndex;
long high = toIndex - 1;
while (low <= high) {
long mid = (low + high) >>> 1;
long midVal = get(mid) & mask;
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return mid;
}
return -1;
}
public long binarySearchUpperBoundNoMiss(long key, long fromIndex, long step, long steps, long mask) {
long low = 0;
long high = steps - 1;
while (low <= high) {
long mid = (low + high) >>> 1;
long midVal = get(fromIndex + mid*step) & mask;
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return fromIndex + mid*step;
}
return -1;
} }
} }
/**
 * {@link MultimapSearcher} that compares raw values: no mask, one word per entry.
 * Both operations forward to the unmasked, non-stepping overloads of
 * {@link MultimapSearcherBase}.
 */
class SimpleMultimapSearcher implements MultimapSearcher {
    private final MultimapSearcherBase delegate;

    SimpleMultimapSearcher(MultimapSearcherBase base) {
        delegate = base;
    }

    /** Forwards to {@code binarySearchOffset(key, fromIndex, n)}. */
    @Override
    public long binarySearch(long key, long fromIndex, long n) {
        final long result = delegate.binarySearchOffset(key, fromIndex, n);
        return result;
    }

    /** Forwards to {@code binarySearchUpperBound(key, fromIndex, n)}. */
    @Override
    public long binarySearchUpperBound(long key, long fromIndex, long n) {
        final long result = delegate.binarySearchUpperBound(key, fromIndex, n);
        return result;
    }
}
/**
 * {@link MultimapSearcher} for one-word entries whose stored values are masked before
 * comparison. Both operations forward to the masked, non-stepping overloads of
 * {@link MultimapSearcherBase}, supplying the mask given at construction.
 */
class MaskedMultimapSearcher implements MultimapSearcher {
    private final MultimapSearcherBase delegate;
    private final long valueMask;

    MaskedMultimapSearcher(MultimapSearcherBase base, long mask) {
        delegate = base;
        valueMask = mask;
    }

    /** Forwards to {@code binarySearchOffset(key, fromIndex, n, mask)}. */
    @Override
    public long binarySearch(long key, long fromIndex, long n) {
        final long result = delegate.binarySearchOffset(key, fromIndex, n, valueMask);
        return result;
    }

    /** Forwards to {@code binarySearchUpperBound(key, fromIndex, n, mask)}. */
    @Override
    public long binarySearchUpperBound(long key, long fromIndex, long n) {
        final long result = delegate.binarySearchUpperBound(key, fromIndex, n, valueMask);
        return result;
    }
}
/**
 * {@link MultimapSearcher} for entries spanning several words, with stored values
 * masked before comparison. Both operations forward to the stepping overloads of
 * {@link MultimapSearcherBase}, supplying the mask and step given at construction.
 */
class SteppingMaskedMultimapSearcher implements MultimapSearcher {
    private final MultimapSearcherBase delegate;
    private final long valueMask;
    private final int stride;

    SteppingMaskedMultimapSearcher(MultimapSearcherBase base, long mask, int step) {
        delegate = base;
        valueMask = mask;
        stride = step;
    }

    /** Forwards to {@code binarySearchOffset(key, fromIndex, step, n, mask)}. */
    @Override
    public long binarySearch(long key, long fromIndex, long n) {
        final long result = delegate.binarySearchOffset(key, fromIndex, stride, n, valueMask);
        return result;
    }

    /** Forwards to {@code binarySearchUpperBound(key, fromIndex, step, n, mask)}. */
    @Override
    public long binarySearchUpperBound(long key, long fromIndex, long n) {
        final long result = delegate.binarySearchUpperBound(key, fromIndex, stride, n, valueMask);
        return result;
    }
}

View File

@ -0,0 +1,143 @@
package nu.marginalia.util.multimap;
import lombok.experimental.Delegate;
/**
 * Binary search primitives over a {@link MultimapFileLongSlice}.
 *
 * <p>Every method searches {@code n} entries beginning at {@code fromIndex} and
 * relies on the searched values being in ascending order. The variants differ in
 * three ways:
 * <ul>
 *   <li><b>mask</b> - when given, each stored value is AND-ed with {@code mask}
 *       before it is compared with {@code key};</li>
 *   <li><b>step</b> - when given, entry {@code i} is read from
 *       {@code fromIndex + i*step} instead of {@code fromIndex + i};</li>
 *   <li><b>miss behaviour</b> - {@code binarySearchTest} returns {@code false},
 *       {@code binarySearchOffset} returns the position where the key would be
 *       inserted, and {@code binarySearchUpperBound} returns {@code -1}.</li>
 * </ul>
 */
public class MultimapSearcherBase {
    @Delegate
    private final MultimapFileLongSlice mmf;

    public MultimapSearcherBase(MultimapFileLongSlice mmf) {
        this.mmf = mmf;
    }

    /**
     * Tests whether {@code key} occurs among the {@code n} values starting at {@code fromIndex}.
     *
     * @return {@code true} if some value equals {@code key}, otherwise {@code false}
     */
    public boolean binarySearchTest(long key, long fromIndex, long n) {
        long low = 0;
        long high = n - 1;

        while (low <= high) {
            long mid = (low + high) >>> 1;
            long midVal = get(fromIndex + mid);

            if (midVal < key)
                low = mid + 1;
            else if (midVal > key)
                high = mid - 1;
            else
                return true;
        }

        return false;
    }

    /**
     * Searches the {@code n} values starting at {@code fromIndex} for {@code key}.
     *
     * @return the absolute index of a value equal to {@code key}; if there is none,
     *         {@code fromIndex} plus the insertion point (the number of searched
     *         values smaller than {@code key})
     */
    public long binarySearchOffset(long key, long fromIndex, long n) {
        long low = 0;
        long high = n - 1;

        while (low <= high) {
            long mid = (low + high) >>> 1;
            long midVal = get(fromIndex + mid);

            if (midVal < key)
                low = mid + 1;
            else if (midVal > key)
                high = mid - 1;
            else
                return fromIndex + mid;
        }

        return fromIndex + low;
    }

    /**
     * Masked variant of {@link #binarySearchOffset(long, long, long)}: each stored
     * value is AND-ed with {@code mask} before being compared with {@code key}.
     *
     * @return the absolute index of a matching value; if there is none,
     *         {@code fromIndex} plus the insertion point
     */
    public long binarySearchOffset(long key, long fromIndex, long n, long mask) {
        long low = 0;
        long high = n - 1;

        while (low <= high) {
            long mid = (low + high) >>> 1;
            long midVal = get(fromIndex + mid) & mask;

            if (midVal < key)
                low = mid + 1;
            else if (midVal > key)
                high = mid - 1;
            else
                return fromIndex + mid;
        }

        return fromIndex + low;
    }

    /**
     * Masked, stepping variant of {@link #binarySearchOffset(long, long, long)}:
     * entry {@code i} is read from {@code fromIndex + i*step} and AND-ed with
     * {@code mask} before being compared with {@code key}.
     *
     * @param n number of entries (not words) to search
     * @return the absolute index of the matching entry; if there is none, the
     *         absolute index of the entry slot where the key would be inserted,
     *         i.e. {@code fromIndex + insertionPoint*step}
     */
    public long binarySearchOffset(long key, long fromIndex, int step, long n, long mask) {
        long low = 0;
        long high = n - 1;

        while (low <= high) {
            long mid = (low + high) >>> 1;
            long midVal = get(fromIndex + mid*step) & mask;

            if (midVal < key)
                low = mid + 1;
            else if (midVal > key)
                high = mid - 1;
            else
                return fromIndex + mid*step;
        }

        // low counts entries, so it has to be scaled by step to become a word
        // address in the same units as the hit case above.
        return fromIndex + low*step;
    }

    /**
     * Searches the {@code n} values starting at {@code fromIndex} for {@code key}.
     *
     * @return the absolute index of a value equal to {@code key}, or {@code -1} if absent
     */
    public long binarySearchUpperBound(long key, long fromIndex, long n) {
        long low = 0;
        long high = n - 1;

        while (low <= high) {
            long mid = (low + high) >>> 1;
            long midVal = get(fromIndex + mid);

            if (midVal < key)
                low = mid + 1;
            else if (midVal > key)
                high = mid - 1;
            else
                return fromIndex + mid;
        }

        return -1;
    }

    /**
     * Masked variant of {@link #binarySearchUpperBound(long, long, long)}: each
     * stored value is AND-ed with {@code mask} before being compared with {@code key}.
     *
     * @return the absolute index of a matching value, or {@code -1} if absent
     */
    public long binarySearchUpperBound(long key, long fromIndex, long n, long mask) {
        long low = 0;
        long high = n - 1;

        while (low <= high) {
            long mid = (low + high) >>> 1;
            long midVal = get(fromIndex + mid) & mask;

            if (midVal < key)
                low = mid + 1;
            else if (midVal > key)
                high = mid - 1;
            else
                return fromIndex + mid;
        }

        return -1;
    }

    /**
     * Masked, stepping variant of {@link #binarySearchUpperBound(long, long, long)}:
     * entry {@code i} is read from {@code fromIndex + i*step} and AND-ed with
     * {@code mask} before being compared with {@code key}.
     *
     * @param n number of entries (not words) to search
     * @return the absolute index of the matching entry, or {@code -1} if absent
     */
    public long binarySearchUpperBound(long key, long fromIndex, int step, long n, long mask) {
        long low = 0;
        long high = n - 1;

        while (low <= high) {
            long mid = (low + high) >>> 1;
            long midVal = get(fromIndex + mid*step) & mask;

            if (midVal < key)
                low = mid + 1;
            else if (midVal > key)
                high = mid - 1;
            else
                return fromIndex + mid*step;
        }

        return -1;
    }
}

View File

@ -45,12 +45,12 @@ public class IndexWordsTable implements AutoCloseable {
private static MultimapFileLong openWordsFile(RandomAccessFile wordsFile) throws IOException { private static MultimapFileLong openWordsFile(RandomAccessFile wordsFile) throws IOException {
return new MultimapFileLong(wordsFile, return new MultimapFileLong(wordsFile,
FileChannel.MapMode.READ_ONLY, wordsFile.length(), BUFFER_SIZE, false); FileChannel.MapMode.READ_ONLY, wordsFile.length(), BUFFER_SIZE);
} }
public long positionForWord(int wordId) { public long positionForWord(int wordId) {
long offset = reader.offsetForEntry(header, wordId); long offset = reader.findEntry(header, wordId);
if (offset < 0) { if (offset < 0) {
return -1L; return -1L;
} }
@ -60,7 +60,7 @@ public class IndexWordsTable implements AutoCloseable {
public int wordLength(int wordId) { public int wordLength(int wordId) {
long offset = reader.offsetForEntry(header, wordId); long offset = reader.findEntry(header, wordId);
if (offset < 0) { if (offset < 0) {
return -1; return -1;
} }

View File

@ -82,7 +82,7 @@ public class SearchIndex implements AutoCloseable {
if (!range.isPresent()) if (!range.isPresent())
return false; return false;
return bTreeReader.offsetForEntry(bTreeReader.getHeader(range.dataOffset), url) >= 0; return bTreeReader.findEntry(bTreeReader.getHeader(range.dataOffset), url) >= 0;
} }
public class UrlIndexTree { public class UrlIndexTree {

View File

@ -48,9 +48,9 @@ class BTreeWriterTest {
@Test @Test
void testLayerOffset() { void testLayerOffset() {
int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS(); int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();
System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 0)); System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 0));
System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 1)); System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 1));
System.out.println(writer.makeHeader(1025, wcub).relativeLayerOffset(ctx, 2)); System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 2));
for (int i = 0; i < 1024; i++) { for (int i = 0; i < 1024; i++) {
var header = writer.makeHeader(0, i); var header = writer.makeHeader(0, i);
@ -59,7 +59,7 @@ class BTreeWriterTest {
printTreeLayout(i, header, ctx); printTreeLayout(i, header, ctx);
if (header.layers() >= 1) { if (header.layers() >= 1) {
assertEquals(1, ctx.layerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS()); assertEquals(1, ctx.indexLayerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS());
} }
} }
} }
@ -67,7 +67,7 @@ class BTreeWriterTest {
private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) { private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) {
StringJoiner sj = new StringJoiner(","); StringJoiner sj = new StringJoiner(",");
for (int l = 0; l < header.layers(); l++) { for (int l = 0; l < header.layers(); l++) {
sj.add(""+ctx.layerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS()); sj.add(""+ctx.indexLayerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS());
} }
System.out.println(numEntries + ":" + sj); System.out.println(numEntries + ":" + sj);
} }
@ -86,7 +86,7 @@ class BTreeWriterTest {
try { try {
RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);
{ {
var writer = new BTreeWriter(mmf, ctx); var writer = new BTreeWriter(mmf, ctx);
@ -103,7 +103,7 @@ class BTreeWriterTest {
var reader = new BTreeReader(mmf, ctx); var reader = new BTreeReader(mmf, ctx);
var header = reader.getHeader(0); var header = reader.getHeader(0);
for (int i = 0; i < data.length; i++) { for (int i = 0; i < data.length; i++) {
long offset = reader.offsetForEntry(header, data[i]); long offset = reader.findEntry(header, data[i]);
assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
assertEquals(i, mmf.get(offset+1)); assertEquals(i, mmf.get(offset+1));
} }
@ -129,7 +129,7 @@ class BTreeWriterTest {
try { try {
RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);
{ {
var writer = new BTreeWriter(mmf, ctx); var writer = new BTreeWriter(mmf, ctx);
@ -146,7 +146,7 @@ class BTreeWriterTest {
var reader = new BTreeReader(mmf, ctx); var reader = new BTreeReader(mmf, ctx);
var header = reader.getHeader(0); var header = reader.getHeader(0);
for (int i = 0; i < data.length; i++) { for (int i = 0; i < data.length; i++) {
long offset = reader.offsetForEntry(header, data[i]); long offset = reader.findEntry(header, data[i]);
assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
assertEquals(i, mmf.get(offset+1)); assertEquals(i, mmf.get(offset+1));
} }
@ -154,7 +154,7 @@ class BTreeWriterTest {
for (int i = 0; i < 500; i++) { for (int i = 0; i < 500; i++) {
long val = (long)(Long.MAX_VALUE * Math.random()); long val = (long)(Long.MAX_VALUE * Math.random());
while (toPut.contains((int)val)) val = (long)(Long.MAX_VALUE * Math.random()); while (toPut.contains((int)val)) val = (long)(Long.MAX_VALUE * Math.random());
assertEquals(-1, reader.offsetForEntry(header, val)); assertEquals(-1, reader.findEntry(header, val));
} }
} }
} catch (Exception e) { } catch (Exception e) {
@ -197,7 +197,7 @@ class BTreeWriterTest {
printTreeLayout(toPut.size(), header, ctx); printTreeLayout(toPut.size(), header, ctx);
for (int i = 0; i < data.length; i++) { for (int i = 0; i < data.length; i++) {
long offset = reader.offsetForEntry(header, data[i]); long offset = reader.findEntry(header, data[i]);
assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
assertEquals(data[i], mmf.get(offset)); assertEquals(data[i], mmf.get(offset));
} }
@ -205,7 +205,7 @@ class BTreeWriterTest {
for (int i = 0; i < 500; i++) { for (int i = 0; i < 500; i++) {
long val = (long) (Long.MAX_VALUE * Math.random()); long val = (long) (Long.MAX_VALUE * Math.random());
while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
assertEquals(-1, reader.offsetForEntry(header, val)); assertEquals(-1, reader.findEntry(header, val));
} }
} }
} catch (Exception e) { } catch (Exception e) {
@ -250,7 +250,7 @@ class BTreeWriterTest {
printTreeLayout(toPut.size(), header, ctx); printTreeLayout(toPut.size(), header, ctx);
for (int i = 0; i < data.length; i++) { for (int i = 0; i < data.length; i++) {
long offset = reader.offsetForEntry(header, data[i] & mask); long offset = reader.findEntry(header, data[i] & mask);
assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
assertEquals(data[i], mmf.get(offset)); assertEquals(data[i], mmf.get(offset));
} }
@ -258,7 +258,7 @@ class BTreeWriterTest {
for (int i = 0; i < 500; i++) { for (int i = 0; i < 500; i++) {
long val = (long) (Long.MAX_VALUE * Math.random()); long val = (long) (Long.MAX_VALUE * Math.random());
while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
assertEquals(-1, reader.offsetForEntry(header, val & mask)); assertEquals(-1, reader.findEntry(header, val & mask));
} }
} }
} catch (Exception e) { } catch (Exception e) {
@ -304,7 +304,7 @@ class BTreeWriterTest {
printTreeLayout(toPut.size(), header, ctx); printTreeLayout(toPut.size(), header, ctx);
for (int i = 0; i < data.length; i++) { for (int i = 0; i < data.length; i++) {
long offset = reader.offsetForEntry(header, data[i] & mask); long offset = reader.findEntry(header, data[i] & mask);
assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset); assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
assertEquals(data[i], mmf.get(offset)); assertEquals(data[i], mmf.get(offset));
assertEquals(i, mmf.get(offset+1)); assertEquals(i, mmf.get(offset+1));
@ -313,7 +313,7 @@ class BTreeWriterTest {
for (int i = 0; i < 500; i++) { for (int i = 0; i < 500; i++) {
long val = (long) (Long.MAX_VALUE * Math.random()); long val = (long) (Long.MAX_VALUE * Math.random());
while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random()); while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
assertEquals(-1, reader.offsetForEntry(header, val & mask)); assertEquals(-1, reader.findEntry(header, val & mask));
} }
} }
} catch (Exception e) { } catch (Exception e) {

View File

@ -26,7 +26,7 @@ class LongPairHashMapTest {
try { try {
RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw"); RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);
var lphm = LongPairHashMap.createNew(mmf, 1024); var lphm = LongPairHashMap.createNew(mmf, 1024);
toPut.forEach(i -> { toPut.forEach(i -> {
lphm.put(new LongPairHashMap.CellData(i, i)); lphm.put(new LongPairHashMap.CellData(i, i));
@ -35,7 +35,7 @@ class LongPairHashMapTest {
lphm.close(); lphm.close();
RandomAccessFile raf2 = new RandomAccessFile(tempFile.toFile(), "rw"); RandomAccessFile raf2 = new RandomAccessFile(tempFile.toFile(), "rw");
MultimapFileLong mmf2 = new MultimapFileLong(raf2, FileChannel.MapMode.READ_WRITE, 10000, 1000, true); MultimapFileLong mmf2 = new MultimapFileLong(raf2, FileChannel.MapMode.READ_WRITE, 10000, 1000);
var lphm2 = LongPairHashMap.loadExisting(mmf2); var lphm2 = LongPairHashMap.loadExisting(mmf2);
toPut.forEach(i -> { toPut.forEach(i -> {
Assertions.assertTrue(lphm2.get(i).isSet()); Assertions.assertTrue(lphm2.get(i).isSet());

View File

@ -56,7 +56,7 @@ class MultimapFileTest {
@SneakyThrows @SneakyThrows
@Test @Test
void put() { void put() {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8);
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
file.put(i, i); file.put(i, i);
} }
@ -68,7 +68,7 @@ class MultimapFileTest {
@SneakyThrows @SneakyThrows
@Test @Test
void read() { void read() {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8);
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
file.put(i, i); file.put(i, i);
} }
@ -85,7 +85,7 @@ class MultimapFileTest {
@Test @Test
void write() throws IOException { void write() throws IOException {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8);
for (int i = 0; i < 32-6; i++) { for (int i = 0; i < 32-6; i++) {
file.write(new long[] { 0,1,2,3,4,5}, i); file.write(new long[] { 0,1,2,3,4,5}, i);
@ -98,7 +98,7 @@ class MultimapFileTest {
@Test @Test
void sortInternal() throws IOException { void sortInternal() throws IOException {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8);
var sorter = file.createSorter(Path.of("/tmp"), 16); var sorter = file.createSorter(Path.of("/tmp"), 16);
var searcher = file.createSearcher(); var searcher = file.createSearcher();
for (int i = 0; i < 32; i++) { for (int i = 0; i < 32; i++) {
@ -109,13 +109,13 @@ class MultimapFileTest {
for (int i = 2+1; i < 16; i++) { for (int i = 2+1; i < 16; i++) {
assertTrue(file.get(i) > file.get(i-1)); assertTrue(file.get(i) > file.get(i-1));
assertTrue(searcher.binarySearch(file.get(i), 2, 18)); assertTrue(searcher.binarySearchTest(file.get(i), 2, 16));
} }
} }
@Test @Test
void sortExternal() throws IOException { void sortExternal() throws IOException {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8, false); var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8);
var sorter = file.createSorter(Path.of("/tmp"), 2); var sorter = file.createSorter(Path.of("/tmp"), 2);
var searcher = file.createSearcher(); var searcher = file.createSearcher();
@ -128,7 +128,7 @@ class MultimapFileTest {
for (int i = 2+1; i < 16; i++) { for (int i = 2+1; i < 16; i++) {
assertTrue(file.get(i) > file.get(i-1)); assertTrue(file.get(i) > file.get(i-1));
assertTrue(searcher.binarySearch(file.get(i), 2, 18)); assertTrue(searcher.binarySearchTest(file.get(i), 2, 16));
} }
} }