Refactoring BTreeReader and binary search code

This commit is contained in:
vlofgren 2022-06-20 12:25:34 +02:00
parent c324c80efc
commit b1eff0107c
2 changed files with 33 additions and 35 deletions

View File

@ -4,16 +4,15 @@ import nu.marginalia.util.btree.model.BTreeContext;
import nu.marginalia.util.btree.model.BTreeHeader; import nu.marginalia.util.btree.model.BTreeHeader;
import nu.marginalia.util.multimap.MultimapFileLong; import nu.marginalia.util.multimap.MultimapFileLong;
import nu.marginalia.util.multimap.MultimapSearcher; import nu.marginalia.util.multimap.MultimapSearcher;
import org.slf4j.Logger; import org.jetbrains.annotations.Nullable;
import org.slf4j.LoggerFactory;
import static java.lang.Math.min;
public class BTreeReader { public class BTreeReader {
private final MultimapFileLong file; private final MultimapFileLong file;
private final BTreeContext ctx; private final BTreeContext ctx;
private final Logger logger = LoggerFactory.getLogger(BTreeReader.class);
private final MultimapSearcher indexSearcher; private final MultimapSearcher indexSearcher;
private final MultimapSearcher dataSearcher; private final MultimapSearcher dataSearcher;
@ -35,40 +34,42 @@ public class BTreeReader {
*/ */
public long findEntry(BTreeHeader header, final long keyRaw) { public long findEntry(BTreeHeader header, final long keyRaw) {
final long key = keyRaw & ctx.equalityMask(); final long key = keyRaw & ctx.equalityMask();
final long dataAddress = header.dataOffsetLongs();
final int entrySize = ctx.entrySize();
final int blockSize = ctx.BLOCK_SIZE_WORDS(); final int blockSize = ctx.BLOCK_SIZE_WORDS();
final long dataAddress = header.dataOffsetLongs();
if (header.layers() == 0) { // For small data, we only have a data block if (header.layers() == 0) { // For small data, we only have a data block
return dataSearcher.binarySearch(key, dataAddress, header.numEntries()); return dataSearcher.binarySearch(key, dataAddress, header.numEntries());
} }
final long indexOffset = header.indexOffsetLongs(); // Search index layers
long dataLayerOffset = searchIndex(header, key);
// Search the top layer if (dataLayerOffset < 0) {
long layerOffset = indexSearch(key, indexOffset, blockSize); return dataLayerOffset;
if (layerOffset < 0) return -1;
// Search intermediary layers
for (int i = header.layers() - 2; i >= 0; --i) {
final long layerAddressBase = indexOffset + header.relativeIndexLayerOffset(ctx, i);
final long layerBlockOffset = layerAddressBase + layerOffset;
final long nextLayerOffset = indexSearch(key, layerBlockOffset, blockSize);
if (nextLayerOffset < 0)
return -1;
layerOffset = blockSize*(nextLayerOffset + layerOffset);
} }
// Search the corresponding data block // Search the corresponding data block
final long searchStart = dataAddress + layerOffset * entrySize; final long searchStart = dataAddress + dataLayerOffset * ctx.entrySize();
final long lastDataAddress = dataAddress + (long) header.numEntries() * entrySize; final long numEntries = min(header.numEntries() - dataLayerOffset, blockSize);
final long lastItemInBlockAddress = searchStart + (long) blockSize * entrySize;
final long searchEnd = Math.min(lastItemInBlockAddress, lastDataAddress);
return dataSearcher.binarySearch(key, searchStart, (searchEnd - searchStart) / entrySize); return dataSearcher.binarySearch(key, searchStart, numEntries);
}
private long searchIndex(BTreeHeader header, long key) {
final int blockSize = ctx.BLOCK_SIZE_WORDS();
final long indexAddress = header.indexOffsetLongs();
long layerOffset = 0;
for (int i = header.layers() - 1; i >= 0; --i) {
final long layerBlockOffset = header.relativeIndexLayerOffset(ctx, i) + layerOffset;
final long nextLayerOffset = indexSearch(key, indexAddress + layerBlockOffset, blockSize);
if (nextLayerOffset < 0)
return -1;
layerOffset = blockSize *(nextLayerOffset + layerOffset);
}
return layerOffset;
} }
private long indexSearch(long key, long start, long n) { private long indexSearch(long key, long start, long n) {

View File

@ -36,16 +36,13 @@ public record BTreeContext(int MAX_LAYERS,
} }
public long indexLayerSize(int numWords, int level) { public long indexLayerSize(int numWords, int level) {
final long layerSize = 1L<<(BLOCK_SIZE_BITS*(level+1));
final long numBlocks = numWords / layerSize;
long layerSize = 1L<<(BLOCK_SIZE_BITS*(level+1));
int numBlocks = 0;
numBlocks += numWords / layerSize;
if (numWords % layerSize != 0) { if (numWords % layerSize != 0) {
numBlocks++; return BLOCK_SIZE_WORDS * (numBlocks + 1);
} }
return BLOCK_SIZE_WORDS * numBlocks;
return (long) BLOCK_SIZE_WORDS * numBlocks;
} }
} }