mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Refactoring BTreeReader and binary search code
This commit is contained in:
parent
c324c80efc
commit
b1eff0107c
@ -4,16 +4,15 @@ import nu.marginalia.util.btree.model.BTreeContext;
|
|||||||
import nu.marginalia.util.btree.model.BTreeHeader;
|
import nu.marginalia.util.btree.model.BTreeHeader;
|
||||||
import nu.marginalia.util.multimap.MultimapFileLong;
|
import nu.marginalia.util.multimap.MultimapFileLong;
|
||||||
import nu.marginalia.util.multimap.MultimapSearcher;
|
import nu.marginalia.util.multimap.MultimapSearcher;
|
||||||
import org.slf4j.Logger;
|
import org.jetbrains.annotations.Nullable;
|
||||||
import org.slf4j.LoggerFactory;
|
|
||||||
|
import static java.lang.Math.min;
|
||||||
|
|
||||||
public class BTreeReader {
|
public class BTreeReader {
|
||||||
|
|
||||||
private final MultimapFileLong file;
|
private final MultimapFileLong file;
|
||||||
private final BTreeContext ctx;
|
private final BTreeContext ctx;
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(BTreeReader.class);
|
|
||||||
|
|
||||||
private final MultimapSearcher indexSearcher;
|
private final MultimapSearcher indexSearcher;
|
||||||
private final MultimapSearcher dataSearcher;
|
private final MultimapSearcher dataSearcher;
|
||||||
|
|
||||||
@ -35,40 +34,42 @@ public class BTreeReader {
|
|||||||
*/
|
*/
|
||||||
public long findEntry(BTreeHeader header, final long keyRaw) {
|
public long findEntry(BTreeHeader header, final long keyRaw) {
|
||||||
final long key = keyRaw & ctx.equalityMask();
|
final long key = keyRaw & ctx.equalityMask();
|
||||||
|
|
||||||
final long dataAddress = header.dataOffsetLongs();
|
|
||||||
final int entrySize = ctx.entrySize();
|
|
||||||
final int blockSize = ctx.BLOCK_SIZE_WORDS();
|
final int blockSize = ctx.BLOCK_SIZE_WORDS();
|
||||||
|
final long dataAddress = header.dataOffsetLongs();
|
||||||
|
|
||||||
if (header.layers() == 0) { // For small data, we only have a data block
|
if (header.layers() == 0) { // For small data, we only have a data block
|
||||||
return dataSearcher.binarySearch(key, dataAddress, header.numEntries());
|
return dataSearcher.binarySearch(key, dataAddress, header.numEntries());
|
||||||
}
|
}
|
||||||
|
|
||||||
final long indexOffset = header.indexOffsetLongs();
|
// Search index layers
|
||||||
|
long dataLayerOffset = searchIndex(header, key);
|
||||||
// Search the top layer
|
if (dataLayerOffset < 0) {
|
||||||
long layerOffset = indexSearch(key, indexOffset, blockSize);
|
return dataLayerOffset;
|
||||||
if (layerOffset < 0) return -1;
|
|
||||||
|
|
||||||
// Search intermediary layers
|
|
||||||
for (int i = header.layers() - 2; i >= 0; --i) {
|
|
||||||
final long layerAddressBase = indexOffset + header.relativeIndexLayerOffset(ctx, i);
|
|
||||||
final long layerBlockOffset = layerAddressBase + layerOffset;
|
|
||||||
|
|
||||||
final long nextLayerOffset = indexSearch(key, layerBlockOffset, blockSize);
|
|
||||||
if (nextLayerOffset < 0)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
layerOffset = blockSize*(nextLayerOffset + layerOffset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Search the corresponding data block
|
// Search the corresponding data block
|
||||||
final long searchStart = dataAddress + layerOffset * entrySize;
|
final long searchStart = dataAddress + dataLayerOffset * ctx.entrySize();
|
||||||
final long lastDataAddress = dataAddress + (long) header.numEntries() * entrySize;
|
final long numEntries = min(header.numEntries() - dataLayerOffset, blockSize);
|
||||||
final long lastItemInBlockAddress = searchStart + (long) blockSize * entrySize;
|
|
||||||
final long searchEnd = Math.min(lastItemInBlockAddress, lastDataAddress);
|
|
||||||
|
|
||||||
return dataSearcher.binarySearch(key, searchStart, (searchEnd - searchStart) / entrySize);
|
return dataSearcher.binarySearch(key, searchStart, numEntries);
|
||||||
|
}
|
||||||
|
|
||||||
|
private long searchIndex(BTreeHeader header, long key) {
|
||||||
|
final int blockSize = ctx.BLOCK_SIZE_WORDS();
|
||||||
|
final long indexAddress = header.indexOffsetLongs();
|
||||||
|
|
||||||
|
long layerOffset = 0;
|
||||||
|
|
||||||
|
for (int i = header.layers() - 1; i >= 0; --i) {
|
||||||
|
final long layerBlockOffset = header.relativeIndexLayerOffset(ctx, i) + layerOffset;
|
||||||
|
|
||||||
|
final long nextLayerOffset = indexSearch(key, indexAddress + layerBlockOffset, blockSize);
|
||||||
|
if (nextLayerOffset < 0)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
layerOffset = blockSize *(nextLayerOffset + layerOffset);
|
||||||
|
}
|
||||||
|
return layerOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
private long indexSearch(long key, long start, long n) {
|
private long indexSearch(long key, long start, long n) {
|
||||||
|
@ -36,16 +36,13 @@ public record BTreeContext(int MAX_LAYERS,
|
|||||||
}
|
}
|
||||||
|
|
||||||
public long indexLayerSize(int numWords, int level) {
|
public long indexLayerSize(int numWords, int level) {
|
||||||
|
final long layerSize = 1L<<(BLOCK_SIZE_BITS*(level+1));
|
||||||
|
final long numBlocks = numWords / layerSize;
|
||||||
|
|
||||||
long layerSize = 1L<<(BLOCK_SIZE_BITS*(level+1));
|
|
||||||
int numBlocks = 0;
|
|
||||||
|
|
||||||
numBlocks += numWords / layerSize;
|
|
||||||
if (numWords % layerSize != 0) {
|
if (numWords % layerSize != 0) {
|
||||||
numBlocks++;
|
return BLOCK_SIZE_WORDS * (numBlocks + 1);
|
||||||
}
|
}
|
||||||
|
return BLOCK_SIZE_WORDS * numBlocks;
|
||||||
return (long) BLOCK_SIZE_WORDS * numBlocks;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user