(btree) Roll back optimization of queryDataWithIndex

It had been previously assumed that re-writing this function in the style of retain() would make it faster, but it had the opposite effect.

The reason why retain is so fast due to properties of the data that hold true when intersecting document lists, where long runs of adjacent documents are expected, but not when looking up the data associated with the already intersected documents, where the data is more sparse.
This commit is contained in:
Viktor Lofgren 2024-05-19 11:29:28 +02:00
parent 88997a1c4f
commit daf2a8df54
2 changed files with 23 additions and 58 deletions

View File

@ -5,8 +5,6 @@ import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.model.BTreeContext;
import nu.marginalia.btree.model.BTreeHeader;
import java.util.Arrays;
import static java.lang.Math.min;
public class BTreeReader {
@ -149,13 +147,20 @@ public class BTreeReader {
private long[] queryDataWithIndex(long[] keys, int offset) {
BTreePointer pointer = new BTreePointer(header);
long[] ret = Arrays.copyOf(keys, keys.length);
long[] ret = new long[keys.length];
for (int i = 0; i < keys.length;) {
for (int i = 0; i < keys.length; i++) {
pointer.walkToData(keys[i]);
i = pointer.extractData(ret, i, offset);
long dataAddress = pointer.findData(keys[i]);
if (dataAddress >= 0) {
ret[i] = data.get(dataAddress + offset);
}
pointer.resetToRoot();
}
return ret;
}
@ -239,46 +244,6 @@ public class BTreeReader {
}
}
public int extractData(long[] input, int idx, int offset) {
long dataOffset = findData(input[idx]);
if (dataOffset >= 0) {
input[idx++] = data.get(dataOffset + offset);
if (idx < input.length && input[idx] < maxValueInBlock) {
long relOffsetInBlock = dataOffset - pointerOffset * ctx.entrySize;
long remainingTotal = dataBlockEnd - dataOffset;
long remainingBlock = ctx.pageSize() - relOffsetInBlock; // >= 0
long searchEnd = dataOffset + min(remainingTotal, remainingBlock);
while (dataOffset < searchEnd
&& idx < input.length
&& input[idx] <= maxValueInBlock)
{
long value = data.get(dataOffset);
if (value == input[idx]) {
input[idx++] = data.get(dataOffset + offset);
dataOffset += ctx.entrySize;
}
else if (value > input[idx]) {
input[idx++] = 0;
}
else if (value < input[idx]) {
dataOffset += ctx.entrySize;
}
}
}
}
else {
input[idx++] = 0;
}
return idx;
}
/** Retain any data entry matching the current key
* in the buffer within the current data block.
* <p></p>
@ -287,19 +252,19 @@ public class BTreeReader {
* */
public void retainData(LongQueryBuffer buffer) {
long dataOffset = findData(buffer.currentValue());
if (dataOffset >= 0) {
long dataIndex = findData(buffer.currentValue());
if (dataIndex >= 0) {
buffer.retainAndAdvance();
if (buffer.hasMore() && buffer.currentValue() <= maxValueInBlock) {
long relOffsetInBlock = dataOffset - pointerOffset * ctx.entrySize;
long relOffsetInBlock = dataIndex - pointerOffset * ctx.entrySize;
long remainingTotal = dataBlockEnd - dataOffset;
long remainingTotal = dataBlockEnd - dataIndex;
long remainingBlock = ctx.pageSize() - relOffsetInBlock; // >= 0
long searchEnd = dataOffset + min(remainingTotal, remainingBlock);
long searchEnd = dataIndex + min(remainingTotal, remainingBlock);
data.retainN(buffer, ctx.entrySize, maxValueInBlock, dataOffset, searchEnd);
data.retainN(buffer, ctx.entrySize, maxValueInBlock, dataIndex, searchEnd);
}
}
else {
@ -315,19 +280,19 @@ public class BTreeReader {
* */
public void rejectData(LongQueryBuffer buffer) {
long dataOffset = findData(buffer.currentValue());
if (dataOffset >= 0) {
long dataIndex = findData(buffer.currentValue());
if (dataIndex >= 0) {
buffer.rejectAndAdvance();
if (buffer.hasMore() && buffer.currentValue() <= maxValueInBlock) {
long relOffsetInBlock = dataOffset - pointerOffset * ctx.entrySize;
long relOffsetInBlock = dataIndex - pointerOffset * ctx.entrySize;
long remainingTotal = dataBlockEnd - dataOffset;
long remainingTotal = dataBlockEnd - dataIndex;
long remainingBlock = ctx.pageSize() - relOffsetInBlock; // >= 0
long searchEnd = dataOffset + min(remainingTotal, remainingBlock);
long searchEnd = dataIndex + min(remainingTotal, remainingBlock);
data.rejectN(buffer, ctx.entrySize, maxValueInBlock, dataOffset, searchEnd);
data.rejectN(buffer, ctx.entrySize, maxValueInBlock, dataIndex, searchEnd);
}
}
else {

View File

@ -11,7 +11,7 @@ import java.io.IOException;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
public class BTreeReaderQueryDataWithIndexTest {
BTreeContext ctx = new BTreeContext(5, 2, BTreeBlockSize.BS_32);
BTreeContext ctx = new BTreeContext(5, 2, BTreeBlockSize.BS_64);
LongArray array;
@BeforeEach