(array) Clean up search function jungle

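Retire search functions that weren't used, including the native implementations. Drop confusing suffixes on search function names. Search functions no longer encode search misses as negative values; they now return the index of the first entry that is not less than the key (or the last entry of the range when every entry is smaller), and callers detect a hit by comparing the entry at that index with the key.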

Replaced binary search function with a branchless version that is much faster.

Cleaned up benchmark code.
This commit is contained in:
Viktor Lofgren 2024-05-17 14:30:06 +02:00
parent 48aff52e00
commit 9e766bc056
16 changed files with 263 additions and 653 deletions

View File

@ -28,10 +28,6 @@ import static java.lang.foreign.ValueLayout.JAVA_LONG;
public class NativeAlgos { public class NativeAlgos {
private final MethodHandle qsortHandle; private final MethodHandle qsortHandle;
private final MethodHandle qsort128Handle; private final MethodHandle qsort128Handle;
private final MethodHandle linearSearch64Handle;
private final MethodHandle linearSearch128Handle;
private final MethodHandle binarySearch128Handle;
private final MethodHandle binarySearch64UpperHandle;
public static final NativeAlgos instance; public static final NativeAlgos instance;
@ -51,22 +47,6 @@ public class NativeAlgos {
handle = libraryLookup.find("ms_sort_128").get(); handle = libraryLookup.find("ms_sort_128").get();
qsort128Handle = nativeLinker.downcallHandle(handle, qsort128Handle = nativeLinker.downcallHandle(handle,
FunctionDescriptor.ofVoid(ADDRESS, JAVA_LONG, JAVA_LONG)); FunctionDescriptor.ofVoid(ADDRESS, JAVA_LONG, JAVA_LONG));
handle = libraryLookup.find("ms_linear_search_64").get();
linearSearch64Handle = nativeLinker.downcallHandle(handle,
FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
handle = libraryLookup.find("ms_linear_search_128").get();
linearSearch128Handle = nativeLinker.downcallHandle(handle,
FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
handle = libraryLookup.find("ms_binary_search_128").get();
binarySearch128Handle = nativeLinker.downcallHandle(handle,
FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
handle = libraryLookup.find("ms_binary_search_64upper").get();
binarySearch64UpperHandle = nativeLinker.downcallHandle(handle,
FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
} }
static { static {
@ -114,39 +94,4 @@ public class NativeAlgos {
} }
} }
public static long linearSearch64(long key, MemorySegment ms, long start, long end) {
try {
return (long) instance.linearSearch64Handle.invoke(key, ms, start, end);
}
catch (Throwable t) {
throw new RuntimeException("Failed to invoke native function", t);
}
}
public static long linearSearch128(long key, MemorySegment ms, long start, long end) {
try {
return (long) instance.linearSearch128Handle.invoke(key, ms, start, end);
}
catch (Throwable t) {
throw new RuntimeException("Failed to invoke native function", t);
}
}
public static long binarySearch128(long key, MemorySegment ms, long start, long end) {
try {
return (long) instance.binarySearch128Handle.invoke(key, ms, start, end);
}
catch (Throwable t) {
throw new RuntimeException("Failed to invoke native function", t);
}
}
public static long binarySearch64Upper(long key, MemorySegment ms, long start, long end) {
try {
return (long) instance.binarySearch64UpperHandle.invoke(key, ms, start, end);
}
catch (Throwable t) {
throw new RuntimeException("Failed to invoke native function", t);
}
}
} }

View File

@ -28,87 +28,4 @@ void ms_sort_128(int64_t* area, uint64_t start, uint64_t end) {
[](const p64x2& fst, const p64x2& snd) { [](const p64x2& fst, const p64x2& snd) {
return fst.a < snd.a; return fst.a < snd.a;
}); });
} }
// Encodes a search miss for one-word entries. Negative positions are clamped
// to zero and the result is -1 - position, so the return value is always
// negative.
inline int64_t encodeSearchMiss64(int64_t value) {
    const int64_t clamped = (value < 0) ? int64_t(0) : value;
    return -1 - clamped;
}
// Encodes a search miss for two-word entries. Negative positions are clamped
// to zero and the result is -2 - position, so the return value is always
// negative.
inline int64_t encodeSearchMiss128(int64_t value) {
    const int64_t clamped = (value < 0) ? int64_t(0) : value;
    return -2 - clamped;
}
// Scans area[fromIndex, toIndex) one word at a time for key.
//
// Returns the index of the first word equal to key. The scan stops early at
// the first word greater than key; in that case, or when the range is
// exhausted, the position just before the stopping point is returned encoded
// through encodeSearchMiss64 (a negative value).
int64_t ms_linear_search_64(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) {
    uint64_t cursor = fromIndex;
    while (cursor < toIndex) {
        const int64_t current = area[cursor];
        if (current == key) {
            return cursor;
        }
        if (current > key) {
            break;
        }
        ++cursor;
    }
    return encodeSearchMiss64(cursor - 1);
}
// Scans area[fromIndex, toIndex) in steps of two words, comparing only the
// first word of each pair against key.
//
// Returns the index of the first matching word. The scan stops early at the
// first compared word greater than key; in that case, or when the range is
// exhausted, the position one pair before the stopping point is returned
// encoded through encodeSearchMiss128 (a negative value).
int64_t ms_linear_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) {
    uint64_t cursor = fromIndex;
    while (cursor < toIndex) {
        const int64_t current = area[cursor];
        if (current == key) {
            return cursor;
        }
        if (current > key) {
            break;
        }
        cursor += 2;
    }
    return encodeSearchMiss128(cursor - 2);
}
// Searches area[fromIndex, toIndex), laid out as two-word entries, for an
// entry whose first word equals key.
//
// The range is bisected on entry numbers until fewer than 33 candidate
// entries remain; the rest is scanned linearly from the lower bound up to
// toIndex. Returns the word index of a matching entry, or a negative value
// produced by encodeSearchMiss128 when no entry matches.
int64_t ms_binary_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) {
    int64_t lo = 0;
    int64_t hi = (toIndex - fromIndex) / 2 - 1;

    while (hi - lo >= 32) {
        const int64_t mid = lo + (hi - lo) / 2;
        const int64_t probe = area[fromIndex + mid * 2];

        if (probe == key) {
            return fromIndex + mid * 2;
        }
        if (probe < key) {
            lo = mid + 1;
        } else {
            hi = mid - 1;
        }
    }

    // Finish with a linear pass starting at the lower bound of the window.
    uint64_t cursor = fromIndex + lo * 2;
    while (cursor < toIndex) {
        const int64_t current = area[cursor];
        if (current == key) {
            return cursor;
        }
        if (current > key) {
            return encodeSearchMiss128(cursor);
        }
        cursor += 2;
    }

    return encodeSearchMiss128(toIndex - 2);
}
// Searches area[fromIndex, toIndex) for key.
//
// The range is bisected while more than 33 candidates remain; a probe that
// equals key is returned immediately. The remainder is scanned linearly from
// the lower bound, returning the index of the first word that is >= key.
// Returns toIndex when every scanned word is smaller than key.
int64_t ms_binary_search_64upper(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) {
    int64_t lo = 0;
    int64_t hi = toIndex - fromIndex - 1;

    while (hi - lo > 32) {
        const int64_t mid = lo + (hi - lo) / 2;
        const int64_t probe = area[fromIndex + mid];

        if (probe == key) {
            return fromIndex + mid;
        }
        if (probe < key) {
            lo = mid + 1;
        } else {
            hi = mid - 1;
        }
    }

    // Finish with a linear pass starting at the lower bound of the window.
    uint64_t cursor = fromIndex + lo;
    while (cursor < toIndex) {
        if (area[cursor] >= key) {
            return cursor;
        }
        ++cursor;
    }

    return toIndex;
}

View File

@ -5,10 +5,4 @@
extern "C" { extern "C" {
void ms_sort_64(int64_t* area, uint64_t start, uint64_t end); void ms_sort_64(int64_t* area, uint64_t start, uint64_t end);
void ms_sort_128(int64_t* area, uint64_t start, uint64_t end); void ms_sort_128(int64_t* area, uint64_t start, uint64_t end);
int64_t ms_linear_search_64(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex);
int64_t ms_linear_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex);
int64_t ms_binary_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex);
int64_t ms_binary_search_64upper(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex);
} }

View File

@ -16,7 +16,7 @@ public interface IntArraySearch extends IntArrayBase {
if (val > key) break; if (val > key) break;
} }
return LongArraySearch.encodeSearchMiss(1, pos - 1); return encodeSearchMiss(1, pos - 1);
} }
default long binarySearch(int key, long fromIndex, long toIndex) { default long binarySearch(int key, long fromIndex, long toIndex) {
@ -119,4 +119,9 @@ public interface IntArraySearch extends IntArrayBase {
} }
} }
/**
 * Encodes a search miss as a negative number.
 *
 * @param entrySize number of words per entry; its negation is the base of the encoding
 * @param value     position to encode; negative positions are treated as zero
 * @return {@code -entrySize - max(0, value)}
 */
static long encodeSearchMiss(int entrySize, long value) {
    final long clamped = value > 0 ? value : 0L;
    return -entrySize - clamped;
}
} }

View File

@ -29,10 +29,6 @@ public interface LongArrayBase extends BulkTransferArray<LongBuffer> {
void quickSortNative(long start, long end); void quickSortNative(long start, long end);
void quickSortNative128(long start, long end); void quickSortNative128(long start, long end);
long linearSearchNative(long key, long start, long end);
long linearSearchNative128(long key, long start, long end);
long binarySearchNativeUB(long key, long start, long end);
long binarySearchNative128(long key, long start, long end);
default void increment(long pos) { default void increment(long pos) {
set(pos, get(pos) + 1); set(pos, get(pos) + 1);
} }

View File

@ -1,140 +1,41 @@
package nu.marginalia.array.algo; package nu.marginalia.array.algo;
import nu.marginalia.NativeAlgos;
import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.array.buffer.LongQueryBuffer;
public interface LongArraySearch extends LongArrayBase { public interface LongArraySearch extends LongArrayBase {
int LINEAR_SEARCH_CUTOFF = 32;
default long linearSearch(long key, long fromIndex, long toIndex) {
if (NativeAlgos.isAvailable) {
return linearSearchNative(key, fromIndex, toIndex);
} else {
return linearSearchJava(key, fromIndex, toIndex);
}
}
default long linearSearchN(int sz, long key, long fromIndex, long toIndex) {
if (NativeAlgos.isAvailable && sz == 2) {
return linearSearchNative128(key, fromIndex, toIndex);
} else {
return linearSearchNJava(sz, key, fromIndex, toIndex);
}
}
default long binarySearchUpperBound(long key, long fromIndex, long toIndex) {
if (NativeAlgos.isAvailable) {
return binarySearchNativeUB(key, fromIndex, toIndex);
} else {
return binarySearchUpperBoundJava(key, fromIndex, toIndex);
}
}
default long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
if (NativeAlgos.isAvailable && sz == 2) {
return binarySearchNative128(key, fromIndex, toIndex);
} else {
return binarySearchNJava(sz, key, fromIndex, toIndex);
}
}
default long linearSearchJava(long key, long fromIndex, long toIndex) {
long pos;
for (pos = fromIndex; pos < toIndex; pos++) {
long val = get(pos);
if (val == key) return pos;
if (val > key) break;
}
return encodeSearchMiss(1, pos - 1);
}
default long linearSearchNJava(int sz, long key, long fromIndex, long toIndex) {
long pos;
for (pos = fromIndex; pos < toIndex; pos+=sz) {
long val = get(pos);
if (val == key) return pos;
if (val > key) return encodeSearchMiss(sz, pos);
}
return encodeSearchMiss(sz, toIndex - sz);
}
default long binarySearch(long key, long fromIndex, long toIndex) { default long binarySearch(long key, long fromIndex, long toIndex) {
long low = 0; long low = 0;
long high = (toIndex - fromIndex) - 1; long high = (toIndex - fromIndex) - 1;
long len = high - low;
while (high - low >= LINEAR_SEARCH_CUTOFF) { while (len > 0) {
long mid = (low + high) >>> 1; var half = len / 2;
long midVal = get(fromIndex + mid); if (get(fromIndex + low + half) < key) {
low += len - half;
if (midVal < key) }
low = mid + 1; len = half;
else if (midVal > key)
high = mid - 1;
else
return fromIndex + mid;
} }
return linearSearch(key, fromIndex + low, fromIndex + high + 1); return fromIndex + low;
} }
default long binarySearchNJava(int sz, long key, long fromIndex, long toIndex) { default long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
long low = 0; long low = 0;
long high = (toIndex - fromIndex)/sz - 1; long high = (toIndex - fromIndex)/sz - 1;
long len = high - low;
while (high - low >= LINEAR_SEARCH_CUTOFF) { while (len > 0) {
long mid = (low + high) >>> 1; var half = len / 2;
long midVal = get(fromIndex + sz*mid); if (get(fromIndex + sz * (low + half)) < key) {
low += len - half;
if (midVal < key) }
low = mid + 1; len = half;
else if (midVal > key)
high = mid - 1;
else
return fromIndex + sz*mid;
} }
for (fromIndex += low*sz; fromIndex < toIndex; fromIndex+=sz) { return fromIndex + sz * low;
long val = get(fromIndex);
if (val == key) return fromIndex;
if (val > key) return encodeSearchMiss(sz, fromIndex);
}
return encodeSearchMiss(sz, toIndex - sz);
} }
default long binarySearchUpperBoundJava(long key, long fromIndex, long toIndex) {
long low = 0;
long high = (toIndex - fromIndex) - 1;
while (high - low >= LINEAR_SEARCH_CUTOFF) {
long mid = (low + high) >>> 1;
long midVal = get(fromIndex + mid);
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return fromIndex + mid;
}
for (fromIndex += low; fromIndex < toIndex; fromIndex++) {
if (get(fromIndex) >= key) return fromIndex;
}
return toIndex;
}
default void retain(LongQueryBuffer buffer, long boundary, long searchStart, long searchEnd) { default void retain(LongQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
if (searchStart >= searchEnd) return; if (searchStart >= searchEnd) return;
@ -255,11 +156,5 @@ public interface LongArraySearch extends LongArrayBase {
} }
static long encodeSearchMiss(int entrySize, long value) {
return -entrySize - Math.max(0, value);
}
static long decodeSearchMiss(int entrySize, long value) {
return -value - entrySize;
}
} }

View File

@ -107,10 +107,6 @@ class SortAlgoQuickSort {
long pivot = array.get(pivotPoint); long pivot = array.get(pivotPoint);
assert (pivotPoint - low) >= 0;
assert (pivotPoint - low) % wordSize == 0;
long i = low - wordSize; long i = low - wordSize;
long j = high + wordSize; long j = high + wordSize;

View File

@ -3,7 +3,6 @@ package nu.marginalia.array.delegate;
import nu.marginalia.array.ArrayRangeReference; import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.LongArraySearch; import nu.marginalia.array.algo.LongArraySearch;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.functional.*; import nu.marginalia.array.functional.*;
@ -84,26 +83,6 @@ public class ShiftedLongArray implements LongArray {
delegate.quickSortNative128(start, end); delegate.quickSortNative128(start, end);
} }
@Override
public long linearSearchNative(long key, long start, long end) {
return delegate.linearSearchNative(key, start + shift, end + shift);
}
@Override
public long linearSearchNative128(long key, long start, long end) {
return delegate.linearSearchNative128(key, start, end);
}
@Override
public long binarySearchNativeUB(long key, long start, long end) {
return delegate.binarySearchNativeUB(key, start + shift, end + shift);
}
@Override
public long binarySearchNative128(long key, long start, long end) {
return delegate.binarySearchNative128(key, start, end);
}
@Override @Override
public long size() { public long size() {
return size; return size;
@ -162,81 +141,24 @@ public class ShiftedLongArray implements LongArray {
} }
public long searchN(int sz, long key) {
if (size < 128) {
return linearSearchN(sz, key);
}
else {
return binarySearchN(sz, key);
}
}
public long search(long key) { public long search(long key) {
if (size < 128) {
return linearSearch(key);
}
else {
return binarySearch(key);
}
}
public long linearSearch(long key) {
return linearSearch(key, 0, size);
}
public long binarySearch(long key) {
return binarySearch(key, 0, size); return binarySearch(key, 0, size);
} }
public long binarySearchN(int sz, long key) {
return binarySearchN(sz, key, 0, size);
}
public long linearSearchN(int sz, long key) {
return linearSearchN(sz, key, 0, size);
}
public void retain(LongQueryBuffer buffer, long boundary) { public void retain(LongQueryBuffer buffer, long boundary) {
retain(buffer, boundary, 0, size); retain(buffer, boundary, 0, size);
} }
public void retainN(LongQueryBuffer buffer, int sz, long boundary) {
if (sz == 1)
retain(buffer, boundary, 0, size);
else
retainN(buffer, sz, boundary, 0, size);
}
public void reject(LongQueryBuffer buffer, long boundary) { public void reject(LongQueryBuffer buffer, long boundary) {
reject(buffer, boundary, 0, size); reject(buffer, boundary, 0, size);
} }
public void rejectN(LongQueryBuffer buffer, int sz, long boundary) {
if (sz == 1)
reject(buffer, boundary, 0, size);
else
rejectN(buffer, sz, boundary, 0, size);
}
@Override @Override
public long linearSearch(long key, long fromIndex, long toIndex) { public long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
return translateSearchResult(1, delegate.linearSearch(key, fromIndex + shift, toIndex+shift)); return delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift) - shift;
}
@Override
public long linearSearchN(int sz, long key, long fromIndex, long toIndex) {
return translateSearchResult(sz, delegate.linearSearch(key, fromIndex + shift, toIndex+shift));
} }
@Override @Override
public long binarySearch(long key, long fromIndex, long toIndex) { public long binarySearch(long key, long fromIndex, long toIndex) {
return translateSearchResult(1, delegate.binarySearch(key, fromIndex + shift, toIndex+shift)); return delegate.binarySearch(key, fromIndex + shift, toIndex+shift) - shift;
}
@Override
public long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
return translateSearchResult(sz, delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift));
}
@Override
public long binarySearchUpperBound(long key, long fromIndex, long toIndex) {
return translateSearchResult(1, delegate.binarySearchUpperBound(key, fromIndex + shift, toIndex+shift));
} }
private long translateSearchResult(int sz, long delegatedIdx) { private long translateSearchResult(int sz, long delegatedIdx) {

View File

@ -1,113 +0,0 @@
package nu.marginalia.array.page;
import nu.marginalia.array.algo.BulkTransferArray;
import nu.marginalia.array.functional.AddressRangeCall;
import nu.marginalia.array.functional.AddressRangeCallIO;
import nu.marginalia.array.scheme.ArrayPartitioningScheme;
import java.io.IOException;
import static nu.marginalia.array.algo.LongArraySearch.decodeSearchMiss;
import static nu.marginalia.array.algo.LongArraySearch.encodeSearchMiss;
/**
 * Base class for arrays that are split over several pages.
 * <p>
 * A range operation on the whole array is broken up into one call per page,
 * using the {@link ArrayPartitioningScheme} to map between array positions
 * and (page, offset) pairs.
 *
 * @param <T> page type
 * @param <B> buffer type used for bulk transfers
 */
public class AbstractPagingArray<T extends BulkTransferArray<B>, B> {
    final T[] pages;
    final long size;
    final ArrayPartitioningScheme partitioningScheme;

    public AbstractPagingArray(ArrayPartitioningScheme partitioningScheme, T[] pages, long size) {
        this.partitioningScheme = partitioningScheme;
        this.pages = pages;
        this.size = size;
    }

    /** Invokes {@code fn} once per page touched by the range [start, end), with page-local offsets. */
    void delegateToEachPage(long start, long end, AddressRangeCall<T> fn) {
        assert end >= start;

        int pageIdx = partitioningScheme.getPage(start);
        long cursor = start;

        while (cursor < end) {
            final long pageEnd = partitioningScheme.getPageEnd(cursor, end);
            final int from = partitioningScheme.getOffset(cursor);
            final int to = partitioningScheme.getEndOffset(start, pageEnd);

            fn.apply(pages[pageIdx], from, to);

            pageIdx++;
            cursor = pageEnd;
        }
    }

    /** Same as {@link #delegateToEachPage}, for callbacks that may throw {@link IOException}. */
    void delegateToEachPageIO(long start, long end, AddressRangeCallIO<T> fn) throws IOException {
        assert end >= start;

        int pageIdx = partitioningScheme.getPage(start);
        long cursor = start;

        while (cursor < end) {
            final long pageEnd = partitioningScheme.getPageEnd(cursor, end);
            final int from = partitioningScheme.getOffset(cursor);
            final int to = partitioningScheme.getEndOffset(start, pageEnd);

            fn.apply(pages[pageIdx], from, to);

            pageIdx++;
            cursor = pageEnd;
        }
    }

    /**
     * Converts a page-local search result into an index in the full array.
     * Non-negative results are translated directly; negative results are
     * decoded as misses, translated, and encoded again.
     */
    long translateSearchResultsFromPage(long fromIndex, long ret) {
        final int pageIdx = partitioningScheme.getPage(fromIndex);

        if (ret < 0) {
            final long pageLocalMiss = decodeSearchMiss(1, ret);
            final long realMiss = partitioningScheme.toRealIndex(pageIdx, (int) pageLocalMiss);
            return encodeSearchMiss(1, realMiss);
        }

        return partitioningScheme.toRealIndex(pageIdx, (int) ret);
    }

    /** Copies from {@code buffer}, beginning at {@code bufferStart}, into the range [start, end). */
    public void set(long start, long end, B buffer, int bufferStart) {
        assert end >= start;

        int pageIdx = partitioningScheme.getPage(start);
        long cursor = start;

        while (cursor < end) {
            final long pageEnd = partitioningScheme.getPageEnd(cursor, end);
            final int from = partitioningScheme.getOffset(cursor);
            final int to = partitioningScheme.getEndOffset(start, pageEnd);

            pages[pageIdx].set(from, to, buffer, bufferStart);

            // Advance the buffer position by the number of words handled on this page
            bufferStart += to - from;
            pageIdx++;
            cursor = pageEnd;
        }
    }

    /** Copies the range [start, end) into {@code buffer}, beginning at {@code bufferStart}. */
    public void get(long start, long end, B buffer, int bufferStart) {
        assert end >= start;

        int pageIdx = partitioningScheme.getPage(start);
        long cursor = start;

        while (cursor < end) {
            final long pageEnd = partitioningScheme.getPageEnd(cursor, end);
            final int from = partitioningScheme.getOffset(cursor);
            final int to = partitioningScheme.getEndOffset(start, pageEnd);

            pages[pageIdx].get(from, to, buffer, bufferStart);

            // Advance the buffer position by the number of words handled on this page
            bufferStart += to - from;
            pageIdx++;
            cursor = pageEnd;
        }
    }

    /** Closes every page in order. */
    public void close() {
        for (int i = 0; i < pages.length; i++) {
            pages[i].close();
        }
    }
}

View File

@ -135,27 +135,6 @@ public class SegmentLongArray implements PartitionPage, LongArray {
NativeAlgos.sort128(segment, start, end); NativeAlgos.sort128(segment, start, end);
} }
@Override
public long linearSearchNative(long key, long start, long end) {
return NativeAlgos.linearSearch64(key, segment, start, end);
}
@Override
public long linearSearchNative128(long key, long start, long end) {
return NativeAlgos.linearSearch128(key, segment, start, end);
}
@Override
public long binarySearchNativeUB(long key, long start, long end) {
return NativeAlgos.binarySearch64Upper(key, segment, start, end);
}
@Override
public long binarySearchNative128(long key, long start, long end) {
return NativeAlgos.binarySearch128(key, segment, start, end);
}
@Override @Override
public ByteBuffer getByteBuffer() { public ByteBuffer getByteBuffer() {
return segment.asByteBuffer(); return segment.asByteBuffer();

View File

@ -284,24 +284,4 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
NativeAlgos.sort128(segment, start, end); NativeAlgos.sort128(segment, start, end);
} }
@Override
public long linearSearchNative(long key, long start, long end) {
return NativeAlgos.linearSearch64(key, segment, start, end);
}
@Override
public long linearSearchNative128(long key, long start, long end) {
return NativeAlgos.linearSearch128(key, segment, start, end);
}
@Override
public long binarySearchNativeUB(long key, long start, long end) {
return NativeAlgos.binarySearch64Upper(key, segment, start, end);
}
@Override
public long binarySearchNative128(long key, long start, long end) {
return NativeAlgos.binarySearch128(key, segment, start, end);
}
} }

View File

@ -1,48 +0,0 @@
package nu.marginalia.array.page;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import org.openjdk.jmh.annotations.*;
/** This benchmark simulates the sorting in index creation */
public class QuicksortBenchmark {

    /** Shared benchmark state: one array that is refilled before every invocation. */
    @State(Scope.Benchmark)
    public static class BenchState {
        int size = 1024*1024;
        int pageSize = 10*1024;
        LongArray array = LongArrayFactory.onHeapShared(size);

        /** Overwrites each position with its bitwise complement, undoing the previous sort. */
        @Setup(Level.Invocation)
        public void doSetup() {
            array.transformEach(0, size, (index, previous) -> ~index);
        }
    }

    /** Sorts the whole array with the Java quicksort. */
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    @Fork(value = 5, warmups = 1)
    @Warmup(iterations = 5)
    public LongArray javaSort(BenchState state) {
        final LongArray target = state.array;
        final long length = target.size();
        target.quickSortJava(0, length);
        return target;
    }

    /** Sorts the whole array with the native quicksort. */
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    @Fork(value = 5, warmups = 1)
    @Warmup(iterations = 5)
    public LongArray cppSort(BenchState state) {
        final LongArray target = state.array;
        final long length = target.size();
        target.quickSortNative(0, length);
        return target;
    }
}

View File

@ -0,0 +1,110 @@
package nu.marginalia.array.page;
import nu.marginalia.array.LongArray;
import org.openjdk.jmh.annotations.*;
import java.lang.foreign.Arena;
import java.util.Random;
/**
 * This benchmark simulates the searching in index querying.
 * <p>
 * Method name prefixes select the array implementation ({@code ms} for
 * {@link SegmentLongArray}, {@code us} for {@link UnsafeLongArray}); the
 * suffix selects the entry width ({@code 64} for one-word entries,
 * {@code 128} for two-word entries).
 */
public class SearchBenchmark {

    /** Shared state: two sorted arrays with identical contents and a fixed set of lookup keys. */
    @State(Scope.Benchmark)
    public static class SortState {
        int size = 1024*1024;
        long[] keys;
        LongArray msArray = SegmentLongArray.onHeap(Arena.ofConfined(), size);
        LongArray usArray = UnsafeLongArray.onHeap(Arena.ofConfined(), size);

        public SortState()
        {
            msArray.transformEach(0, size, (pos,old) -> ~pos);
            usArray.transformEach(0, size, (pos,old) -> ~pos);
            msArray.quickSortJava(0, size);
            usArray.quickSortJava(0, size);

            // Every key is taken from the array itself
            keys = new long[1000];
            Random r = new Random();
            for (int i = 0; i < 1000; i++) {
                keys[i] = msArray.get(r.nextInt(0, size));
            }
        }
    }

    /** One-word entries on the segment-backed array. */
    @Fork(value = 1, warmups = 1)
    @Warmup(iterations = 5)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public long msSort64(SortState state) {
        var array = state.msArray;

        long ret = 0;
        for (var key : state.keys) {
            ret += array.binarySearch(key, 0, array.size());
        }
        return ret;
    }

    /**
     * One-word entries on the segment-backed array, going through the strided
     * search with an entry size of 1, so the two code paths can be compared.
     */
    @Fork(value = 3, warmups = 5)
    @Warmup(iterations = 5)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public long msSort64_2(SortState state) {
        var array = state.msArray;

        long ret = 0;
        for (var key : state.keys) {
            ret += array.binarySearchN(1, key, 0, array.size());
        }
        return ret;
    }

    /** Two-word entries on the segment-backed array. */
    @Fork(value = 5, warmups = 5)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public long msSort128(SortState state) {
        var array = state.msArray;

        long ret = 0;
        for (var key : state.keys) {
            // Argument order is (entry size, key, fromIndex, toIndex)
            ret += array.binarySearchN(2, key, 0, array.size());
        }
        return ret;
    }

    /** One-word entries on the unsafe-backed array. */
    @Fork(value = 5, warmups = 5)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public long usSort64(SortState state) {
        var array = state.usArray;

        long ret = 0;
        for (var key : state.keys) {
            // Argument order is (key, fromIndex, toIndex)
            ret += array.binarySearch(key, 0, array.size());
        }
        return ret;
    }

    /** Two-word entries on the unsafe-backed array. */
    @Fork(value = 5, warmups = 5)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public long usSort128(SortState state) {
        var array = state.usArray;

        long ret = 0;
        for (var key : state.keys) {
            // Argument order is (entry size, key, fromIndex, toIndex)
            ret += array.binarySearchN(2, key, 0, array.size());
        }
        return ret;
    }
}

View File

@ -0,0 +1,102 @@
package nu.marginalia.array.page;
import nu.marginalia.array.LongArray;
import org.openjdk.jmh.annotations.*;
import java.lang.foreign.Arena;
/**
 * This benchmark simulates the sorting in index creation.
 * <p>
 * Method name prefixes select the implementation under test ({@code ms} for
 * {@link SegmentLongArray}, {@code us} for {@link UnsafeLongArray},
 * {@code cpp} for the native sort); the suffix selects the entry width
 * ({@code 64} for one-word entries, {@code 128} for two-word entries).
 */
public class SortBenchmark {

    /** Shared state: two arrays that are refilled before every invocation. */
    @State(Scope.Benchmark)
    public static class BenchState {

        /** Overwrites each position with its bitwise complement, undoing the previous sort. */
        @Setup(Level.Invocation)
        public void doSetup() {
            msArray.transformEach(0, size, (pos,old) -> ~pos);
            usArray.transformEach(0, size, (pos,old) -> ~pos);
        }

        int size = 1024*1024;

        LongArray msArray = SegmentLongArray.onHeap(Arena.ofConfined(), size);
        LongArray usArray = UnsafeLongArray.onHeap(Arena.ofConfined(), size);
    }

    @Fork(value = 5, warmups = 5)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public LongArray msSort64(BenchState state) {
        var array = state.msArray;

        // One-word entries: the Java counterpart of quickSortNative
        array.quickSortJava(0, array.size());

        return array;
    }

    @Fork(value = 5, warmups = 5)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public LongArray msSort128(BenchState state) {
        var array = state.msArray;

        array.quickSortJavaN(2, 0, array.size());

        return array;
    }

    @Fork(value = 5, warmups = 5)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public LongArray usSort128(BenchState state) {
        var array = state.usArray;

        array.quickSortJavaN(2, 0, array.size());

        return array;
    }

    @Fork(value = 5, warmups = 5)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public LongArray usSort64(BenchState state) {
        var array = state.usArray;

        // One-word entries: the Java counterpart of quickSortNative
        array.quickSortJava(0, array.size());

        return array;
    }

    // We can assign the C++ sorts to lower warmup values as the JIT does not
    // need to warm up the C++ code; only the small Java code that calls it.

    @Fork(value = 5, warmups = 1)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public LongArray cppSort128(BenchState state) {
        var array = state.usArray; // realistically doesn't matter

        array.quickSortNative128(0, array.size());

        return array;
    }

    @Fork(value = 5, warmups = 1)
    @Warmup(iterations = 1)
    @Benchmark
    @BenchmarkMode(Mode.Throughput)
    public LongArray cppSort64(BenchState state) {
        var array = state.usArray; // realistically doesn't matter

        array.quickSortNative(0, array.size());

        return array;
    }
}

View File

@ -11,129 +11,57 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
class LongArraySearchTest { class LongArraySearchTest {
LongArray basicArray = LongArray.allocate(1024);
LongArray segmentArray = LongArrayFactory.onHeapConfined(1024); LongArray segmentArray = LongArrayFactory.onHeapConfined(1024);
LongArray shiftedArray = LongArray.allocate(1054).range(30, 1054); LongArray shiftedArray = LongArray.allocate(1054).range(30, 1054);
@BeforeEach @BeforeEach
public void setUp() { public void setUp() {
for (int i = 0; i < basicArray.size(); i++) { for (int i = 0; i < shiftedArray.size(); i++) {
basicArray.set(i, 3L*i);
shiftedArray.set(i, 3L*i); shiftedArray.set(i, 3L*i);
segmentArray.set(i, 3L*i); segmentArray.set(i, 3L*i);
} }
} }
@Test
void linearSearch() {
linearSearchTester(basicArray);
linearSearchTester(shiftedArray);
linearSearchTester(segmentArray);
}
@Test @Test
void binarySearch() { void binarySearch() {
binarySearchTester(basicArray);
binarySearchTester(shiftedArray); binarySearchTester(shiftedArray);
binarySearchTester(segmentArray); binarySearchTester(segmentArray);
} }
@Test
void binarySearchUpperBound() {
binarySearchUpperBoundTester(basicArray);
binarySearchUpperBoundTester(shiftedArray);
binarySearchUpperBoundTester(segmentArray);
}
@Test
void binarySearchUpperBoundNative() {
binarySearchUpperBoundNativeTester(basicArray);
binarySearchUpperBoundNativeTester(shiftedArray);
binarySearchUpperBoundNativeTester(segmentArray);
}
@Test @Test
public void testEmptyRange() { public void testEmptyRange() {
assertTrue(segmentArray.binarySearchN(2, 0, 0, 0) < 0); assertTrue(segmentArray.binarySearchN(2, 0, 0, 0) <= 0);
assertTrue(segmentArray.linearSearchN(2, 0, 0, 0) < 0); assertTrue(segmentArray.binarySearch(0, 0, 0) <= 0);
assertTrue(segmentArray.binarySearch(0, 0, 0) < 0);
assertTrue(segmentArray.linearSearch(0, 0, 0) < 0);
} }
void linearSearchTester(LongArray array) {
for (int i = 0; i < array.size() * 3; i++) {
long ret = array.linearSearch(i, 0, array.size());
if ((i % 3) == 0) {
assertTrue(ret >= 0);
assertEquals(i, array.get(ret));
}
else {
long higher = LongArraySearch.decodeSearchMiss(1, ret);
if (i > 0 && higher < array.size()) {
assertTrue(array.get(higher) < i);
}
}
}
}
void binarySearchTester(LongArray array) { void binarySearchTester(LongArray array) {
for (int i = 0; i < array.size() * 3; i++) { for (int i = 0; i < array.size() * 3; i++) {
long ret = array.binarySearch(i, 0, array.size()); long ret = array.binarySearch(i, 0, array.size());
if ((i % 3) == 0) { assertTrue(ret >= 0);
assertTrue(ret >= 0);
assertEquals(i, array.get(ret)); // Invariant check
if (i > 0 && ret > 0 && ret + 1 < array.size()) {
assertTrue(array.get(ret - 1) < i);
assertTrue(array.get(ret) >= i);
assertTrue(array.get(ret + 1) > i);
} }
else {
long higher = LongArraySearch.decodeSearchMiss(1, ret); if ((i % 3) == 0) {
if (i > 0 && higher+1 < array.size()) { assertEquals(i, array.get(ret));
assertTrue(array.get(higher) < i);
}
} }
} }
} }
void binarySearchUpperBoundTester(LongArray array) {
for (int i = 0; i < array.size() * 3; i++) {
long ret = array.binarySearchUpperBound(i, 0, array.size());
if ((i % 3) == 0) {
assertTrue(ret >= 0);
assertEquals(i, array.get(ret));
}
else {
if (i > 0 && ret > 0 && ret < array.size()) {
assertTrue(array.get(ret-1) < i);
}
}
}
}
void binarySearchUpperBoundNativeTester(LongArray array) {
for (int i = 0; i < array.size() * 3; i++) {
long ret = array.binarySearchNativeUB(i, 0, array.size());
if ((i % 3) == 0) {
assertTrue(ret >= 0);
assertEquals(i, array.get(ret));
}
else {
if (i > 0 && ret > 0 && ret < array.size()) {
assertTrue(array.get(ret-1) < i);
}
}
}
}
@Test @Test
void retain() { void retain() {
long[] vals = new long[128]; long[] vals = new long[128];
for (int i = 0; i < vals.length; i++) { vals[i] = i; } for (int i = 0; i < vals.length; i++) { vals[i] = i; }
var buffer = new LongQueryBuffer(vals, 128); var buffer = new LongQueryBuffer(vals, 128);
basicArray.retain(buffer, 128, 0, basicArray.size()); segmentArray.retain(buffer, 128, 0, segmentArray.size());
buffer.finalizeFiltering(); buffer.finalizeFiltering();
assertEquals(43, buffer.size()); assertEquals(43, buffer.size());
@ -148,7 +76,7 @@ class LongArraySearchTest {
for (int i = 0; i < vals.length; i++) { vals[i] = i; } for (int i = 0; i < vals.length; i++) { vals[i] = i; }
var buffer = new LongQueryBuffer(vals, 128); var buffer = new LongQueryBuffer(vals, 128);
basicArray.reject(buffer, 128, 0, basicArray.size()); segmentArray.reject(buffer, 128, 0, segmentArray.size());
buffer.finalizeFiltering(); buffer.finalizeFiltering();
assertEquals(128-43, buffer.size()); assertEquals(128-43, buffer.size());

View File

@ -1,7 +1,6 @@
package nu.marginalia.btree; package nu.marginalia.btree;
import nu.marginalia.array.LongArray; import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.LongArraySearch;
import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.btree.model.BTreeContext; import nu.marginalia.btree.model.BTreeContext;
import nu.marginalia.btree.model.BTreeHeader; import nu.marginalia.btree.model.BTreeHeader;
@ -151,10 +150,7 @@ public class BTreeReader {
for (int i = 0; i < keys.length; i++) { for (int i = 0; i < keys.length; i++) {
long key = keys[i]; long key = keys[i];
searchStart = data.binarySearchN(ctx.entrySize, key, searchStart, data.size()); searchStart = data.binarySearchN(ctx.entrySize, key, searchStart, data.size());
if (searchStart < 0) { if (data.get(searchStart) == key) {
searchStart = LongArraySearch.decodeSearchMiss(ctx.entrySize, searchStart);
}
else {
ret[i] = data.get(searchStart + offset); ret[i] = data.get(searchStart + offset);
} }
} }
@ -215,7 +211,7 @@ public class BTreeReader {
final long searchStart = layerOffsets[layer] + offset; final long searchStart = layerOffsets[layer] + offset;
final long nextLayerOffset = index.binarySearchUpperBound(key, searchStart, searchStart + ctx.pageSize()) - searchStart; final long nextLayerOffset = index.binarySearch(key, searchStart, searchStart + ctx.pageSize()) - searchStart;
layer --; layer --;
boundary = index.get(searchStart + nextLayerOffset); boundary = index.get(searchStart + nextLayerOffset);
@ -257,7 +253,13 @@ public class BTreeReader {
long searchEnd = searchStart + min(remainingTotal, remainingBlock); long searchEnd = searchStart + min(remainingTotal, remainingBlock);
return data.binarySearchN(ctx.entrySize, key, searchStart, searchEnd); long ret = data.binarySearchN(ctx.entrySize, key, searchStart, searchEnd);
if (data.get(ret) == key) {
return ret;
}
else {
return -1 - ret;
}
} }
public void retainData(LongQueryBuffer buffer) { public void retainData(LongQueryBuffer buffer) {