From 9e766bc056f1f6642156a883e17072fa3695d61a Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Fri, 17 May 2024 14:30:06 +0200 Subject: [PATCH] (array) Clean up search function jungle Retire search functions that weren't used, including the native implementations. Drop confusing suffixes on search function names. Search functions no longer encode search misses as negative values. Replaced binary search function with a branchless version that is much faster. Cleaned up benchmark code. --- .../cpp/java/nu/marginalia/NativeAlgos.java | 55 ------- .../array/cpp/src/main/cpp/cpphelpers.cpp | 85 +---------- .../array/cpp/src/main/public/cpphelpers.hpp | 6 - .../marginalia/array/algo/IntArraySearch.java | 7 +- .../marginalia/array/algo/LongArrayBase.java | 4 - .../array/algo/LongArraySearch.java | 139 +++--------------- .../array/algo/SortAlgoQuickSort.java | 4 - .../array/delegate/ShiftedLongArray.java | 84 +---------- .../array/page/AbstractPagingArray.java | 113 -------------- .../array/page/SegmentLongArray.java | 21 --- .../array/page/UnsafeLongArray.java | 20 --- .../array/page/QuicksortBenchmark.java | 48 ------ .../array/page/SearchBenchmark.java | 110 ++++++++++++++ .../marginalia/array/page/SortBenchmark.java | 102 +++++++++++++ .../array/algo/LongArraySearchTest.java | 102 ++----------- .../java/nu/marginalia/btree/BTreeReader.java | 16 +- 16 files changed, 263 insertions(+), 653 deletions(-) delete mode 100644 code/libraries/array/java/nu/marginalia/array/page/AbstractPagingArray.java delete mode 100644 code/libraries/array/src/jmh/java/nu/marginalia/array/page/QuicksortBenchmark.java create mode 100644 code/libraries/array/src/jmh/java/nu/marginalia/array/page/SearchBenchmark.java create mode 100644 code/libraries/array/src/jmh/java/nu/marginalia/array/page/SortBenchmark.java diff --git a/code/libraries/array/cpp/java/nu/marginalia/NativeAlgos.java b/code/libraries/array/cpp/java/nu/marginalia/NativeAlgos.java index ce203101..8eb9f643 100644 --- 
a/code/libraries/array/cpp/java/nu/marginalia/NativeAlgos.java +++ b/code/libraries/array/cpp/java/nu/marginalia/NativeAlgos.java @@ -28,10 +28,6 @@ import static java.lang.foreign.ValueLayout.JAVA_LONG; public class NativeAlgos { private final MethodHandle qsortHandle; private final MethodHandle qsort128Handle; - private final MethodHandle linearSearch64Handle; - private final MethodHandle linearSearch128Handle; - private final MethodHandle binarySearch128Handle; - private final MethodHandle binarySearch64UpperHandle; public static final NativeAlgos instance; @@ -51,22 +47,6 @@ public class NativeAlgos { handle = libraryLookup.find("ms_sort_128").get(); qsort128Handle = nativeLinker.downcallHandle(handle, FunctionDescriptor.ofVoid(ADDRESS, JAVA_LONG, JAVA_LONG)); - - handle = libraryLookup.find("ms_linear_search_64").get(); - linearSearch64Handle = nativeLinker.downcallHandle(handle, - FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG)); - - handle = libraryLookup.find("ms_linear_search_128").get(); - linearSearch128Handle = nativeLinker.downcallHandle(handle, - FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG)); - - handle = libraryLookup.find("ms_binary_search_128").get(); - binarySearch128Handle = nativeLinker.downcallHandle(handle, - FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG)); - - handle = libraryLookup.find("ms_binary_search_64upper").get(); - binarySearch64UpperHandle = nativeLinker.downcallHandle(handle, - FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG)); } static { @@ -114,39 +94,4 @@ public class NativeAlgos { } } - public static long linearSearch64(long key, MemorySegment ms, long start, long end) { - try { - return (long) instance.linearSearch64Handle.invoke(key, ms, start, end); - } - catch (Throwable t) { - throw new RuntimeException("Failed to invoke native function", t); - } - } - - public static long linearSearch128(long key, MemorySegment 
ms, long start, long end) { - try { - return (long) instance.linearSearch128Handle.invoke(key, ms, start, end); - } - catch (Throwable t) { - throw new RuntimeException("Failed to invoke native function", t); - } - } - - public static long binarySearch128(long key, MemorySegment ms, long start, long end) { - try { - return (long) instance.binarySearch128Handle.invoke(key, ms, start, end); - } - catch (Throwable t) { - throw new RuntimeException("Failed to invoke native function", t); - } - } - - public static long binarySearch64Upper(long key, MemorySegment ms, long start, long end) { - try { - return (long) instance.binarySearch64UpperHandle.invoke(key, ms, start, end); - } - catch (Throwable t) { - throw new RuntimeException("Failed to invoke native function", t); - } - } } diff --git a/code/libraries/array/cpp/src/main/cpp/cpphelpers.cpp b/code/libraries/array/cpp/src/main/cpp/cpphelpers.cpp index f49f7d7b..b5a26608 100644 --- a/code/libraries/array/cpp/src/main/cpp/cpphelpers.cpp +++ b/code/libraries/array/cpp/src/main/cpp/cpphelpers.cpp @@ -28,87 +28,4 @@ void ms_sort_128(int64_t* area, uint64_t start, uint64_t end) { [](const p64x2& fst, const p64x2& snd) { return fst.a < snd.a; }); -} - -inline int64_t encodeSearchMiss64(int64_t value) { - return -1 - std::max(int64_t(0), value); -} -inline int64_t encodeSearchMiss128(int64_t value) { - return -2 - std::max(int64_t(0), value); -} - -int64_t ms_linear_search_64(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) { - uint64_t pos = fromIndex; - for (; pos < toIndex; pos++) { - int64_t val = area[pos]; - - if (val == key) return pos; - if (val > key) break; - } - - return encodeSearchMiss64(pos - 1); -} - -int64_t ms_linear_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) { - uint64_t pos = fromIndex; - - for (; pos < toIndex; pos+=2) { - int64_t val = area[pos]; - - if (val == key) return pos; - if (val > key) break; - } - - return encodeSearchMiss128(pos - 2); -} - 
-int64_t ms_binary_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) { - int64_t low = 0; - int64_t high = (toIndex - fromIndex) / 2 - 1; - - while (high - low >= 32) { - int64_t mid = low + (high - low) / 2; - int64_t midVal = area[fromIndex + mid * 2]; - - if (midVal < key) { - low = mid + 1; - } else if (midVal > key) { - high = mid - 1; - } else { - return fromIndex + mid * 2; - } - } - - for (fromIndex += low * 2; fromIndex < toIndex; fromIndex+=2) { - int64_t val = area[fromIndex]; - - if (val == key) return fromIndex; - if (val > key) return encodeSearchMiss128(fromIndex); - } - - return encodeSearchMiss128(toIndex - 2); -} - -int64_t ms_binary_search_64upper(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) { - int64_t low = 0; - int64_t high = toIndex - fromIndex - 1; - - while (high - low > 32) { - int64_t mid = low + (high - low) / 2; - int64_t midVal = area[fromIndex + mid]; - - if (midVal < key) { - low = mid + 1; - } else if (midVal > key) { - high = mid - 1; - } else { - return fromIndex + mid; - } - } - - for (fromIndex += low; fromIndex < toIndex; fromIndex++) { - if (area[fromIndex] >= key) return fromIndex; - } - - return toIndex; -} +} \ No newline at end of file diff --git a/code/libraries/array/cpp/src/main/public/cpphelpers.hpp b/code/libraries/array/cpp/src/main/public/cpphelpers.hpp index 2fa03fd9..3e983ab8 100644 --- a/code/libraries/array/cpp/src/main/public/cpphelpers.hpp +++ b/code/libraries/array/cpp/src/main/public/cpphelpers.hpp @@ -5,10 +5,4 @@ extern "C" { void ms_sort_64(int64_t* area, uint64_t start, uint64_t end); void ms_sort_128(int64_t* area, uint64_t start, uint64_t end); - - int64_t ms_linear_search_64(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex); - int64_t ms_linear_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex); - - int64_t ms_binary_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex); - int64_t 
ms_binary_search_64upper(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex); } diff --git a/code/libraries/array/java/nu/marginalia/array/algo/IntArraySearch.java b/code/libraries/array/java/nu/marginalia/array/algo/IntArraySearch.java index c56767c9..6a148846 100644 --- a/code/libraries/array/java/nu/marginalia/array/algo/IntArraySearch.java +++ b/code/libraries/array/java/nu/marginalia/array/algo/IntArraySearch.java @@ -16,7 +16,7 @@ public interface IntArraySearch extends IntArrayBase { if (val > key) break; } - return LongArraySearch.encodeSearchMiss(1, pos - 1); + return encodeSearchMiss(1, pos - 1); } default long binarySearch(int key, long fromIndex, long toIndex) { @@ -119,4 +119,9 @@ public interface IntArraySearch extends IntArrayBase { } } + + static long encodeSearchMiss(int entrySize, long value) { + return -entrySize - Math.max(0, value); + } + } diff --git a/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java b/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java index d70b7761..728f4f41 100644 --- a/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java +++ b/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java @@ -29,10 +29,6 @@ public interface LongArrayBase extends BulkTransferArray { void quickSortNative(long start, long end); void quickSortNative128(long start, long end); - long linearSearchNative(long key, long start, long end); - long linearSearchNative128(long key, long start, long end); - long binarySearchNativeUB(long key, long start, long end); - long binarySearchNative128(long key, long start, long end); default void increment(long pos) { set(pos, get(pos) + 1); } diff --git a/code/libraries/array/java/nu/marginalia/array/algo/LongArraySearch.java b/code/libraries/array/java/nu/marginalia/array/algo/LongArraySearch.java index 367062a6..a4ac54e5 100644 --- a/code/libraries/array/java/nu/marginalia/array/algo/LongArraySearch.java +++ 
b/code/libraries/array/java/nu/marginalia/array/algo/LongArraySearch.java @@ -1,140 +1,41 @@ package nu.marginalia.array.algo; -import nu.marginalia.NativeAlgos; import nu.marginalia.array.buffer.LongQueryBuffer; public interface LongArraySearch extends LongArrayBase { - int LINEAR_SEARCH_CUTOFF = 32; - - default long linearSearch(long key, long fromIndex, long toIndex) { - if (NativeAlgos.isAvailable) { - return linearSearchNative(key, fromIndex, toIndex); - } else { - return linearSearchJava(key, fromIndex, toIndex); - } - } - - default long linearSearchN(int sz, long key, long fromIndex, long toIndex) { - if (NativeAlgos.isAvailable && sz == 2) { - return linearSearchNative128(key, fromIndex, toIndex); - } else { - return linearSearchNJava(sz, key, fromIndex, toIndex); - } - } - - default long binarySearchUpperBound(long key, long fromIndex, long toIndex) { - if (NativeAlgos.isAvailable) { - return binarySearchNativeUB(key, fromIndex, toIndex); - } else { - return binarySearchUpperBoundJava(key, fromIndex, toIndex); - } - } - - default long binarySearchN(int sz, long key, long fromIndex, long toIndex) { - if (NativeAlgos.isAvailable && sz == 2) { - return binarySearchNative128(key, fromIndex, toIndex); - } else { - return binarySearchNJava(sz, key, fromIndex, toIndex); - } - } - - default long linearSearchJava(long key, long fromIndex, long toIndex) { - long pos; - - for (pos = fromIndex; pos < toIndex; pos++) { - long val = get(pos); - - if (val == key) return pos; - if (val > key) break; - } - - return encodeSearchMiss(1, pos - 1); - } - - default long linearSearchNJava(int sz, long key, long fromIndex, long toIndex) { - long pos; - - for (pos = fromIndex; pos < toIndex; pos+=sz) { - long val = get(pos); - - if (val == key) return pos; - if (val > key) return encodeSearchMiss(sz, pos); - } - - return encodeSearchMiss(sz, toIndex - sz); - } - default long binarySearch(long key, long fromIndex, long toIndex) { long low = 0; long high = (toIndex - fromIndex) - 1; 
+ long len = high - low; /* window length is the entry count minus one: the final entry is never probed, so for a non-empty range the result is always a readable index; an empty range yields a negative len and skips the loop */ - while (high - low >= LINEAR_SEARCH_CUTOFF) { - long mid = (low + high) >>> 1; - long midVal = get(fromIndex + mid); - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return fromIndex + mid; + while (len > 0) { + var half = len / 2; + if (get(fromIndex + low + half) < key) { + low += len - half; + } + len = half; /* the window halves on every pass regardless of the comparison, so the pass count depends only on the range size */ } - return linearSearch(key, fromIndex + low, fromIndex + high + 1); + return fromIndex + low; /* assuming ascending order: index of the first entry that is not below key, capped at the last entry; a miss is not negative any more, callers compare get(result) with key */ } - default long binarySearchNJava(int sz, long key, long fromIndex, long toIndex) { + default long binarySearchN(int sz, long key, long fromIndex, long toIndex) { /* same search as binarySearch over entries that are sz words wide; only the first word of each entry is compared */ long low = 0; long high = (toIndex - fromIndex)/sz - 1; + long len = high - low; - while (high - low >= LINEAR_SEARCH_CUTOFF) { - long mid = (low + high) >>> 1; - long midVal = get(fromIndex + sz*mid); - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return fromIndex + sz*mid; + while (len > 0) { + var half = len / 2; + if (get(fromIndex + sz * (low + half)) < key) { + low += len - half; + } + len = half; } - for (fromIndex += low*sz; fromIndex < toIndex; fromIndex+=sz) { - long val = get(fromIndex); - - if (val == key) return fromIndex; - if (val > key) return encodeSearchMiss(sz, fromIndex); - } - - return encodeSearchMiss(sz, toIndex - sz); + return fromIndex + sz * low; /* word index of the selected entry, always a multiple of sz away from fromIndex */ } - - default long binarySearchUpperBoundJava(long key, long fromIndex, long toIndex) { - long low = 0; - long high = (toIndex - fromIndex) - 1; - - while (high - low >= LINEAR_SEARCH_CUTOFF) { - long mid = (low + high) >>> 1; - long midVal = get(fromIndex + mid); - - if (midVal < key) - low = mid + 1; - else if (midVal > key) - high = mid - 1; - else - return fromIndex + mid; - } - - for (fromIndex += low; fromIndex < toIndex; fromIndex++) { - if (get(fromIndex) >= key) return fromIndex; - } - - return toIndex; - } - - default void retain(LongQueryBuffer buffer, long boundary, long searchStart, long searchEnd) { if 
(searchStart >= searchEnd) return; @@ -255,11 +156,5 @@ public interface LongArraySearch extends LongArrayBase { } - static long encodeSearchMiss(int entrySize, long value) { - return -entrySize - Math.max(0, value); - } - static long decodeSearchMiss(int entrySize, long value) { - return -value - entrySize; - } } diff --git a/code/libraries/array/java/nu/marginalia/array/algo/SortAlgoQuickSort.java b/code/libraries/array/java/nu/marginalia/array/algo/SortAlgoQuickSort.java index 6b06d663..1e70b02a 100644 --- a/code/libraries/array/java/nu/marginalia/array/algo/SortAlgoQuickSort.java +++ b/code/libraries/array/java/nu/marginalia/array/algo/SortAlgoQuickSort.java @@ -107,10 +107,6 @@ class SortAlgoQuickSort { long pivot = array.get(pivotPoint); - assert (pivotPoint - low) >= 0; - assert (pivotPoint - low) % wordSize == 0; - - long i = low - wordSize; long j = high + wordSize; diff --git a/code/libraries/array/java/nu/marginalia/array/delegate/ShiftedLongArray.java b/code/libraries/array/java/nu/marginalia/array/delegate/ShiftedLongArray.java index 627e1755..b302223a 100644 --- a/code/libraries/array/java/nu/marginalia/array/delegate/ShiftedLongArray.java +++ b/code/libraries/array/java/nu/marginalia/array/delegate/ShiftedLongArray.java @@ -3,7 +3,6 @@ package nu.marginalia.array.delegate; import nu.marginalia.array.ArrayRangeReference; import nu.marginalia.array.LongArray; import nu.marginalia.array.algo.LongArraySearch; -import nu.marginalia.array.algo.SortingContext; import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.array.functional.*; @@ -84,26 +83,6 @@ public class ShiftedLongArray implements LongArray { delegate.quickSortNative128(start, end); } - @Override - public long linearSearchNative(long key, long start, long end) { - return delegate.linearSearchNative(key, start + shift, end + shift); - } - - @Override - public long linearSearchNative128(long key, long start, long end) { - return delegate.linearSearchNative128(key, start, end); - } 
- - @Override - public long binarySearchNativeUB(long key, long start, long end) { - return delegate.binarySearchNativeUB(key, start + shift, end + shift); - } - - @Override - public long binarySearchNative128(long key, long start, long end) { - return delegate.binarySearchNative128(key, start, end); - } - @Override public long size() { return size; @@ -162,81 +141,24 @@ public class ShiftedLongArray implements LongArray { } - public long searchN(int sz, long key) { - if (size < 128) { - return linearSearchN(sz, key); - } - else { - return binarySearchN(sz, key); - } - } - public long search(long key) { - if (size < 128) { - return linearSearch(key); - } - else { - return binarySearch(key); - } - } - - public long linearSearch(long key) { - return linearSearch(key, 0, size); - } - - public long binarySearch(long key) { return binarySearch(key, 0, size); } - public long binarySearchN(int sz, long key) { - return binarySearchN(sz, key, 0, size); - } - - public long linearSearchN(int sz, long key) { - return linearSearchN(sz, key, 0, size); - } - public void retain(LongQueryBuffer buffer, long boundary) { retain(buffer, boundary, 0, size); } - public void retainN(LongQueryBuffer buffer, int sz, long boundary) { - if (sz == 1) - retain(buffer, boundary, 0, size); - else - retainN(buffer, sz, boundary, 0, size); - } - public void reject(LongQueryBuffer buffer, long boundary) { reject(buffer, boundary, 0, size); } - public void rejectN(LongQueryBuffer buffer, int sz, long boundary) { - if (sz == 1) - reject(buffer, boundary, 0, size); - else - rejectN(buffer, sz, boundary, 0, size); - - } - @Override - public long linearSearch(long key, long fromIndex, long toIndex) { - return translateSearchResult(1, delegate.linearSearch(key, fromIndex + shift, toIndex+shift)); - } - @Override - public long linearSearchN(int sz, long key, long fromIndex, long toIndex) { - return translateSearchResult(sz, delegate.linearSearch(key, fromIndex + shift, toIndex+shift)); + public long 
binarySearchN(int sz, long key, long fromIndex, long toIndex) { + return delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift) - shift; } @Override public long binarySearch(long key, long fromIndex, long toIndex) { - return translateSearchResult(1, delegate.binarySearch(key, fromIndex + shift, toIndex+shift)); - } - @Override - public long binarySearchN(int sz, long key, long fromIndex, long toIndex) { - return translateSearchResult(sz, delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift)); - } - @Override - public long binarySearchUpperBound(long key, long fromIndex, long toIndex) { - return translateSearchResult(1, delegate.binarySearchUpperBound(key, fromIndex + shift, toIndex+shift)); + return delegate.binarySearch(key, fromIndex + shift, toIndex+shift) - shift; } private long translateSearchResult(int sz, long delegatedIdx) { diff --git a/code/libraries/array/java/nu/marginalia/array/page/AbstractPagingArray.java b/code/libraries/array/java/nu/marginalia/array/page/AbstractPagingArray.java deleted file mode 100644 index 03d753df..00000000 --- a/code/libraries/array/java/nu/marginalia/array/page/AbstractPagingArray.java +++ /dev/null @@ -1,113 +0,0 @@ -package nu.marginalia.array.page; - -import nu.marginalia.array.algo.BulkTransferArray; -import nu.marginalia.array.functional.AddressRangeCall; -import nu.marginalia.array.functional.AddressRangeCallIO; -import nu.marginalia.array.scheme.ArrayPartitioningScheme; - -import java.io.IOException; - -import static nu.marginalia.array.algo.LongArraySearch.decodeSearchMiss; -import static nu.marginalia.array.algo.LongArraySearch.encodeSearchMiss; - -public class AbstractPagingArray, B> { - final T[] pages; - final long size; - final ArrayPartitioningScheme partitioningScheme; - - public AbstractPagingArray(ArrayPartitioningScheme partitioningScheme, T[] pages, long size) { - this.partitioningScheme = partitioningScheme; - this.pages = pages; - this.size = size; - } - - void 
delegateToEachPage(long start, long end, AddressRangeCall fn) { - assert end >= start; - - int page = partitioningScheme.getPage(start); - - long endPos; - - for (long pos = start; pos < end; pos = endPos) { - endPos = partitioningScheme.getPageEnd(pos, end); - - int sOff = partitioningScheme.getOffset(pos); - int eOff = partitioningScheme.getEndOffset(start, endPos); - - fn.apply(pages[page++], sOff, eOff); - } - } - - void delegateToEachPageIO(long start, long end, AddressRangeCallIO fn) throws IOException { - assert end >= start; - - int page = partitioningScheme.getPage(start); - - long endPos; - - for (long pos = start; pos < end; pos = endPos) { - endPos = partitioningScheme.getPageEnd(pos, end); - - int sOff = partitioningScheme.getOffset(pos); - int eOff = partitioningScheme.getEndOffset(start, endPos); - - fn.apply(pages[page++], sOff, eOff); - } - } - - long translateSearchResultsFromPage(long fromIndex, long ret) { - int page = partitioningScheme.getPage(fromIndex); - - if (ret >= 0) { - return partitioningScheme.toRealIndex(page, (int) ret); - } else { - ret = decodeSearchMiss(1, ret); - ret = partitioningScheme.toRealIndex(page, (int) ret); - return encodeSearchMiss(1, ret); - } - } - - public void set(long start, long end, B buffer, int bufferStart) { - assert end >= start; - - int page = partitioningScheme.getPage(start); - - long endPos; - - for (long pos = start; pos < end; pos = endPos) { - endPos = partitioningScheme.getPageEnd(pos, end); - - int sOff = partitioningScheme.getOffset(pos); - int eOff = partitioningScheme.getEndOffset(start, endPos); - - pages[page++].set(sOff, eOff, buffer, bufferStart); - - bufferStart += eOff - sOff; - } - } - - public void get(long start, long end, B buffer, int bufferStart) { - assert end >= start; - - int page = partitioningScheme.getPage(start); - - long endPos; - - for (long pos = start; pos < end; pos = endPos) { - endPos = partitioningScheme.getPageEnd(pos, end); - - int sOff = 
partitioningScheme.getOffset(pos); - int eOff = partitioningScheme.getEndOffset(start, endPos); - - pages[page++].get(sOff, eOff, buffer, bufferStart); - - bufferStart += eOff - sOff; - } - } - - public void close() { - for (var page : pages) { - page.close(); - } - } -} diff --git a/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java b/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java index 9a1cc2f9..034752ab 100644 --- a/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java +++ b/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java @@ -135,27 +135,6 @@ public class SegmentLongArray implements PartitionPage, LongArray { NativeAlgos.sort128(segment, start, end); } - @Override - public long linearSearchNative(long key, long start, long end) { - return NativeAlgos.linearSearch64(key, segment, start, end); - } - - @Override - public long linearSearchNative128(long key, long start, long end) { - return NativeAlgos.linearSearch128(key, segment, start, end); - } - - @Override - public long binarySearchNativeUB(long key, long start, long end) { - return NativeAlgos.binarySearch64Upper(key, segment, start, end); - } - - @Override - public long binarySearchNative128(long key, long start, long end) { - return NativeAlgos.binarySearch128(key, segment, start, end); - } - - @Override public ByteBuffer getByteBuffer() { return segment.asByteBuffer(); diff --git a/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java b/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java index e38e1b28..7d86e56e 100644 --- a/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java +++ b/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java @@ -284,24 +284,4 @@ public class UnsafeLongArray implements PartitionPage, LongArray { NativeAlgos.sort128(segment, start, end); } - @Override - public long linearSearchNative(long key, long start, long end) { 
- return NativeAlgos.linearSearch64(key, segment, start, end); - } - - @Override - public long linearSearchNative128(long key, long start, long end) { - return NativeAlgos.linearSearch128(key, segment, start, end); - } - - @Override - public long binarySearchNativeUB(long key, long start, long end) { - return NativeAlgos.binarySearch64Upper(key, segment, start, end); - } - - @Override - public long binarySearchNative128(long key, long start, long end) { - return NativeAlgos.binarySearch128(key, segment, start, end); - } - } diff --git a/code/libraries/array/src/jmh/java/nu/marginalia/array/page/QuicksortBenchmark.java b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/QuicksortBenchmark.java deleted file mode 100644 index 866c70c6..00000000 --- a/code/libraries/array/src/jmh/java/nu/marginalia/array/page/QuicksortBenchmark.java +++ /dev/null @@ -1,48 +0,0 @@ -package nu.marginalia.array.page; - -import nu.marginalia.array.LongArray; -import nu.marginalia.array.LongArrayFactory; -import org.openjdk.jmh.annotations.*; - -/** This benchmark simulates the sorting in index creation */ -public class QuicksortBenchmark { - - @State(Scope.Benchmark) - public static class BenchState { - - @Setup(Level.Invocation) - public void doSetup() { - array.transformEach(0, size, (pos,old) -> ~pos); - } - - int size = 1024*1024; - int pageSize = 10*1024; - LongArray array = LongArrayFactory.onHeapShared(size); - } - - @Fork(value = 5, warmups = 1) - @Warmup(iterations = 5) - @Benchmark - @BenchmarkMode(Mode.Throughput) - public LongArray javaSort(BenchState state) { - var array = state.array; - - array.quickSortJava(0, array.size()); - - return array; - } - - @Fork(value = 5, warmups = 1) - @Warmup(iterations = 5) - @Benchmark - @BenchmarkMode(Mode.Throughput) - public LongArray cppSort(BenchState state) { - - var array = state.array; - - array.quickSortNative(0, array.size()); - - return array; - } - -} diff --git 
a/code/libraries/array/src/jmh/java/nu/marginalia/array/page/SearchBenchmark.java b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/SearchBenchmark.java new file mode 100644 index 00000000..898a2f33 --- /dev/null +++ b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/SearchBenchmark.java @@ -0,0 +1,110 @@ +package nu.marginalia.array.page; + +import nu.marginalia.array.LongArray; +import org.openjdk.jmh.annotations.*; + +import java.lang.foreign.Arena; +import java.util.Random; + +/** This benchmark simulates the searching in index querying */ +public class SearchBenchmark { + + @State(Scope.Benchmark) + public static class SortState { + + public SortState() + { + msArray.transformEach(0, size, (pos,old) -> ~pos); + usArray.transformEach(0, size, (pos,old) -> ~pos); + msArray.quickSortJava(0, size); + usArray.quickSortJava(0, size); + keys = new long[1000]; /* every key is a value that is present in the sorted arrays */ + Random r = new Random(); + for (int i = 0; i < 1000; i++) { + keys[i] = msArray.get(r.nextInt(0, size)); + } + } + + int size = 1024*1024; + + long[] keys; + LongArray msArray = SegmentLongArray.onHeap(Arena.ofConfined(), size); + LongArray usArray = UnsafeLongArray.onHeap(Arena.ofConfined(), size); + } + + @Fork(value = 1, warmups = 1) + @Warmup(iterations = 5) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public long msSort64(SortState state) { + var array = state.msArray; /* ms = SegmentLongArray */ + + long ret = 0; + for (var key : state.keys) { + ret += array.binarySearch(key, 0, array.size()); /* single-word entries */ + } + + return ret; + } + + @Fork(value = 3, warmups = 5) + @Warmup(iterations = 5) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public long msSort64_2(SortState state) { + var array = state.msArray; + + long ret = 0; + for (var key : state.keys) { + ret += array.binarySearchN(1, key, 0, array.size()); /* same single-word search through the N-word routine */ + } + + return ret; + } + + @Fork(value = 5, warmups = 5) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public long msSort128(SortState state) { + var array = state.msArray; 
+ + long ret = 0; + for (var key : state.keys) { + ret += array.binarySearchN(2, key, 0, array.size()); /* argument order is (sz, key, fromIndex, toIndex) */ + } + + return ret; + } + + @Fork(value = 5, warmups = 5) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public long usSort64(SortState state) { + var array = state.usArray; + + long ret = 0; + for (var key : state.keys) { + ret += array.binarySearch(key, 0, array.size()); /* argument order is (key, fromIndex, toIndex) */ + } + + return ret; + } + + @Fork(value = 5, warmups = 5) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public long usSort128(SortState state) { + var array = state.usArray; + + long ret = 0; + for (var key : state.keys) { + ret += array.binarySearchN(2, key, 0, array.size()); + } + + return ret; + } + +} diff --git a/code/libraries/array/src/jmh/java/nu/marginalia/array/page/SortBenchmark.java b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/SortBenchmark.java new file mode 100644 index 00000000..70c8c591 --- /dev/null +++ b/code/libraries/array/src/jmh/java/nu/marginalia/array/page/SortBenchmark.java @@ -0,0 +1,102 @@ +package nu.marginalia.array.page; + +import nu.marginalia.array.LongArray; +import org.openjdk.jmh.annotations.*; + +import java.lang.foreign.Arena; + +/** This benchmark simulates the sorting in index creation */ +public class SortBenchmark { + + @State(Scope.Benchmark) + public static class BenchState { + + @Setup(Level.Invocation) + public void doSetup() { + msArray.transformEach(0, size, (pos,old) -> ~pos); + usArray.transformEach(0, size, (pos,old) -> ~pos); + } + + int size = 1024*1024; + + LongArray msArray = SegmentLongArray.onHeap(Arena.ofConfined(), size); + LongArray usArray = UnsafeLongArray.onHeap(Arena.ofConfined(), size); + } + + @Fork(value = 5, warmups = 5) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public LongArray msSort64(BenchState state) { + var array = state.msArray; + + array.quickSortJava(0, array.size()); /* single-word sort; the two-word variant is measured by msSort128 */ + + return array; + } 
+ + @Fork(value = 5, warmups = 5) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public LongArray msSort128(BenchState state) { + var array = state.msArray; + + array.quickSortJavaN(2, 0, array.size()); + + return array; + } + + @Fork(value = 5, warmups = 5) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public LongArray usSort128(BenchState state) { + var array = state.usArray; + + array.quickSortJavaN(2, 0, array.size()); + + return array; + } + + @Fork(value = 5, warmups = 5) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public LongArray usSort64(BenchState state) { + var array = state.usArray; + + array.quickSortJava(0, array.size()); /* single-word sort; the two-word variant is measured by usSort128 */ + + return array; + } + + // We can assign the C++ sorts to lower warmup values as the JIT does not + // need to warm up the C++ code; only the small Java code that calls it. + + @Fork(value = 5, warmups = 1) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public LongArray cppSort128(BenchState state) { + + var array = state.usArray; // realistically doesn't matter + + array.quickSortNative128(0, array.size()); + + return array; + } + + @Fork(value = 5, warmups = 1) + @Warmup(iterations = 1) + @Benchmark + @BenchmarkMode(Mode.Throughput) + public LongArray cppSort64(BenchState state) { + + var array = state.usArray; // realistically doesn't matter + + array.quickSortNative(0, array.size()); + + return array; + } +} diff --git a/code/libraries/array/test/nu/marginalia/array/algo/LongArraySearchTest.java b/code/libraries/array/test/nu/marginalia/array/algo/LongArraySearchTest.java index f320d4bc..f551fa19 100644 --- a/code/libraries/array/test/nu/marginalia/array/algo/LongArraySearchTest.java +++ b/code/libraries/array/test/nu/marginalia/array/algo/LongArraySearchTest.java @@ -11,129 +11,57 @@ import static org.junit.jupiter.api.Assertions.assertTrue; class LongArraySearchTest { - LongArray basicArray = 
LongArray.allocate(1024); LongArray segmentArray = LongArrayFactory.onHeapConfined(1024); - LongArray shiftedArray = LongArray.allocate(1054).range(30, 1054); @BeforeEach public void setUp() { - for (int i = 0; i < basicArray.size(); i++) { - basicArray.set(i, 3L*i); + for (int i = 0; i < shiftedArray.size(); i++) { shiftedArray.set(i, 3L*i); segmentArray.set(i, 3L*i); } } - @Test - void linearSearch() { - linearSearchTester(basicArray); - linearSearchTester(shiftedArray); - linearSearchTester(segmentArray); - } @Test void binarySearch() { - binarySearchTester(basicArray); binarySearchTester(shiftedArray); binarySearchTester(segmentArray); } - @Test - void binarySearchUpperBound() { - binarySearchUpperBoundTester(basicArray); - binarySearchUpperBoundTester(shiftedArray); - binarySearchUpperBoundTester(segmentArray); - } - - @Test - void binarySearchUpperBoundNative() { - binarySearchUpperBoundNativeTester(basicArray); - binarySearchUpperBoundNativeTester(shiftedArray); - binarySearchUpperBoundNativeTester(segmentArray); - } - - @Test public void testEmptyRange() { - assertTrue(segmentArray.binarySearchN(2, 0, 0, 0) < 0); - assertTrue(segmentArray.linearSearchN(2, 0, 0, 0) < 0); - assertTrue(segmentArray.binarySearch(0, 0, 0) < 0); - assertTrue(segmentArray.linearSearch(0, 0, 0) < 0); + assertTrue(segmentArray.binarySearchN(2, 0, 0, 0) <= 0); + assertTrue(segmentArray.binarySearch(0, 0, 0) <= 0); } - void linearSearchTester(LongArray array) { - for (int i = 0; i < array.size() * 3; i++) { - long ret = array.linearSearch(i, 0, array.size()); - - if ((i % 3) == 0) { - assertTrue(ret >= 0); - assertEquals(i, array.get(ret)); - } - else { - long higher = LongArraySearch.decodeSearchMiss(1, ret); - if (i > 0 && higher < array.size()) { - assertTrue(array.get(higher) < i); - } - } - } - } void binarySearchTester(LongArray array) { for (int i = 0; i < array.size() * 3; i++) { long ret = array.binarySearch(i, 0, array.size()); - if ((i % 3) == 0) { - assertTrue(ret >= 0); - 
assertEquals(i, array.get(ret)); + assertTrue(ret >= 0); + + // Invariant check + if (i > 0 && ret > 0 && ret + 1 < array.size()) { + assertTrue(array.get(ret - 1) < i); + assertTrue(array.get(ret) >= i); + assertTrue(array.get(ret + 1) > i); } - else { - long higher = LongArraySearch.decodeSearchMiss(1, ret); - if (i > 0 && higher+1 < array.size()) { - assertTrue(array.get(higher) < i); - } + + if ((i % 3) == 0) { + assertEquals(i, array.get(ret)); } } } - void binarySearchUpperBoundTester(LongArray array) { - for (int i = 0; i < array.size() * 3; i++) { - long ret = array.binarySearchUpperBound(i, 0, array.size()); - - if ((i % 3) == 0) { - assertTrue(ret >= 0); - assertEquals(i, array.get(ret)); - } - else { - if (i > 0 && ret > 0 && ret < array.size()) { - assertTrue(array.get(ret-1) < i); - } - } - } - } - - void binarySearchUpperBoundNativeTester(LongArray array) { - for (int i = 0; i < array.size() * 3; i++) { - long ret = array.binarySearchNativeUB(i, 0, array.size()); - - if ((i % 3) == 0) { - assertTrue(ret >= 0); - assertEquals(i, array.get(ret)); - } - else { - if (i > 0 && ret > 0 && ret < array.size()) { - assertTrue(array.get(ret-1) < i); - } - } - } - } @Test void retain() { long[] vals = new long[128]; for (int i = 0; i < vals.length; i++) { vals[i] = i; } var buffer = new LongQueryBuffer(vals, 128); - basicArray.retain(buffer, 128, 0, basicArray.size()); + segmentArray.retain(buffer, 128, 0, segmentArray.size()); buffer.finalizeFiltering(); assertEquals(43, buffer.size()); @@ -148,7 +76,7 @@ class LongArraySearchTest { for (int i = 0; i < vals.length; i++) { vals[i] = i; } var buffer = new LongQueryBuffer(vals, 128); - basicArray.reject(buffer, 128, 0, basicArray.size()); + segmentArray.reject(buffer, 128, 0, segmentArray.size()); buffer.finalizeFiltering(); assertEquals(128-43, buffer.size()); diff --git a/code/libraries/btree/java/nu/marginalia/btree/BTreeReader.java b/code/libraries/btree/java/nu/marginalia/btree/BTreeReader.java index 
bc40bb43..3b17bd26 100644 --- a/code/libraries/btree/java/nu/marginalia/btree/BTreeReader.java +++ b/code/libraries/btree/java/nu/marginalia/btree/BTreeReader.java @@ -1,7 +1,6 @@ package nu.marginalia.btree; import nu.marginalia.array.LongArray; -import nu.marginalia.array.algo.LongArraySearch; import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.btree.model.BTreeContext; import nu.marginalia.btree.model.BTreeHeader; @@ -151,10 +150,7 @@ public class BTreeReader { for (int i = 0; i < keys.length; i++) { long key = keys[i]; searchStart = data.binarySearchN(ctx.entrySize, key, searchStart, data.size()); - if (searchStart < 0) { - searchStart = LongArraySearch.decodeSearchMiss(ctx.entrySize, searchStart); - } - else { + if (data.get(searchStart) == key) { ret[i] = data.get(searchStart + offset); } } @@ -215,7 +211,7 @@ public class BTreeReader { final long searchStart = layerOffsets[layer] + offset; - final long nextLayerOffset = index.binarySearchUpperBound(key, searchStart, searchStart + ctx.pageSize()) - searchStart; + final long nextLayerOffset = index.binarySearch(key, searchStart, searchStart + ctx.pageSize()) - searchStart; layer --; boundary = index.get(searchStart + nextLayerOffset); @@ -257,7 +253,13 @@ public class BTreeReader { long searchEnd = searchStart + min(remainingTotal, remainingBlock); - return data.binarySearchN(ctx.entrySize, key, searchStart, searchEnd); + long ret = data.binarySearchN(ctx.entrySize, key, searchStart, searchEnd); + if (data.get(ret) == key) { + return ret; + } + else { + return -1 - ret; + } } public void retainData(LongQueryBuffer buffer) {