mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(array) Clean up search function jungle
Retire search functions that weren't used, including the native implementations. Drop confusing suffixes on search function names. Search functions no longer encode search misses as negative values. Replaced binary search function with a branchless version that is much faster. Cleaned up benchmark code.
This commit is contained in:
parent
48aff52e00
commit
9e766bc056
@ -28,10 +28,6 @@ import static java.lang.foreign.ValueLayout.JAVA_LONG;
|
||||
public class NativeAlgos {
|
||||
private final MethodHandle qsortHandle;
|
||||
private final MethodHandle qsort128Handle;
|
||||
private final MethodHandle linearSearch64Handle;
|
||||
private final MethodHandle linearSearch128Handle;
|
||||
private final MethodHandle binarySearch128Handle;
|
||||
private final MethodHandle binarySearch64UpperHandle;
|
||||
|
||||
public static final NativeAlgos instance;
|
||||
|
||||
@ -51,22 +47,6 @@ public class NativeAlgos {
|
||||
handle = libraryLookup.find("ms_sort_128").get();
|
||||
qsort128Handle = nativeLinker.downcallHandle(handle,
|
||||
FunctionDescriptor.ofVoid(ADDRESS, JAVA_LONG, JAVA_LONG));
|
||||
|
||||
handle = libraryLookup.find("ms_linear_search_64").get();
|
||||
linearSearch64Handle = nativeLinker.downcallHandle(handle,
|
||||
FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
|
||||
|
||||
handle = libraryLookup.find("ms_linear_search_128").get();
|
||||
linearSearch128Handle = nativeLinker.downcallHandle(handle,
|
||||
FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
|
||||
|
||||
handle = libraryLookup.find("ms_binary_search_128").get();
|
||||
binarySearch128Handle = nativeLinker.downcallHandle(handle,
|
||||
FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
|
||||
|
||||
handle = libraryLookup.find("ms_binary_search_64upper").get();
|
||||
binarySearch64UpperHandle = nativeLinker.downcallHandle(handle,
|
||||
FunctionDescriptor.of(JAVA_LONG, JAVA_LONG, ADDRESS, JAVA_LONG, JAVA_LONG));
|
||||
}
|
||||
|
||||
static {
|
||||
@ -114,39 +94,4 @@ public class NativeAlgos {
|
||||
}
|
||||
}
|
||||
|
||||
public static long linearSearch64(long key, MemorySegment ms, long start, long end) {
|
||||
try {
|
||||
return (long) instance.linearSearch64Handle.invoke(key, ms, start, end);
|
||||
}
|
||||
catch (Throwable t) {
|
||||
throw new RuntimeException("Failed to invoke native function", t);
|
||||
}
|
||||
}
|
||||
|
||||
public static long linearSearch128(long key, MemorySegment ms, long start, long end) {
|
||||
try {
|
||||
return (long) instance.linearSearch128Handle.invoke(key, ms, start, end);
|
||||
}
|
||||
catch (Throwable t) {
|
||||
throw new RuntimeException("Failed to invoke native function", t);
|
||||
}
|
||||
}
|
||||
|
||||
public static long binarySearch128(long key, MemorySegment ms, long start, long end) {
|
||||
try {
|
||||
return (long) instance.binarySearch128Handle.invoke(key, ms, start, end);
|
||||
}
|
||||
catch (Throwable t) {
|
||||
throw new RuntimeException("Failed to invoke native function", t);
|
||||
}
|
||||
}
|
||||
|
||||
public static long binarySearch64Upper(long key, MemorySegment ms, long start, long end) {
|
||||
try {
|
||||
return (long) instance.binarySearch64UpperHandle.invoke(key, ms, start, end);
|
||||
}
|
||||
catch (Throwable t) {
|
||||
throw new RuntimeException("Failed to invoke native function", t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -28,87 +28,4 @@ void ms_sort_128(int64_t* area, uint64_t start, uint64_t end) {
|
||||
[](const p64x2& fst, const p64x2& snd) {
|
||||
return fst.a < snd.a;
|
||||
});
|
||||
}
|
||||
|
||||
inline int64_t encodeSearchMiss64(int64_t value) {
|
||||
return -1 - std::max(int64_t(0), value);
|
||||
}
|
||||
inline int64_t encodeSearchMiss128(int64_t value) {
|
||||
return -2 - std::max(int64_t(0), value);
|
||||
}
|
||||
|
||||
int64_t ms_linear_search_64(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) {
|
||||
uint64_t pos = fromIndex;
|
||||
for (; pos < toIndex; pos++) {
|
||||
int64_t val = area[pos];
|
||||
|
||||
if (val == key) return pos;
|
||||
if (val > key) break;
|
||||
}
|
||||
|
||||
return encodeSearchMiss64(pos - 1);
|
||||
}
|
||||
|
||||
int64_t ms_linear_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) {
|
||||
uint64_t pos = fromIndex;
|
||||
|
||||
for (; pos < toIndex; pos+=2) {
|
||||
int64_t val = area[pos];
|
||||
|
||||
if (val == key) return pos;
|
||||
if (val > key) break;
|
||||
}
|
||||
|
||||
return encodeSearchMiss128(pos - 2);
|
||||
}
|
||||
|
||||
int64_t ms_binary_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) {
|
||||
int64_t low = 0;
|
||||
int64_t high = (toIndex - fromIndex) / 2 - 1;
|
||||
|
||||
while (high - low >= 32) {
|
||||
int64_t mid = low + (high - low) / 2;
|
||||
int64_t midVal = area[fromIndex + mid * 2];
|
||||
|
||||
if (midVal < key) {
|
||||
low = mid + 1;
|
||||
} else if (midVal > key) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
return fromIndex + mid * 2;
|
||||
}
|
||||
}
|
||||
|
||||
for (fromIndex += low * 2; fromIndex < toIndex; fromIndex+=2) {
|
||||
int64_t val = area[fromIndex];
|
||||
|
||||
if (val == key) return fromIndex;
|
||||
if (val > key) return encodeSearchMiss128(fromIndex);
|
||||
}
|
||||
|
||||
return encodeSearchMiss128(toIndex - 2);
|
||||
}
|
||||
|
||||
int64_t ms_binary_search_64upper(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex) {
|
||||
int64_t low = 0;
|
||||
int64_t high = toIndex - fromIndex - 1;
|
||||
|
||||
while (high - low > 32) {
|
||||
int64_t mid = low + (high - low) / 2;
|
||||
int64_t midVal = area[fromIndex + mid];
|
||||
|
||||
if (midVal < key) {
|
||||
low = mid + 1;
|
||||
} else if (midVal > key) {
|
||||
high = mid - 1;
|
||||
} else {
|
||||
return fromIndex + mid;
|
||||
}
|
||||
}
|
||||
|
||||
for (fromIndex += low; fromIndex < toIndex; fromIndex++) {
|
||||
if (area[fromIndex] >= key) return fromIndex;
|
||||
}
|
||||
|
||||
return toIndex;
|
||||
}
|
||||
}
|
@ -5,10 +5,4 @@
|
||||
extern "C" {
|
||||
void ms_sort_64(int64_t* area, uint64_t start, uint64_t end);
|
||||
void ms_sort_128(int64_t* area, uint64_t start, uint64_t end);
|
||||
|
||||
int64_t ms_linear_search_64(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex);
|
||||
int64_t ms_linear_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex);
|
||||
|
||||
int64_t ms_binary_search_128(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex);
|
||||
int64_t ms_binary_search_64upper(int64_t key, int64_t* area, uint64_t fromIndex, uint64_t toIndex);
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ public interface IntArraySearch extends IntArrayBase {
|
||||
if (val > key) break;
|
||||
}
|
||||
|
||||
return LongArraySearch.encodeSearchMiss(1, pos - 1);
|
||||
return encodeSearchMiss(1, pos - 1);
|
||||
}
|
||||
|
||||
default long binarySearch(int key, long fromIndex, long toIndex) {
|
||||
@ -119,4 +119,9 @@ public interface IntArraySearch extends IntArrayBase {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static long encodeSearchMiss(int entrySize, long value) {
|
||||
return -entrySize - Math.max(0, value);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -29,10 +29,6 @@ public interface LongArrayBase extends BulkTransferArray<LongBuffer> {
|
||||
|
||||
void quickSortNative(long start, long end);
|
||||
void quickSortNative128(long start, long end);
|
||||
long linearSearchNative(long key, long start, long end);
|
||||
long linearSearchNative128(long key, long start, long end);
|
||||
long binarySearchNativeUB(long key, long start, long end);
|
||||
long binarySearchNative128(long key, long start, long end);
|
||||
default void increment(long pos) {
|
||||
set(pos, get(pos) + 1);
|
||||
}
|
||||
|
@ -1,140 +1,41 @@
|
||||
package nu.marginalia.array.algo;
|
||||
|
||||
import nu.marginalia.NativeAlgos;
|
||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||
|
||||
public interface LongArraySearch extends LongArrayBase {
|
||||
|
||||
int LINEAR_SEARCH_CUTOFF = 32;
|
||||
|
||||
default long linearSearch(long key, long fromIndex, long toIndex) {
|
||||
if (NativeAlgos.isAvailable) {
|
||||
return linearSearchNative(key, fromIndex, toIndex);
|
||||
} else {
|
||||
return linearSearchJava(key, fromIndex, toIndex);
|
||||
}
|
||||
}
|
||||
|
||||
default long linearSearchN(int sz, long key, long fromIndex, long toIndex) {
|
||||
if (NativeAlgos.isAvailable && sz == 2) {
|
||||
return linearSearchNative128(key, fromIndex, toIndex);
|
||||
} else {
|
||||
return linearSearchNJava(sz, key, fromIndex, toIndex);
|
||||
}
|
||||
}
|
||||
|
||||
default long binarySearchUpperBound(long key, long fromIndex, long toIndex) {
|
||||
if (NativeAlgos.isAvailable) {
|
||||
return binarySearchNativeUB(key, fromIndex, toIndex);
|
||||
} else {
|
||||
return binarySearchUpperBoundJava(key, fromIndex, toIndex);
|
||||
}
|
||||
}
|
||||
|
||||
default long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
|
||||
if (NativeAlgos.isAvailable && sz == 2) {
|
||||
return binarySearchNative128(key, fromIndex, toIndex);
|
||||
} else {
|
||||
return binarySearchNJava(sz, key, fromIndex, toIndex);
|
||||
}
|
||||
}
|
||||
|
||||
default long linearSearchJava(long key, long fromIndex, long toIndex) {
|
||||
long pos;
|
||||
|
||||
for (pos = fromIndex; pos < toIndex; pos++) {
|
||||
long val = get(pos);
|
||||
|
||||
if (val == key) return pos;
|
||||
if (val > key) break;
|
||||
}
|
||||
|
||||
return encodeSearchMiss(1, pos - 1);
|
||||
}
|
||||
|
||||
default long linearSearchNJava(int sz, long key, long fromIndex, long toIndex) {
|
||||
long pos;
|
||||
|
||||
for (pos = fromIndex; pos < toIndex; pos+=sz) {
|
||||
long val = get(pos);
|
||||
|
||||
if (val == key) return pos;
|
||||
if (val > key) return encodeSearchMiss(sz, pos);
|
||||
}
|
||||
|
||||
return encodeSearchMiss(sz, toIndex - sz);
|
||||
}
|
||||
|
||||
default long binarySearch(long key, long fromIndex, long toIndex) {
|
||||
long low = 0;
|
||||
long high = (toIndex - fromIndex) - 1;
|
||||
long len = high - low;
|
||||
|
||||
while (high - low >= LINEAR_SEARCH_CUTOFF) {
|
||||
long mid = (low + high) >>> 1;
|
||||
long midVal = get(fromIndex + mid);
|
||||
|
||||
if (midVal < key)
|
||||
low = mid + 1;
|
||||
else if (midVal > key)
|
||||
high = mid - 1;
|
||||
else
|
||||
return fromIndex + mid;
|
||||
while (len > 0) {
|
||||
var half = len / 2;
|
||||
if (get(fromIndex + low + half) < key) {
|
||||
low += len - half;
|
||||
}
|
||||
len = half;
|
||||
}
|
||||
|
||||
return linearSearch(key, fromIndex + low, fromIndex + high + 1);
|
||||
return fromIndex + low;
|
||||
}
|
||||
|
||||
default long binarySearchNJava(int sz, long key, long fromIndex, long toIndex) {
|
||||
default long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
|
||||
long low = 0;
|
||||
long high = (toIndex - fromIndex)/sz - 1;
|
||||
long len = high - low;
|
||||
|
||||
while (high - low >= LINEAR_SEARCH_CUTOFF) {
|
||||
long mid = (low + high) >>> 1;
|
||||
long midVal = get(fromIndex + sz*mid);
|
||||
|
||||
if (midVal < key)
|
||||
low = mid + 1;
|
||||
else if (midVal > key)
|
||||
high = mid - 1;
|
||||
else
|
||||
return fromIndex + sz*mid;
|
||||
while (len > 0) {
|
||||
var half = len / 2;
|
||||
if (get(fromIndex + sz * (low + half)) < key) {
|
||||
low += len - half;
|
||||
}
|
||||
len = half;
|
||||
}
|
||||
|
||||
for (fromIndex += low*sz; fromIndex < toIndex; fromIndex+=sz) {
|
||||
long val = get(fromIndex);
|
||||
|
||||
if (val == key) return fromIndex;
|
||||
if (val > key) return encodeSearchMiss(sz, fromIndex);
|
||||
}
|
||||
|
||||
return encodeSearchMiss(sz, toIndex - sz);
|
||||
return fromIndex + sz * low;
|
||||
}
|
||||
|
||||
|
||||
default long binarySearchUpperBoundJava(long key, long fromIndex, long toIndex) {
|
||||
long low = 0;
|
||||
long high = (toIndex - fromIndex) - 1;
|
||||
|
||||
while (high - low >= LINEAR_SEARCH_CUTOFF) {
|
||||
long mid = (low + high) >>> 1;
|
||||
long midVal = get(fromIndex + mid);
|
||||
|
||||
if (midVal < key)
|
||||
low = mid + 1;
|
||||
else if (midVal > key)
|
||||
high = mid - 1;
|
||||
else
|
||||
return fromIndex + mid;
|
||||
}
|
||||
|
||||
for (fromIndex += low; fromIndex < toIndex; fromIndex++) {
|
||||
if (get(fromIndex) >= key) return fromIndex;
|
||||
}
|
||||
|
||||
return toIndex;
|
||||
}
|
||||
|
||||
|
||||
default void retain(LongQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
|
||||
|
||||
if (searchStart >= searchEnd) return;
|
||||
@ -255,11 +156,5 @@ public interface LongArraySearch extends LongArrayBase {
|
||||
|
||||
}
|
||||
|
||||
static long encodeSearchMiss(int entrySize, long value) {
|
||||
return -entrySize - Math.max(0, value);
|
||||
}
|
||||
|
||||
static long decodeSearchMiss(int entrySize, long value) {
|
||||
return -value - entrySize;
|
||||
}
|
||||
}
|
||||
|
@ -107,10 +107,6 @@ class SortAlgoQuickSort {
|
||||
|
||||
long pivot = array.get(pivotPoint);
|
||||
|
||||
assert (pivotPoint - low) >= 0;
|
||||
assert (pivotPoint - low) % wordSize == 0;
|
||||
|
||||
|
||||
long i = low - wordSize;
|
||||
long j = high + wordSize;
|
||||
|
||||
|
@ -3,7 +3,6 @@ package nu.marginalia.array.delegate;
|
||||
import nu.marginalia.array.ArrayRangeReference;
|
||||
import nu.marginalia.array.LongArray;
|
||||
import nu.marginalia.array.algo.LongArraySearch;
|
||||
import nu.marginalia.array.algo.SortingContext;
|
||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||
import nu.marginalia.array.functional.*;
|
||||
|
||||
@ -84,26 +83,6 @@ public class ShiftedLongArray implements LongArray {
|
||||
delegate.quickSortNative128(start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long linearSearchNative(long key, long start, long end) {
|
||||
return delegate.linearSearchNative(key, start + shift, end + shift);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long linearSearchNative128(long key, long start, long end) {
|
||||
return delegate.linearSearchNative128(key, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long binarySearchNativeUB(long key, long start, long end) {
|
||||
return delegate.binarySearchNativeUB(key, start + shift, end + shift);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long binarySearchNative128(long key, long start, long end) {
|
||||
return delegate.binarySearchNative128(key, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() {
|
||||
return size;
|
||||
@ -162,81 +141,24 @@ public class ShiftedLongArray implements LongArray {
|
||||
}
|
||||
|
||||
|
||||
public long searchN(int sz, long key) {
|
||||
if (size < 128) {
|
||||
return linearSearchN(sz, key);
|
||||
}
|
||||
else {
|
||||
return binarySearchN(sz, key);
|
||||
}
|
||||
}
|
||||
|
||||
public long search(long key) {
|
||||
if (size < 128) {
|
||||
return linearSearch(key);
|
||||
}
|
||||
else {
|
||||
return binarySearch(key);
|
||||
}
|
||||
}
|
||||
|
||||
public long linearSearch(long key) {
|
||||
return linearSearch(key, 0, size);
|
||||
}
|
||||
|
||||
public long binarySearch(long key) {
|
||||
return binarySearch(key, 0, size);
|
||||
}
|
||||
|
||||
public long binarySearchN(int sz, long key) {
|
||||
return binarySearchN(sz, key, 0, size);
|
||||
}
|
||||
|
||||
public long linearSearchN(int sz, long key) {
|
||||
return linearSearchN(sz, key, 0, size);
|
||||
}
|
||||
|
||||
public void retain(LongQueryBuffer buffer, long boundary) {
|
||||
retain(buffer, boundary, 0, size);
|
||||
}
|
||||
public void retainN(LongQueryBuffer buffer, int sz, long boundary) {
|
||||
if (sz == 1)
|
||||
retain(buffer, boundary, 0, size);
|
||||
else
|
||||
retainN(buffer, sz, boundary, 0, size);
|
||||
}
|
||||
|
||||
public void reject(LongQueryBuffer buffer, long boundary) {
|
||||
reject(buffer, boundary, 0, size);
|
||||
}
|
||||
|
||||
public void rejectN(LongQueryBuffer buffer, int sz, long boundary) {
|
||||
if (sz == 1)
|
||||
reject(buffer, boundary, 0, size);
|
||||
else
|
||||
rejectN(buffer, sz, boundary, 0, size);
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public long linearSearch(long key, long fromIndex, long toIndex) {
|
||||
return translateSearchResult(1, delegate.linearSearch(key, fromIndex + shift, toIndex+shift));
|
||||
}
|
||||
@Override
|
||||
public long linearSearchN(int sz, long key, long fromIndex, long toIndex) {
|
||||
return translateSearchResult(sz, delegate.linearSearch(key, fromIndex + shift, toIndex+shift));
|
||||
public long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
|
||||
return delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift) - shift;
|
||||
}
|
||||
@Override
|
||||
public long binarySearch(long key, long fromIndex, long toIndex) {
|
||||
return translateSearchResult(1, delegate.binarySearch(key, fromIndex + shift, toIndex+shift));
|
||||
}
|
||||
@Override
|
||||
public long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
|
||||
return translateSearchResult(sz, delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift));
|
||||
}
|
||||
@Override
|
||||
public long binarySearchUpperBound(long key, long fromIndex, long toIndex) {
|
||||
return translateSearchResult(1, delegate.binarySearchUpperBound(key, fromIndex + shift, toIndex+shift));
|
||||
return delegate.binarySearch(key, fromIndex + shift, toIndex+shift) - shift;
|
||||
}
|
||||
|
||||
private long translateSearchResult(int sz, long delegatedIdx) {
|
||||
|
@ -1,113 +0,0 @@
|
||||
package nu.marginalia.array.page;
|
||||
|
||||
import nu.marginalia.array.algo.BulkTransferArray;
|
||||
import nu.marginalia.array.functional.AddressRangeCall;
|
||||
import nu.marginalia.array.functional.AddressRangeCallIO;
|
||||
import nu.marginalia.array.scheme.ArrayPartitioningScheme;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static nu.marginalia.array.algo.LongArraySearch.decodeSearchMiss;
|
||||
import static nu.marginalia.array.algo.LongArraySearch.encodeSearchMiss;
|
||||
|
||||
public class AbstractPagingArray<T extends BulkTransferArray<B>, B> {
|
||||
final T[] pages;
|
||||
final long size;
|
||||
final ArrayPartitioningScheme partitioningScheme;
|
||||
|
||||
public AbstractPagingArray(ArrayPartitioningScheme partitioningScheme, T[] pages, long size) {
|
||||
this.partitioningScheme = partitioningScheme;
|
||||
this.pages = pages;
|
||||
this.size = size;
|
||||
}
|
||||
|
||||
void delegateToEachPage(long start, long end, AddressRangeCall<T> fn) {
|
||||
assert end >= start;
|
||||
|
||||
int page = partitioningScheme.getPage(start);
|
||||
|
||||
long endPos;
|
||||
|
||||
for (long pos = start; pos < end; pos = endPos) {
|
||||
endPos = partitioningScheme.getPageEnd(pos, end);
|
||||
|
||||
int sOff = partitioningScheme.getOffset(pos);
|
||||
int eOff = partitioningScheme.getEndOffset(start, endPos);
|
||||
|
||||
fn.apply(pages[page++], sOff, eOff);
|
||||
}
|
||||
}
|
||||
|
||||
void delegateToEachPageIO(long start, long end, AddressRangeCallIO<T> fn) throws IOException {
|
||||
assert end >= start;
|
||||
|
||||
int page = partitioningScheme.getPage(start);
|
||||
|
||||
long endPos;
|
||||
|
||||
for (long pos = start; pos < end; pos = endPos) {
|
||||
endPos = partitioningScheme.getPageEnd(pos, end);
|
||||
|
||||
int sOff = partitioningScheme.getOffset(pos);
|
||||
int eOff = partitioningScheme.getEndOffset(start, endPos);
|
||||
|
||||
fn.apply(pages[page++], sOff, eOff);
|
||||
}
|
||||
}
|
||||
|
||||
long translateSearchResultsFromPage(long fromIndex, long ret) {
|
||||
int page = partitioningScheme.getPage(fromIndex);
|
||||
|
||||
if (ret >= 0) {
|
||||
return partitioningScheme.toRealIndex(page, (int) ret);
|
||||
} else {
|
||||
ret = decodeSearchMiss(1, ret);
|
||||
ret = partitioningScheme.toRealIndex(page, (int) ret);
|
||||
return encodeSearchMiss(1, ret);
|
||||
}
|
||||
}
|
||||
|
||||
public void set(long start, long end, B buffer, int bufferStart) {
|
||||
assert end >= start;
|
||||
|
||||
int page = partitioningScheme.getPage(start);
|
||||
|
||||
long endPos;
|
||||
|
||||
for (long pos = start; pos < end; pos = endPos) {
|
||||
endPos = partitioningScheme.getPageEnd(pos, end);
|
||||
|
||||
int sOff = partitioningScheme.getOffset(pos);
|
||||
int eOff = partitioningScheme.getEndOffset(start, endPos);
|
||||
|
||||
pages[page++].set(sOff, eOff, buffer, bufferStart);
|
||||
|
||||
bufferStart += eOff - sOff;
|
||||
}
|
||||
}
|
||||
|
||||
public void get(long start, long end, B buffer, int bufferStart) {
|
||||
assert end >= start;
|
||||
|
||||
int page = partitioningScheme.getPage(start);
|
||||
|
||||
long endPos;
|
||||
|
||||
for (long pos = start; pos < end; pos = endPos) {
|
||||
endPos = partitioningScheme.getPageEnd(pos, end);
|
||||
|
||||
int sOff = partitioningScheme.getOffset(pos);
|
||||
int eOff = partitioningScheme.getEndOffset(start, endPos);
|
||||
|
||||
pages[page++].get(sOff, eOff, buffer, bufferStart);
|
||||
|
||||
bufferStart += eOff - sOff;
|
||||
}
|
||||
}
|
||||
|
||||
public void close() {
|
||||
for (var page : pages) {
|
||||
page.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -135,27 +135,6 @@ public class SegmentLongArray implements PartitionPage, LongArray {
|
||||
NativeAlgos.sort128(segment, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long linearSearchNative(long key, long start, long end) {
|
||||
return NativeAlgos.linearSearch64(key, segment, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long linearSearchNative128(long key, long start, long end) {
|
||||
return NativeAlgos.linearSearch128(key, segment, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long binarySearchNativeUB(long key, long start, long end) {
|
||||
return NativeAlgos.binarySearch64Upper(key, segment, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long binarySearchNative128(long key, long start, long end) {
|
||||
return NativeAlgos.binarySearch128(key, segment, start, end);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ByteBuffer getByteBuffer() {
|
||||
return segment.asByteBuffer();
|
||||
|
@ -284,24 +284,4 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
|
||||
NativeAlgos.sort128(segment, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long linearSearchNative(long key, long start, long end) {
|
||||
return NativeAlgos.linearSearch64(key, segment, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long linearSearchNative128(long key, long start, long end) {
|
||||
return NativeAlgos.linearSearch128(key, segment, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long binarySearchNativeUB(long key, long start, long end) {
|
||||
return NativeAlgos.binarySearch64Upper(key, segment, start, end);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long binarySearchNative128(long key, long start, long end) {
|
||||
return NativeAlgos.binarySearch128(key, segment, start, end);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,48 +0,0 @@
|
||||
package nu.marginalia.array.page;
|
||||
|
||||
import nu.marginalia.array.LongArray;
|
||||
import nu.marginalia.array.LongArrayFactory;
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
/** This benchmark simulates the sorting in index creation */
|
||||
public class QuicksortBenchmark {
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
public static class BenchState {
|
||||
|
||||
@Setup(Level.Invocation)
|
||||
public void doSetup() {
|
||||
array.transformEach(0, size, (pos,old) -> ~pos);
|
||||
}
|
||||
|
||||
int size = 1024*1024;
|
||||
int pageSize = 10*1024;
|
||||
LongArray array = LongArrayFactory.onHeapShared(size);
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 1)
|
||||
@Warmup(iterations = 5)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public LongArray javaSort(BenchState state) {
|
||||
var array = state.array;
|
||||
|
||||
array.quickSortJava(0, array.size());
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 1)
|
||||
@Warmup(iterations = 5)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public LongArray cppSort(BenchState state) {
|
||||
|
||||
var array = state.array;
|
||||
|
||||
array.quickSortNative(0, array.size());
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,110 @@
|
||||
package nu.marginalia.array.page;
|
||||
|
||||
import nu.marginalia.array.LongArray;
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
import java.lang.foreign.Arena;
|
||||
import java.util.Random;
|
||||
|
||||
/** This benchmark simulates the searching in index querying */
|
||||
public class SearchBenchmark {
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
public static class SortState {
|
||||
|
||||
public SortState()
|
||||
{
|
||||
msArray.transformEach(0, size, (pos,old) -> ~pos);
|
||||
usArray.transformEach(0, size, (pos,old) -> ~pos);
|
||||
msArray.quickSortJava(0, size);
|
||||
usArray.quickSortJava(0, size);
|
||||
keys = new long[1000];
|
||||
Random r = new Random();
|
||||
for (int i = 0; i < 1000; i++) {
|
||||
keys[i] = msArray.get(r.nextInt(0, size));
|
||||
}
|
||||
}
|
||||
|
||||
int size = 1024*1024;
|
||||
|
||||
long[] keys;
|
||||
LongArray msArray = SegmentLongArray.onHeap(Arena.ofConfined(), size);
|
||||
LongArray usArray = UnsafeLongArray.onHeap(Arena.ofConfined(), size);
|
||||
}
|
||||
|
||||
@Fork(value = 1, warmups = 1)
|
||||
@Warmup(iterations = 5)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public long msSort64(SortState state) {
|
||||
var array = state.usArray;
|
||||
|
||||
long ret = 0;
|
||||
for (var key : state.keys) {
|
||||
ret += array.binarySearchNJava(2, key, 0, array.size());
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Fork(value = 3, warmups = 5)
|
||||
@Warmup(iterations = 5)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public long msSort64_2(SortState state) {
|
||||
var array = state.usArray;
|
||||
|
||||
long ret = 0;
|
||||
for (var key : state.keys) {
|
||||
ret += array.binarySearchNJava2(2, key, 0, array.size());
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 5)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public long msSort128(SortState state) {
|
||||
var array = state.msArray;
|
||||
|
||||
long ret = 0;
|
||||
for (var key : state.keys) {
|
||||
ret += array.binarySearchNJava(2, 0, array.size(), key);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 5)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public long usSort64(SortState state) {
|
||||
var array = state.usArray;
|
||||
|
||||
long ret = 0;
|
||||
for (var key : state.keys) {
|
||||
ret += array.binarySearchUpperBoundJava(0, array.size(), key);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 5)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public long usSort128(SortState state) {
|
||||
var array = state.usArray;
|
||||
|
||||
long ret = 0;
|
||||
for (var key : state.keys) {
|
||||
ret += array.binarySearchNJava(2, 0, array.size(), key);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,102 @@
|
||||
package nu.marginalia.array.page;
|
||||
|
||||
import nu.marginalia.array.LongArray;
|
||||
import org.openjdk.jmh.annotations.*;
|
||||
|
||||
import java.lang.foreign.Arena;
|
||||
|
||||
/** This benchmark simulates the sorting in index creation */
|
||||
public class SortBenchmark {
|
||||
|
||||
@State(Scope.Benchmark)
|
||||
public static class BenchState {
|
||||
|
||||
@Setup(Level.Invocation)
|
||||
public void doSetup() {
|
||||
msArray.transformEach(0, size, (pos,old) -> ~pos);
|
||||
usArray.transformEach(0, size, (pos,old) -> ~pos);
|
||||
}
|
||||
|
||||
int size = 1024*1024;
|
||||
|
||||
LongArray msArray = SegmentLongArray.onHeap(Arena.ofConfined(), size);
|
||||
LongArray usArray = UnsafeLongArray.onHeap(Arena.ofConfined(), size);
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 5)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public LongArray msSort64(BenchState state) {
|
||||
var array = state.msArray;
|
||||
|
||||
array.quickSortJavaN(2, 0, array.size());
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 5)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public LongArray msSort128(BenchState state) {
|
||||
var array = state.msArray;
|
||||
|
||||
array.quickSortJavaN(2, 0, array.size());
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 5)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public LongArray usSort128(BenchState state) {
|
||||
var array = state.usArray;
|
||||
|
||||
array.quickSortJavaN(2, 0, array.size());
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 5)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public LongArray usSort64(BenchState state) {
|
||||
var array = state.usArray;
|
||||
|
||||
array.quickSortJavaN(2, 0, array.size());
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
// We can assign the C++ sorts to lower warmup values as the JIT does not
|
||||
// need to warm up the C++ code; only the small Java code that calls it.
|
||||
|
||||
@Fork(value = 5, warmups = 1)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public LongArray cppSort128(BenchState state) {
|
||||
|
||||
var array = state.usArray; // realistically doesn't matter
|
||||
|
||||
array.quickSortNative128(0, array.size());
|
||||
|
||||
return array;
|
||||
}
|
||||
|
||||
@Fork(value = 5, warmups = 1)
|
||||
@Warmup(iterations = 1)
|
||||
@Benchmark
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
public LongArray cppSort64(BenchState state) {
|
||||
|
||||
var array = state.usArray; // realistically doesn't matter
|
||||
|
||||
array.quickSortNative(0, array.size());
|
||||
|
||||
return array;
|
||||
}
|
||||
}
|
@ -11,129 +11,57 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
class LongArraySearchTest {
|
||||
|
||||
LongArray basicArray = LongArray.allocate(1024);
|
||||
LongArray segmentArray = LongArrayFactory.onHeapConfined(1024);
|
||||
|
||||
LongArray shiftedArray = LongArray.allocate(1054).range(30, 1054);
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() {
|
||||
for (int i = 0; i < basicArray.size(); i++) {
|
||||
basicArray.set(i, 3L*i);
|
||||
for (int i = 0; i < shiftedArray.size(); i++) {
|
||||
shiftedArray.set(i, 3L*i);
|
||||
segmentArray.set(i, 3L*i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void linearSearch() {
|
||||
linearSearchTester(basicArray);
|
||||
linearSearchTester(shiftedArray);
|
||||
linearSearchTester(segmentArray);
|
||||
}
|
||||
|
||||
@Test
|
||||
void binarySearch() {
|
||||
binarySearchTester(basicArray);
|
||||
binarySearchTester(shiftedArray);
|
||||
binarySearchTester(segmentArray);
|
||||
}
|
||||
|
||||
@Test
|
||||
void binarySearchUpperBound() {
|
||||
binarySearchUpperBoundTester(basicArray);
|
||||
binarySearchUpperBoundTester(shiftedArray);
|
||||
binarySearchUpperBoundTester(segmentArray);
|
||||
}
|
||||
|
||||
@Test
|
||||
void binarySearchUpperBoundNative() {
|
||||
binarySearchUpperBoundNativeTester(basicArray);
|
||||
binarySearchUpperBoundNativeTester(shiftedArray);
|
||||
binarySearchUpperBoundNativeTester(segmentArray);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testEmptyRange() {
|
||||
assertTrue(segmentArray.binarySearchN(2, 0, 0, 0) < 0);
|
||||
assertTrue(segmentArray.linearSearchN(2, 0, 0, 0) < 0);
|
||||
assertTrue(segmentArray.binarySearch(0, 0, 0) < 0);
|
||||
assertTrue(segmentArray.linearSearch(0, 0, 0) < 0);
|
||||
assertTrue(segmentArray.binarySearchN(2, 0, 0, 0) <= 0);
|
||||
assertTrue(segmentArray.binarySearch(0, 0, 0) <= 0);
|
||||
}
|
||||
|
||||
void linearSearchTester(LongArray array) {
|
||||
for (int i = 0; i < array.size() * 3; i++) {
|
||||
long ret = array.linearSearch(i, 0, array.size());
|
||||
|
||||
if ((i % 3) == 0) {
|
||||
assertTrue(ret >= 0);
|
||||
assertEquals(i, array.get(ret));
|
||||
}
|
||||
else {
|
||||
long higher = LongArraySearch.decodeSearchMiss(1, ret);
|
||||
if (i > 0 && higher < array.size()) {
|
||||
assertTrue(array.get(higher) < i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void binarySearchTester(LongArray array) {
|
||||
for (int i = 0; i < array.size() * 3; i++) {
|
||||
long ret = array.binarySearch(i, 0, array.size());
|
||||
|
||||
if ((i % 3) == 0) {
|
||||
assertTrue(ret >= 0);
|
||||
assertEquals(i, array.get(ret));
|
||||
assertTrue(ret >= 0);
|
||||
|
||||
// Invariant check
|
||||
if (i > 0 && ret > 0 && ret + 1 < array.size()) {
|
||||
assertTrue(array.get(ret - 1) < i);
|
||||
assertTrue(array.get(ret) >= i);
|
||||
assertTrue(array.get(ret + 1) > i);
|
||||
}
|
||||
else {
|
||||
long higher = LongArraySearch.decodeSearchMiss(1, ret);
|
||||
if (i > 0 && higher+1 < array.size()) {
|
||||
assertTrue(array.get(higher) < i);
|
||||
}
|
||||
|
||||
if ((i % 3) == 0) {
|
||||
assertEquals(i, array.get(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void binarySearchUpperBoundTester(LongArray array) {
|
||||
for (int i = 0; i < array.size() * 3; i++) {
|
||||
long ret = array.binarySearchUpperBound(i, 0, array.size());
|
||||
|
||||
if ((i % 3) == 0) {
|
||||
assertTrue(ret >= 0);
|
||||
assertEquals(i, array.get(ret));
|
||||
}
|
||||
else {
|
||||
if (i > 0 && ret > 0 && ret < array.size()) {
|
||||
assertTrue(array.get(ret-1) < i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void binarySearchUpperBoundNativeTester(LongArray array) {
|
||||
for (int i = 0; i < array.size() * 3; i++) {
|
||||
long ret = array.binarySearchNativeUB(i, 0, array.size());
|
||||
|
||||
if ((i % 3) == 0) {
|
||||
assertTrue(ret >= 0);
|
||||
assertEquals(i, array.get(ret));
|
||||
}
|
||||
else {
|
||||
if (i > 0 && ret > 0 && ret < array.size()) {
|
||||
assertTrue(array.get(ret-1) < i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@Test
|
||||
void retain() {
|
||||
long[] vals = new long[128];
|
||||
for (int i = 0; i < vals.length; i++) { vals[i] = i; }
|
||||
var buffer = new LongQueryBuffer(vals, 128);
|
||||
|
||||
basicArray.retain(buffer, 128, 0, basicArray.size());
|
||||
segmentArray.retain(buffer, 128, 0, segmentArray.size());
|
||||
buffer.finalizeFiltering();
|
||||
|
||||
assertEquals(43, buffer.size());
|
||||
@ -148,7 +76,7 @@ class LongArraySearchTest {
|
||||
for (int i = 0; i < vals.length; i++) { vals[i] = i; }
|
||||
var buffer = new LongQueryBuffer(vals, 128);
|
||||
|
||||
basicArray.reject(buffer, 128, 0, basicArray.size());
|
||||
segmentArray.reject(buffer, 128, 0, segmentArray.size());
|
||||
buffer.finalizeFiltering();
|
||||
|
||||
assertEquals(128-43, buffer.size());
|
||||
|
@ -1,7 +1,6 @@
|
||||
package nu.marginalia.btree;
|
||||
|
||||
import nu.marginalia.array.LongArray;
|
||||
import nu.marginalia.array.algo.LongArraySearch;
|
||||
import nu.marginalia.array.buffer.LongQueryBuffer;
|
||||
import nu.marginalia.btree.model.BTreeContext;
|
||||
import nu.marginalia.btree.model.BTreeHeader;
|
||||
@ -151,10 +150,7 @@ public class BTreeReader {
|
||||
for (int i = 0; i < keys.length; i++) {
|
||||
long key = keys[i];
|
||||
searchStart = data.binarySearchN(ctx.entrySize, key, searchStart, data.size());
|
||||
if (searchStart < 0) {
|
||||
searchStart = LongArraySearch.decodeSearchMiss(ctx.entrySize, searchStart);
|
||||
}
|
||||
else {
|
||||
if (data.get(searchStart) == key) {
|
||||
ret[i] = data.get(searchStart + offset);
|
||||
}
|
||||
}
|
||||
@ -215,7 +211,7 @@ public class BTreeReader {
|
||||
|
||||
final long searchStart = layerOffsets[layer] + offset;
|
||||
|
||||
final long nextLayerOffset = index.binarySearchUpperBound(key, searchStart, searchStart + ctx.pageSize()) - searchStart;
|
||||
final long nextLayerOffset = index.binarySearch(key, searchStart, searchStart + ctx.pageSize()) - searchStart;
|
||||
|
||||
layer --;
|
||||
boundary = index.get(searchStart + nextLayerOffset);
|
||||
@ -257,7 +253,13 @@ public class BTreeReader {
|
||||
|
||||
long searchEnd = searchStart + min(remainingTotal, remainingBlock);
|
||||
|
||||
return data.binarySearchN(ctx.entrySize, key, searchStart, searchEnd);
|
||||
long ret = data.binarySearchN(ctx.entrySize, key, searchStart, searchEnd);
|
||||
if (data.get(ret) == key) {
|
||||
return ret;
|
||||
}
|
||||
else {
|
||||
return -1 - ret;
|
||||
}
|
||||
}
|
||||
|
||||
public void retainData(LongQueryBuffer buffer) {
|
||||
|
Loading…
Reference in New Issue
Block a user