(array) Clean up the Array library

IntArray gets the YAGNI axe. The array library had two implementations: one for longs, which was used, and one for ints, which only ever saw bit rot. This commit removes the latter, as all it ever did was clutter up the codebase and add technical debt. If we need int arrays, we can fork LongArray again (or add int capabilities to it).

Also cleaning up the interfaces, removing layers of redundant abstractions and adding javadocs.

Finally, this adds sz=2 specializations to the quicksort and insertion sort algorithms. The JIT doesn't seem to optimize the generic versions particularly well; this is an attempt to help it out a bit.
This commit is contained in:
Viktor Lofgren 2024-05-18 13:23:06 +02:00
parent 650f3843bb
commit 19163fa883
78 changed files with 294 additions and 2226 deletions

View File

@ -1,6 +1,6 @@
package nu.marginalia.index;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.EntrySource;

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.construction;
import nu.marginalia.array.functional.LongTransformer;
import nu.marginalia.array.algo.LongArrayTransformations;
/**
* Transforms an array of item-counts into an array of item-offsets such that the previous counts would fit into an
@ -9,7 +9,7 @@ import nu.marginalia.array.functional.LongTransformer;
* [ 1, 2, 3, 5, ... ] -> [ 0, 1, 3, 6, 11, ... ]
*
*/
public class CountToOffsetTransformer implements LongTransformer {
public class CountToOffsetTransformer implements LongArrayTransformations.LongTransformer {
long offset = 0;
public final int entrySize;

View File

@ -1,13 +1,13 @@
package nu.marginalia.index.construction;
import nu.marginalia.array.functional.LongBinaryOperation;
import nu.marginalia.array.algo.LongArrayTransformations;
import nu.marginalia.btree.model.BTreeContext;
/** Calculates the necessary size of an index from an array of offsets (@see CountToOffsetTransformer)<p>
*
* Used with LongArray.fold()
* */
public class IndexSizeEstimator implements LongBinaryOperation {
public class IndexSizeEstimator implements LongArrayTransformations.LongBinaryOperation {
private final BTreeContext bTreeContext;
private final int entrySize;

View File

@ -1,7 +1,7 @@
package nu.marginalia.index.construction;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.functional.LongIOTransformer;
import nu.marginalia.array.algo.LongArrayTransformations;
import nu.marginalia.btree.BTreeWriter;
import nu.marginalia.btree.model.BTreeContext;
@ -9,7 +9,7 @@ import java.io.IOException;
import java.nio.channels.FileChannel;
/** Constructs the BTrees in a reverse index */
public class ReverseIndexBTreeTransformer implements LongIOTransformer {
public class ReverseIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer {
private final BTreeWriter writer;
private final FileChannel intermediateChannel;

View File

@ -72,7 +72,7 @@ public class ReversePreindexWordSegments {
}
// Sort the words file
words.quickSort(0, counts.size());
words.sort(0, counts.size());
// Populate the counts
for (i = 0; i < countsMap.size(); i++) {

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.filter.QueryFilterStepIf;

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.index.query.filter.QueryFilterStepIf;

View File

@ -1,7 +1,6 @@
package nu.marginalia.index;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.construction.DocIdRewriter;
import nu.marginalia.index.construction.ReversePreindex;
import nu.marginalia.index.construction.TestJournalFactory;
@ -22,14 +21,12 @@ import static org.junit.jupiter.api.Assertions.*;
class ReverseIndexReaderTest {
TestJournalFactory journalFactory;
Path tempDir;
SortingContext sortingContext;
@BeforeEach
public void setUp() throws IOException {
journalFactory = new TestJournalFactory();
tempDir = Files.createTempDirectory("sort");
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
}
@AfterEach
@ -64,7 +61,6 @@ class ReverseIndexReaderTest {
var indexReader = createIndex(
new EntryDataWithWordMeta(100, 101, wm(50, 51), wm(51, 52)),
new EntryDataWithWordMeta(101, 101, wm(51, 53), wm(52, 54))
);
assertEquals(1, indexReader.numDocuments(50));

View File

@ -2,7 +2,6 @@
package nu.marginalia.index.construction;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.btree.BTreeReader;
import nu.marginalia.btree.model.BTreeHeader;
import org.junit.jupiter.api.AfterEach;
@ -24,7 +23,6 @@ class ReversePreindexFinalizeTest {
Path wordsIdFile;
Path docsFile;
Path tempDir;
SortingContext sortingContext;
@BeforeEach
public void setUp() throws IOException {
@ -34,7 +32,6 @@ class ReversePreindexFinalizeTest {
wordsIdFile = Files.createTempFile("words", ".dat");
docsFile = Files.createTempFile("docs", ".dat");
tempDir = Files.createTempDirectory("sort");
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
}
@AfterEach

View File

@ -1,7 +1,6 @@
package nu.marginalia.index.construction;
import nu.marginalia.array.algo.SortingContext;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
@ -20,7 +19,6 @@ class ReversePreindexMergeTest {
Path wordsIdFile;
Path docsFile;
Path tempDir;
SortingContext sortingContext;
@BeforeEach
public void setUp() throws IOException {
@ -30,7 +28,6 @@ class ReversePreindexMergeTest {
wordsIdFile = Files.createTempFile("words", ".dat");
docsFile = Files.createTempFile("docs", ".dat");
tempDir = Files.createTempDirectory("sort");
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
}
@AfterEach

View File

@ -14,10 +14,7 @@ import nu.marginalia.api.searchquery.model.compiled.CompiledQueryLong;
import nu.marginalia.api.searchquery.model.compiled.CqDataInt;
import nu.marginalia.api.searchquery.model.query.SearchSpecification;
import nu.marginalia.api.searchquery.model.results.*;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingDetails;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingInputs;
import nu.marginalia.api.searchquery.model.results.debug.ResultRankingOutputs;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.index.index.StatefulIndex;
import nu.marginalia.index.model.SearchParameters;
import nu.marginalia.index.model.SearchTerms;

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
/** Dummy EntrySource that returns no entries. */
public class EmptyEntrySource implements EntrySource {

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
/** An EntrySource is a source of entries for a query.
*/

View File

@ -1,7 +1,7 @@
package nu.marginalia.index.query;
import nu.marginalia.index.query.filter.QueryFilterStepIf;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import java.util.ArrayList;
import java.util.List;

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query.filter;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import java.util.ArrayList;
import java.util.List;

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query.filter;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import java.util.ArrayList;
import java.util.List;
@ -60,7 +60,7 @@ public class QueryFilterAnyOf implements QueryFilterStepIf {
for (var step : steps)
{
var slice = buffer.slice(start, endOfValidData);
slice.data.quickSort(0, slice.size());
slice.data.sort(0, slice.size());
step.apply(slice);
start += slice.end;
@ -73,7 +73,7 @@ public class QueryFilterAnyOf implements QueryFilterStepIf {
// After all filters have been applied, we must re-sort all the retained data
// to uphold the sortedness contract
buffer.data.quickSort(0, buffer.end);
buffer.data.sort(0, buffer.end);
}
public String describe() {

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query.filter;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
public class QueryFilterLetThrough implements QueryFilterStepIf {

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query.filter;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
public class QueryFilterNoPass implements QueryFilterStepIf {
static final QueryFilterStepIf instance = new QueryFilterNoPass();

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query.filter;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
public interface QueryFilterStepIf extends Comparable<QueryFilterStepIf> {
boolean test(long value);

View File

@ -1,6 +1,6 @@
package nu.marginalia.index.query.filter;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import org.junit.jupiter.api.Test;
import java.util.List;

View File

@ -1,7 +0,0 @@
package nu.marginalia.array;
/**
 * Reference to a span of an array, expressed as the array itself together with a
 * start and an end position.  Use this class judiciously to avoid gc churn.
 *
 * @param array the array being referenced
 * @param start start position of the range
 * @param end   end position of the range
 * @param <T>   type of the referenced array
 */
public record ArrayRangeReference<T>(T array, long start, long end) { }

View File

@ -1,32 +0,0 @@
package nu.marginalia.array;
import nu.marginalia.array.algo.IntArrayBase;
import nu.marginalia.array.algo.IntArraySearch;
import nu.marginalia.array.algo.IntArraySort;
import nu.marginalia.array.algo.IntArrayTransformations;
import nu.marginalia.array.delegate.ShiftedIntArray;
import nu.marginalia.array.page.SegmentIntArray;
import java.lang.foreign.Arena;
/** An array of ints addressed by long positions, combining the base accessors with the
 * transformation, search and sort operations of the interfaces it extends. */
public interface IntArray extends IntArrayBase, IntArrayTransformations, IntArraySearch, IntArraySort {
    /** Presumably the size of one int element in bytes -- NOTE(review): confirm against users */
    int WORD_SIZE = 4;

    /** Creates an array of the given size via SegmentIntArray.onHeap, passing it a shared Arena */
    static IntArray allocate(long size) {
        final Arena sharedArena = Arena.ofShared();
        return SegmentIntArray.onHeap(sharedArena, size);
    }

    /** Wraps this array in a ShiftedIntArray constructed with the given offset */
    default IntArray shifted(long offset) {
        return new ShiftedIntArray(offset, this);
    }

    /** Wraps this array in a ShiftedIntArray constructed with the given start and end */
    default IntArray range(long start, long end) {
        return new ShiftedIntArray(start, end, this);
    }

    /** Translate the range into the equivalent range in the underlying array if they are in the same page */
    ArrayRangeReference<IntArray> directRangeIfPossible(long start, long end);

    /** Presumably flushes pending changes to whatever backs the array -- TODO confirm with implementations */
    void force();

    /** Does nothing unless an implementation overrides it */
    default void close() { }
}

View File

@ -4,7 +4,6 @@ import nu.marginalia.array.algo.LongArrayBase;
import nu.marginalia.array.algo.LongArraySearch;
import nu.marginalia.array.algo.LongArraySort;
import nu.marginalia.array.algo.LongArrayTransformations;
import nu.marginalia.array.delegate.ShiftedLongArray;
import nu.marginalia.array.page.UnsafeLongArray;
import java.lang.foreign.Arena;
@ -19,16 +18,12 @@ public interface LongArray extends LongArrayBase, LongArrayTransformations, Long
return UnsafeLongArray.onHeap(Arena.ofShared(), size);
}
default LongArray shifted(long offset) {
return new ShiftedLongArray(offset, this);
}
default LongArray range(long start, long end) {
return new ShiftedLongArray(start, end, this);
}
/** Translate the range into the equivalent range in the underlying array if they are in the same page */
ArrayRangeReference<LongArray> directRangeIfPossible(long start, long end);
LongArray shifted(long offset);
LongArray range(long start, long end);
/** Force any changes to be written to the backing store */
void force();
/** Close the array and release any resources */
void close();
}

View File

@ -1,67 +0,0 @@
package nu.marginalia.array.algo;
import java.io.IOException;
import java.nio.IntBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
/** Basic element access for an int array addressed by long positions, plus bulk copies
 * to and from IntBuffer and int[]. */
public interface IntArrayBase extends BulkTransferArray<IntBuffer> {
    /** Reads the value stored at pos */
    int get(long pos);

    /** Stores value at pos */
    void set(long pos, int value);

    /** Stores the given values in consecutive positions, beginning at pos */
    default void set(long pos, int... value) {
        for (int offset = 0; offset < value.length; offset++) {
            final int element = value[offset];
            set(pos + offset, element);
        }
    }

    long size();

    /** Stores val in every position of [start, end) */
    default void fill(long start, long end, int val) {
        long pos = start;
        while (pos < end) {
            set(pos, val);
            pos++;
        }
    }

    /** Exchanges the values stored at pos1 and pos2 */
    default void swap(long pos1, long pos2) {
        final int first = get(pos1);
        final int second = get(pos2);
        set(pos1, second);
        set(pos2, first);
    }

    /** Adds one to the value stored at pos */
    default void increment(long pos) {
        final int incremented = get(pos) + 1;
        set(pos, incremented);
    }

    /** Adds one to the value stored at pos and returns the value it held before */
    default int getAndIncrement(long pos) {
        final int previous = get(pos);
        set(pos, previous + 1);
        return previous;
    }

    /** Copies (end - start) values from the buffer, read from index bufferStart onwards
     * using absolute gets, into the array positions [start, end) */
    default void set(long start, long end, IntBuffer buffer, int bufferStart) {
        final long count = end - start;
        for (int offset = 0; offset < count; offset++) {
            final int element = buffer.get(offset + bufferStart);
            set(start + offset, element);
        }
    }

    /** Copies the array positions [start, end) into the buffer, written from index
     * bufferStart onwards using absolute puts */
    default void get(long start, long end, IntBuffer buffer, int bufferStart) {
        final long count = end - start;
        for (int offset = 0; offset < count; offset++) {
            final int element = get(start + offset);
            buffer.put(offset + bufferStart, element);
        }
    }

    /** Copies buffer.remaining() values, beginning at array position start, into the
     * buffer from its current position onwards */
    default void get(long start, IntBuffer buffer) {
        final long end = start + buffer.remaining();
        final int bufferStart = buffer.position();
        get(start, end, buffer, bufferStart);
    }

    /** Copies the array positions [start, end) into buffer, beginning at buffer index 0 */
    default void get(long start, long end, int[] buffer) {
        final long count = end - start;
        for (int offset = 0; offset < count; offset++) {
            buffer[offset] = get(start + offset);
        }
    }

    /** Presumably writes the array contents to the given file -- TODO confirm with implementations */
    void write(Path file) throws IOException;

    /** Presumably reads from the channel, starting at sourceStart, into the array positions
     * between arrayStart and arrayEnd -- TODO confirm with implementations */
    void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException;
}

View File

@ -1,127 +0,0 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.buffer.IntQueryBuffer;
/** Search and filtering operations for int arrays.  The searches stop scanning at the first
 * value greater than the key, so they presumably expect the searched range to be sorted in
 * ascending order -- TODO confirm against callers. */
public interface IntArraySearch extends IntArrayBase {
/** Span size (high - low) below which the binary searches stop bisecting and scan linearly instead */
int LINEAR_SEARCH_CUTOFF = 64;
/** Scans [fromIndex, toIndex) for key, giving up at the first value greater than key.
 *
 * @return the position of key if it is found; otherwise the negative value
 * encodeSearchMiss(1, pos - 1), where pos is the position at which the scan stopped
 */
default long linearSearch(int key, long fromIndex, long toIndex) {
long pos;
for (pos = fromIndex; pos < toIndex; pos++) {
int val = get(pos);
if (val == key) return pos;
if (val > key) break;
}
return encodeSearchMiss(1, pos - 1);
}
/** Bisects [fromIndex, toIndex) until the remaining span is smaller than LINEAR_SEARCH_CUTOFF,
 * then hands what is left to linearSearch.
 *
 * @return the position of an element equal to key, or the encoded miss returned by linearSearch
 */
default long binarySearch(int key, long fromIndex, long toIndex) {
// low and high are offsets relative to fromIndex; high is inclusive
long low = 0;
long high = (toIndex - fromIndex) - 1;
while (high - low >= LINEAR_SEARCH_CUTOFF) {
long mid = (low + high) >>> 1;
long midVal = get(fromIndex + mid);
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return fromIndex + mid;
}
return linearSearch(key, fromIndex + low, fromIndex + high + 1);
}
/** Bisects like binarySearch, but finishes with a scan for the first value that is not smaller than key.
 *
 * @return the position of an element equal to key if bisection lands on one; otherwise the first
 * position at or after the narrowed lower bound whose value is >= key, or toIndex if there is none
 */
default long binarySearchUpperBound(int key, long fromIndex, long toIndex) {
// low and high are offsets relative to fromIndex; high is inclusive
long low = 0;
long high = (toIndex - fromIndex) - 1;
while (high - low >= LINEAR_SEARCH_CUTOFF) {
long mid = (low + high) >>> 1;
long midVal = get(fromIndex + mid);
if (midVal < key)
low = mid + 1;
else if (midVal > key)
high = mid - 1;
else
return fromIndex + mid;
}
// the scan runs all the way to toIndex, not just to the narrowed upper bound
for (fromIndex += low; fromIndex < toIndex; fromIndex++) {
if (get(fromIndex) >= key) return fromIndex;
}
return toIndex;
}
/** Walks the buffer and the array range [searchStart, searchEnd) in tandem: a buffer value smaller
 * than the current array value is rejected, a buffer value equal to it is retained, and otherwise
 * the array position moves forward.  Does nothing if the range is empty.  The walk ends when the
 * buffer value exceeds boundary, the buffer reports no more data, an advance call returns false,
 * or the array range is exhausted.
 */
default void retain(IntQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
if (searchStart >= searchEnd) return;
int bv = buffer.currentValue();
int av = get(searchStart);
long pos = searchStart;
while (bv <= boundary && buffer.hasMore()) {
if (bv < av) {
if (!buffer.rejectAndAdvance()) break;
bv = buffer.currentValue();
continue;
}
else if (bv == av) {
if (!buffer.retainAndAdvance()) break;
bv = buffer.currentValue();
continue;
}
// bv > av: move on to the next array value, or stop at the end of the range
if (++pos < searchEnd) {
av = get(pos);
}
else {
break;
}
}
}
/** Mirror image of retain: a buffer value smaller than the current array value is retained, a
 * buffer value equal to it is rejected, and otherwise the array position moves forward.  The
 * termination conditions are the same as for retain.
 */
default void reject(IntQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
if (searchStart >= searchEnd) return;
int bv = buffer.currentValue();
int av = get(searchStart);
long pos = searchStart;
while (bv <= boundary && buffer.hasMore()) {
if (bv < av) {
if (!buffer.retainAndAdvance()) break;
bv = buffer.currentValue();
continue;
}
else if (bv == av) {
if (!buffer.rejectAndAdvance()) break;
bv = buffer.currentValue();
continue;
}
// bv > av: move on to the next array value, or stop at the end of the range
if (++pos < searchEnd) {
av = get(pos);
}
else {
break;
}
}
}
/** Encodes a search miss as -entrySize - max(0, value) */
static long encodeSearchMiss(int entrySize, long value) {
return -entrySize - Math.max(0, value);
}
}

View File

@ -1,79 +0,0 @@
package nu.marginalia.array.algo;
import java.io.IOException;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
/** Sortedness checks and sorting entry points for int arrays. */
public interface IntArraySort extends IntArrayBase {

    /** Returns true if no value in [start, end) is smaller than the value before it */
    default boolean isSorted(long start, long end) {
        if (start == end) {
            return true;
        }

        int previous = get(start);
        long cursor = start + 1;
        while (cursor < end) {
            final int current = get(cursor);
            if (current < previous) {
                return false;
            }
            previous = current;
            cursor++;
        }
        return true;
    }

    /** Sorts [start, end) with quickSort when the span is smaller than ctx.memorySortLimit(),
     * and with mergeSort using ctx.tempDir() otherwise */
    default void sortLargeSpan(SortingContext ctx, long start, long end) throws IOException {
        final long span = end - start;
        if (span >= ctx.memorySortLimit()) {
            mergeSort(start, end, ctx.tempDir());
            return;
        }
        quickSort(start, end);
    }

    /** Like isSorted, but only inspects every wordSize:th value, beginning with the one at start */
    default boolean isSortedN(int wordSize, long start, long end) {
        if (start == end) {
            return true;
        }

        int previous = get(start);
        long cursor = start + wordSize;
        while (cursor < end) {
            final int current = get(cursor);
            if (current < previous) {
                return false;
            }
            previous = current;
            cursor += wordSize;
        }
        return true;
    }

    /** Sorts [start, end) via SortAlgoInsertionSort; asserts that the span is below Integer.MAX_VALUE */
    default void insertionSort(long start, long end) {
        assert end - start < Integer.MAX_VALUE;

        SortAlgoInsertionSort._insertionSort(this, start, end);
    }

    /** Sorts [start, end): spans of 64 or more values go to SortAlgoQuickSort, shorter ones
     * to insertionSort */
    default void quickSort(long start, long end) {
        if (end - start >= 64) {
            SortAlgoQuickSort._quickSortLH(this, start, end - 1);
        }
        else {
            insertionSort(start, end);
        }
    }

    /** Sorts [start, end) via SortAlgoMergeSort, using a memory-mapped temporary file created
     * in tmpDir as the work buffer.  The temporary file is deleted afterwards, also on failure. */
    default void mergeSort(long start, long end, Path tmpDir) throws IOException {
        final int length = (int) (end - start);

        final String prefix = "sort-"+start+"-"+(start+length);
        final Path tmpFile = Files.createTempFile(tmpDir, prefix, ".dat");

        try (FileChannel channel = (FileChannel) Files.newByteChannel(tmpFile, StandardOpenOption.WRITE, StandardOpenOption.READ)) {
            // 4 bytes of work buffer per int
            var workBuffer = channel.map(FileChannel.MapMode.READ_WRITE, 0, 4L * length).asIntBuffer();

            SortAlgoMergeSort._mergeSort(this, start, length, workBuffer);
        }
        finally {
            Files.delete(tmpFile);
        }
    }
}

View File

@ -1,47 +0,0 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.functional.*;
import java.io.IOException;
/** Per-element bulk operations over a range of an int array. */
public interface IntArrayTransformations extends IntArrayBase {

    /** Hands every position in [start, end), along with the value stored there, to the consumer */
    default void forEach(long start, long end, LongIntConsumer consumer) {
        long pos = start;
        while (pos < end) {
            consumer.accept(pos, get(pos));
            pos++;
        }
    }

    /** Replaces every value in [start, end) with transformer.transform(position, value) */
    default void transformEach(long start, long end, IntTransformer transformer) {
        long pos = start;
        while (pos < end) {
            final int replacement = transformer.transform(pos, get(pos));
            set(pos, replacement);
            pos++;
        }
    }

    /** Same as transformEach, for transformers that may throw an IOException */
    default void transformEachIO(long start, long end, IntIOTransformer transformer) throws IOException {
        long pos = start;
        while (pos < end) {
            final int replacement = transformer.transform(pos, get(pos));
            set(pos, replacement);
            pos++;
        }
    }

    /** Same as fold, for operators that may throw an IOException */
    default int foldIO(int zero, long start, long end, IntBinaryIOOperation operator) throws IOException {
        int result = zero;
        long pos = start;
        while (pos < end) {
            result = operator.apply(result, get(pos));
            pos++;
        }
        return result;
    }

    /** Reduces [start, end) to a single value: starting from zero, the running result is
     * replaced by operator.apply(result, value) for each value in order.  The array itself
     * is only read. */
    default int fold(int zero, long start, long end, IntBinaryOperation operator) {
        int result = zero;
        long pos = start;
        while (pos < end) {
            result = operator.apply(result, get(pos));
            pos++;
        }
        return result;
    }
}

View File

@ -3,32 +3,42 @@ package nu.marginalia.array.algo;
import nu.marginalia.array.LongArray;
import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.nio.LongBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
@SuppressWarnings("preview")
public interface LongArrayBase extends BulkTransferArray<LongBuffer> {
/** Get a value from the array at the specified position */
long get(long pos);
/** Set a value in the array at the specified position */
void set(long pos, long value);
/** Return the memory segment backing the array */
MemorySegment getMemorySegment();
/** Stores the given values in consecutive positions of the array, beginning at pos */
default void set(long pos, long... value) {
    for (int offset = 0; offset < value.length; offset++) {
        final long element = value[offset];
        set(pos + offset, element);
    }
}
/** Return the size of the array */
long size();
/** Stores val in every position of the range [start, end) */
default void fill(long start, long end, long val) {
    long pos = start;
    while (pos < end) {
        set(pos, val);
        pos++;
    }
}
void quickSortNative(long start, long end);
void quickSortNative128(long start, long end);
/** Adds one to the value stored at pos */
default void increment(long pos) {
    final long incremented = get(pos) + 1;
    set(pos, incremented);
}
@ -48,6 +58,16 @@ public interface LongArrayBase extends BulkTransferArray<LongBuffer> {
}
}
/** Exchanges the two-long entry starting at pos1 with the two-long entry starting at pos2,
 * i.e. swaps pos1 with pos2 and pos1 + 1 with pos2 + 1 */
default void swap2(long pos1, long pos2) {
    // first long of each entry
    final long headAtPos1 = get(pos1);
    set(pos1, get(pos2));
    set(pos2, headAtPos1);

    // second long of each entry
    final long tailAtPos1 = get(pos1 + 1);
    set(pos1 + 1, get(pos2 + 1));
    set(pos2 + 1, tailAtPos1);
}
default long getAndIncrement(long pos) {
long val = get(pos);
set(pos, val + 1);

View File

@ -1,6 +1,6 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
public interface LongArraySearch extends LongArrayBase {

View File

@ -1,6 +1,7 @@
package nu.marginalia.array.algo;
import nu.marginalia.NativeAlgos;
import nu.marginalia.array.LongArray;
import java.io.IOException;
import java.nio.channels.FileChannel;
@ -84,33 +85,29 @@ public interface LongArraySort extends LongArrayBase {
return true;
}
default void insertionSort(long start, long end) {
SortAlgoInsertionSort._insertionSort(this, start, end);
static void insertionSort(LongArraySort array, long start, long end) {
SortAlgoInsertionSort._insertionSort(array, start, end);
}
default void insertionSortN(int sz, long start, long end) {
SortAlgoInsertionSort._insertionSortN(this, sz, start, end);
}
default void quickSort(long start, long end) {
if (end - start < 64) {
insertionSort(start, end);
}
else if (NativeAlgos.isAvailable) {
quickSortNative(start, end);
static void insertionSortN(LongArraySort array, int sz, long start, long end) {
if (sz == 2) {
SortAlgoInsertionSort._insertionSort2(array, start, end);
}
else {
SortAlgoInsertionSort._insertionSortN(array, sz, start, end);
}
}
default void sort(long start, long end) {
if (NativeAlgos.isAvailable) {
NativeAlgos.sort(getMemorySegment(), start, end);
} else {
SortAlgoQuickSort._quickSortLH(this, start, end - 1);
}
}
default void quickSortJava(long start, long end) {
if (end - start < 64) {
insertionSort(start, end);
}
else {
SortAlgoQuickSort._quickSortLH(this, start, end - 1);
}
static void quickSortJava(LongArray array, long start, long end) {
SortAlgoQuickSort._quickSortLH(array, start, end - 1);
}
default void quickSortN(int wordSize, long start, long end) {
@ -119,23 +116,39 @@ public interface LongArraySort extends LongArrayBase {
if (end == start)
return;
if (NativeAlgos.isAvailable && wordSize == 2) {
quickSortNative128(start, end);
if (wordSize == 2) {
if (NativeAlgos.isAvailable) {
NativeAlgos.sort128(getMemorySegment(), start, end);
}
else {
SortAlgoQuickSort._quickSortLH2(this, start, end - 2);
}
}
else {
SortAlgoQuickSort._quickSortLHN(this, wordSize, start, end - wordSize);
}
}
default void quickSortJavaN(int wordSize, long start, long end) {
static void quickSortJavaN(LongArray array, int wordSize, long start, long end) {
assert ((end - start) % wordSize) == 0;
if (end == start)
return;
SortAlgoQuickSort._quickSortLHN(this, wordSize, start, end - wordSize);
SortAlgoQuickSort._quickSortLHN(array, wordSize, start, end - wordSize);
}
static void quickSortJava2(LongArray array, long start, long end) {
assert ((end - start) % 2) == 0;
if (end == start)
return;
SortAlgoQuickSort._quickSortLH2(array, start, end - 2);
}
/** Don't use this method, it's slow. */
@Deprecated
default void mergeSortN(int wordSize, long start, long end, Path tmpDir) throws IOException {
int length = (int) (end - start);
assert (length % wordSize) == 0;
@ -152,6 +165,8 @@ public interface LongArraySort extends LongArrayBase {
}
/** Don't use this method, it's slow. */
@Deprecated
default void mergeSort(long start, long end, Path tmpDir) throws IOException {
int length = (int) (end - start);

View File

@ -1,29 +1,34 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.functional.*;
import java.io.IOException;
public interface LongArrayTransformations extends LongArrayBase {
/** Hands every position in the range [start, end), along with the value stored there,
 * to the provided consumer */
default void forEach(long start, long end, LongLongConsumer consumer) {
    long pos = start;
    while (pos < end) {
        consumer.accept(pos, get(pos));
        pos++;
    }
}
/** Replaces each element in the range [start, end) with the transformer's result,
 * so that array[i] = transformer.transform(i, array[i]) */
default void transformEach(long start, long end, LongTransformer transformer) {
    long pos = start;
    while (pos < end) {
        final long replacement = transformer.transform(pos, get(pos));
        set(pos, replacement);
        pos++;
    }
}
/** Replaces each element in the range [start, end) with the transformer's result,
 * so that array[i] = transformer.transform(i, array[i]); the transformer may throw
 * an IOException, which is passed on to the caller */
default void transformEachIO(long start, long end, LongIOTransformer transformer) throws IOException {
    long pos = start;
    while (pos < end) {
        final long replacement = transformer.transform(pos, get(pos));
        set(pos, replacement);
        pos++;
    }
}
/** Reduces the array range to a single value, starting from zero and combining values
* with the provided operator (which may throw an IOException); returns the accumulated result */
default long foldIO(long zero, long start, long end, LongBinaryIOOperation operator) throws IOException {
long accumulator = zero;
@ -34,6 +39,8 @@ public interface LongArrayTransformations extends LongArrayBase {
return accumulator;
}
/** Reduces the array range to a single value, starting from zero and combining values
* with the provided operator; returns the accumulated result */
default long fold(long zero, long start, long end, LongBinaryOperation operator) {
long accumulator = zero;
@ -44,4 +51,23 @@ public interface LongArrayTransformations extends LongArrayBase {
return accumulator;
}
/** Combines two long values into one; the operation may fail with an IOException.
 * This is the operator type accepted by foldIO. */
@FunctionalInterface
interface LongBinaryIOOperation {
    /**
     * @param left  the first operand
     * @param right the second operand
     * @return the combined value
     * @throws IOException if the operation fails while doing I/O
     */
    long apply(long left, long right) throws IOException;
}
/** Combines two long values into one.  This is the operator type accepted by fold. */
@FunctionalInterface
interface LongBinaryOperation {
    /**
     * @param left  the first operand
     * @param right the second operand
     * @return the combined value
     */
    long apply(long left, long right);
}
/** Computes the replacement for a value at a given position; the computation may fail with
 * an IOException.  This is the transformer type accepted by transformEachIO. */
@FunctionalInterface
interface LongIOTransformer {
    /**
     * @param pos the position of the value
     * @param old the value currently at that position
     * @return the value that should replace it
     * @throws IOException if the transformation fails while doing I/O
     */
    long transform(long pos, long old) throws IOException;
}
/** Receives a position together with the value stored there.  This is the consumer type
 * accepted by forEach. */
@FunctionalInterface
interface LongLongConsumer {
    /**
     * @param pos the position of the value
     * @param val the value at that position
     */
    void accept(long pos, long val);
}
/** Computes the replacement for a value at a given position.  This is the transformer type
 * accepted by transformEach. */
@FunctionalInterface
interface LongTransformer {
    /**
     * @param pos the position of the value
     * @param old the value currently at that position
     * @return the value that should replace it
     */
    long transform(long pos, long old);
}
}

View File

@ -2,6 +2,35 @@ package nu.marginalia.array.algo;
class SortAlgoInsertionSort {
/** Insertion sort specialized for entries of two longs.  Entries are ordered by their first
 * long only; the second long travels along with it.  The span end - start must be even and
 * below Integer.MAX_VALUE (both checked by assertion only). */
static void _insertionSort2(LongArraySort array, long start, long end) {
    assert end - start < Integer.MAX_VALUE;

    final int span = (int) (end - start);

    assert (span % 2) == 0;

    // zero or one entry: nothing to sort
    if (span <= 2) return;

    final int entryCount = span / 2;
    for (long entry = 1; entry < entryCount; entry++) {
        final long key = array.get(start + entry * 2);
        final long payload = array.get(start + entry * 2 + 1);

        // move every preceding entry with a larger key one entry to the right
        long candidate = entry - 1;
        while (candidate >= 0 && array.get(start + candidate * 2) > key) {
            shift(array, start + candidate * 2, start + (candidate + 1) * 2, 2);
            candidate--;
        }

        // drop the saved entry into the gap that opened up
        array.set(start + (candidate + 1) * 2, key);
        array.set(start + (candidate + 1) * 2 + 1, payload);
    }
}
static void _insertionSortN(LongArraySort array, int sz, long start, long end) {
assert end - start < Integer.MAX_VALUE;
@ -22,7 +51,7 @@ class SortAlgoInsertionSort {
long j;
for (j = i - 1; j >= 0 && array.get(start + j * sz) > key; j--) {
shiftN(array, sz, start + j * sz, start + (j + 1) * sz, sz);
shift(array, start + j * sz, start + (j + 1) * sz, sz);
}
array.set(start + (j + 1) * sz, buf);
@ -49,44 +78,11 @@ class SortAlgoInsertionSort {
array.set(start + j + 1, key);
}
}
static void _insertionSort(IntArraySort array, long start, long end) {
assert end - start < Integer.MAX_VALUE;
int n = (int) (end - start);
if (n <= 1) {
return;
}
for (int i = 1; i < n; i++) {
int key = array.get(start + i);
int j;
for (j = i - 1; j >= 0 && array.get(start + j) > key; j--) {
shift(array, start + j, start + j + 1, 1);
}
array.set(start + j + 1, key);
}
}
private static void shiftN(LongArraySort array, int sz, long start, long end, long shift) {
for (long i = start; i < end; i+=sz) {
for (int j = 0; j < sz; j++) {
array.set(i + j + shift, array.get(i + j));
}
}
}
/** Copies each value in [start, end) to the position shift steps further along, walking the
 * range from start towards end */
private static void shift(LongArraySort array, long start, long end, long shift) {
    long source = start;
    while (source < end) {
        final long moved = array.get(source);
        array.set(source + shift, moved);
        source++;
    }
}
private static void shift(IntArraySort array, long start, long end, long shift) {
for (long i = start; i < end; i++) {
array.set(i + shift, array.get(i));
}
}
}

View File

@ -1,38 +1,17 @@
package nu.marginalia.array.algo;
import java.io.IOException;
import java.nio.IntBuffer;
import java.nio.LongBuffer;
@Deprecated
class SortAlgoMergeSort {
static void _mergeSort(IntArraySort array, long start, int length, IntBuffer workBuffer) {
int width = Math.min(Integer.highestOneBit(length), 1 << 16);
// Do in-memory sorting up until internalSortLimit first
for (int i = 0; i < length; i += width) {
array.quickSort(start + i, start + i + Math.min(width, length-i));
}
// Then finish with merge sort
for (width = 1; width < length; width*=2) {
for (int i = 0; i < length; i += 2*width) {
_merge(array, start, i, Math.min(i+width, length), Math.min(i+2*width, length), workBuffer);
}
workBuffer.clear();
array.set(start, start + length, workBuffer, 0);
}
}
static void _mergeSort(LongArraySort array, long start, int length, LongBuffer workBuffer) {
int width = Math.min(Integer.highestOneBit(length), 1 << 16);
// Do in-memory sorting up until internalSortLimit first
for (int i = 0; i < length; i += width) {
array.quickSort(start + i, start + i + Math.min(width, length-i));
array.sort(start + i, start + i + Math.min(width, length-i));
}
// Then finish with merge sort
@ -110,19 +89,4 @@ class SortAlgoMergeSort {
}
}
static void _merge(IntArraySort array, long offset, int left, int right, int end, IntBuffer workBuffer) {
long idxL = left;
long idxR = right;
for (int putPos = left; putPos < end; putPos++) {
if (idxL < right && (idxR >= end || array.get(offset+idxL) < array.get(offset+idxR))) {
workBuffer.put(putPos, array.get(offset+idxL));
idxL++;
}
else {
workBuffer.put(putPos, array.get(offset+idxR));
idxR++;
}
}
}
}

View File

@ -3,53 +3,27 @@ package nu.marginalia.array.algo;
class SortAlgoQuickSort {
static void _quickSortLH(IntArraySort array, long low, long highInclusive) {
static void _quickSortLH2(LongArraySort array, long low, long highInclusive) {
if (low < 0 || highInclusive < 0 || low >= highInclusive)
return;
if (highInclusive - low < 32) {
array.insertionSort(low, highInclusive + 1);
if (highInclusive - low < 8) {
SortAlgoInsertionSort._insertionSort2(array, low, highInclusive + 2);
return;
}
long p = _quickSortPartition(array, low, highInclusive);
long p = _quickSortPartition2(array, low, highInclusive);
_quickSortLH(array, low, p);
_quickSortLH(array, p + 1, highInclusive);
_quickSortLH2(array, low, p);
_quickSortLH2(array, p + 2, highInclusive);
}
static long _quickSortPartition(IntArraySort array, long low, long high) {
long pivotPoint = ((low + high) / (2L));
int pivot = array.get(pivotPoint);
long i = low - 1;
long j = high + 1;
for (;;) {
do {
i+=1;
} while (array.get(i) < pivot);
do {
j-=1;
}
while (array.get(j) > pivot);
if (i >= j) return j;
else array.swap(i, j);
}
}
static void _quickSortLHN(LongArraySort array, int wordSize, long low, long highInclusive) {
if (low < 0 || highInclusive < 0 || low >= highInclusive)
return;
if (highInclusive - low < 32L*wordSize) {
array.insertionSortN(wordSize, low, highInclusive + wordSize);
if (highInclusive - low < 8) {
SortAlgoInsertionSort._insertionSortN(array, wordSize, low, highInclusive + wordSize);
return;
}
@ -65,8 +39,8 @@ class SortAlgoQuickSort {
if (low < 0 || highInclusive < 0 || low >= highInclusive)
return;
if (highInclusive - low < 32) {
array.insertionSort(low, highInclusive + 1);
if (highInclusive - low < 8) {
SortAlgoInsertionSort._insertionSort(array, low, highInclusive + 1);
return;
}
@ -126,4 +100,30 @@ class SortAlgoQuickSort {
}
}
/** Partition step for arrays of two-long entries.  Two cursors step inwards from low and high,
 * two positions at a time, comparing only the first long of each entry against the pivot and
 * exchanging whole entries with swap2.
 *
 * @return the position of the right-hand cursor once the cursors have met or crossed
 */
static long _quickSortPartition2(LongArraySort array, long low, long high) {
    // the pivot sits an even number of positions from low, near the middle of the range
    final long halfSpan = (high - low) / (2L);
    final long pivot = array.get(low + (halfSpan / 2) * 2);

    long left = low - 2;
    long right = high + 2;

    while (true) {
        // advance at least once, then on past every entry whose first long is below the pivot
        left += 2;
        while (array.get(left) < pivot) {
            left += 2;
        }

        // retreat at least once, then on past every entry whose first long is above the pivot
        right -= 2;
        while (array.get(right) > pivot) {
            right -= 2;
        }

        if (left >= right) {
            return right;
        }
        array.swap2(left, right);
    }
}
}

View File

@ -1,14 +0,0 @@
package nu.marginalia.array.algo;
import java.nio.file.Path;
/**
 * Settings that decide how a large span is sorted.
 *
 * @param tempDir         directory in which MergeSort allocates its temporary buffers
 * @param memorySortLimit breaking point at which MergeSort is preferred over QuickSort; counted
 *                        in number of items, so e.g. for a long array with n=2 the memory
 *                        usage is 16 bytes times this value
 */
public record SortingContext(Path tempDir, int memorySortLimit) { }

View File

@ -158,23 +158,12 @@ public class TwoArrayOperations {
* Count the number of distinct elements in two sorted arrays.
*/
public static long countDistinctElements(LongArray a, LongArray b, long aStart, long aEnd, long bStart, long bEnd) {
var directRangeA = a.directRangeIfPossible(aStart, aEnd);
var directRangeB = b.directRangeIfPossible(bStart, bEnd);
// Ensure that the arrays are sorted
if (TwoArrayOperations.class.desiredAssertionStatus()) {
assert (a.isSorted(aStart, aEnd));
assert (b.isSorted(bStart, bEnd));
}
a = directRangeA.array();
aStart = directRangeA.start();
aEnd = directRangeA.end();
b = directRangeB.array();
bStart = directRangeB.start();
bEnd = directRangeB.end();
return countDistinctElementsDirect(a, b, aStart, aEnd, bStart, bEnd);
}
@ -188,16 +177,13 @@ public class TwoArrayOperations {
assert (b.isSortedN(stepSize, bStart, bEnd));
}
var directRangeA = a.directRangeIfPossible(aStart, aEnd);
var directRangeB = b.directRangeIfPossible(bStart, bEnd);
return countDistinctElementsDirectN(stepSize,
directRangeA.array(),
directRangeB.array(),
directRangeA.start(),
directRangeA.end(),
directRangeB.start(),
directRangeB.end());
a,
b,
aStart,
aEnd,
bStart,
bEnd);
}
private static long countDistinctElementsDirect(LongArray a, LongArray b, long aStart, long aEnd, long bStart, long bEnd) {

View File

@ -1,112 +0,0 @@
package nu.marginalia.array.buffer;
import java.util.Arrays;
/**
 * An int buffer with a read cursor and a write cursor, used to filter its
 * contents in place.
 *
 * <p>Values are inspected at the read cursor via {@link #currentValue()} and are
 * either kept ({@link #retainAndAdvance()}) or dropped
 * ({@link #rejectAndAdvance()}). Kept values are compacted towards the start of
 * {@link #data}; {@link #finalizeFiltering()} then shrinks {@link #end} to the
 * number of kept values.
 */
public class IntQueryBuffer {
    /** Backing storage; only the first {@link #end} entries are considered live. */
    public final int[] data;

    /** Exclusive upper bound of the live portion of {@link #data}. */
    public int end;

    private int read = 0;
    private int write = 0;

    /** Creates a buffer backed by a fresh array of {@code size} ints, all of them live. */
    public IntQueryBuffer(int size) {
        this(new int[size], size);
    }

    /** Wraps {@code data} (no copy is made) and treats the first {@code size} entries as live. */
    public IntQueryBuffer(int [] data, int size) {
        this.data = data;
        this.end = size;
    }

    /** @return a copy of the live portion of the buffer */
    public int[] copyData() {
        return Arrays.copyOf(data, end);
    }

    /** @return true when the live portion has length zero */
    public boolean isEmpty() {
        return 0 == end;
    }

    /** @return the length of the live portion */
    public int size() {
        return end;
    }

    /** @return the value at the read cursor */
    public int currentValue() {
        return data[read];
    }

    /**
     * Skips the current value without keeping it.
     *
     * @return true if the read cursor still points inside the live portion
     */
    public boolean rejectAndAdvance() {
        read += 1;
        return hasMore();
    }

    /**
     * Keeps the current value by exchanging it with the value at the write
     * cursor (when the cursors differ), then advances both cursors.
     *
     * @return true if the read cursor still points inside the live portion
     */
    public boolean retainAndAdvance() {
        final int src = read;
        final int dst = write;

        if (src != dst) {
            final int displaced = data[dst];
            data[dst] = data[src];
            data[src] = displaced;
        }

        write = dst + 1;
        read = src + 1;
        return hasMore();
    }

    /** @return true while the read cursor is before {@link #end} */
    public boolean hasMore() {
        return read < end;
    }

    /** Ends a filtering pass: the kept values become the live portion and both cursors rewind. */
    public void finalizeFiltering() {
        end = write;
        rewind();
    }

    /** Positions both cursors at {@code pos} and sets the live bound to {@code end}. */
    public void startFilterForRange(int pos, int end) {
        this.read = pos;
        this.write = pos;
        this.end = end;
    }

    /** Makes the whole backing array live again and rewinds both cursors. */
    public void reset() {
        end = data.length;
        rewind();
    }

    /** Empties the buffer, rewinds both cursors and overwrites the backing array with zeros. */
    public void zero() {
        end = 0;
        rewind();
        Arrays.fill(data, 0);
    }

    /**
     * Collapses runs of equal adjacent values into a single value, filtering
     * from the current cursor positions. Does nothing if at most one value is live.
     */
    public void uniq() {
        if (end <= 1) {
            return;
        }

        int last = currentValue();
        retainAndAdvance();

        while (hasMore()) {
            final int current = currentValue();
            if (current != last) {
                retainAndAdvance();
                last = current;
            } else {
                rejectAndAdvance();
            }
        }

        finalizeFiltering();
    }

    @Override
    public String toString() {
        final String liveData = Arrays.toString(Arrays.copyOf(data, end));
        return getClass().getSimpleName() + "[" +
                "read = " + read +
                ",write = " + write +
                ",end = " + end +
                ",data = [" + liveData + "]]";
    }

    /** Moves both cursors back to the start of the buffer. */
    private void rewind() {
        read = 0;
        write = 0;
    }
}

View File

@ -1,212 +0,0 @@
package nu.marginalia.array.delegate;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.algo.SortingContext;
import nu.marginalia.array.buffer.IntQueryBuffer;
import nu.marginalia.array.functional.*;
import java.io.IOException;
import java.nio.IntBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
/**
 * A view onto another {@link IntArray} in which every position is offset by a
 * fixed {@link #shift}. Position {@code p} of this view corresponds to position
 * {@code p + shift} of the delegate; all range arguments are translated the same
 * way before being forwarded.
 */
public class ShiftedIntArray implements IntArray {
    /** Offset added to every position before it is passed to the delegate. */
    public final long shift;
    /** Number of positions exposed by this view. */
    public final long size;

    private final IntArray delegate;

    /** Creates a view starting at {@code shift} and extending to the end of {@code delegate}. */
    public ShiftedIntArray(long shift, IntArray delegate) {
        this.shift = shift;
        this.size = delegate.size() - shift;

        this.delegate = delegate;
    }

    /** Creates a view of the delegate positions {@code start} (inclusive) to {@code end} (exclusive). */
    public ShiftedIntArray(long start, long end, IntArray delegate) {
        this.shift = start;
        this.size = end - start;
        this.delegate = delegate;
    }

    @Override
    public int get(long pos) {
        return delegate.get(pos+shift);
    }

    @Override
    public void set(long pos, int value) {
        delegate.set(pos+shift, value);
    }

    @Override
    public void set(long start, long end, IntBuffer buffer, int bufferStart) {
        delegate.set(shift + start, shift + end, buffer, bufferStart);
    }

    @Override
    public void get(long start, long end, IntBuffer buffer, int bufferStart) {
        delegate.get(shift + start, shift + end, buffer, bufferStart);
    }

    @Override
    public void get(long start, IntBuffer buffer) {
        delegate.get(shift + start, buffer);
    }

    @Override
    public void get(long start, long end, int[] buffer) {
        delegate.get(shift+start, shift+end, buffer);
    }

    @Override
    public int getAndIncrement(long pos) {
        return delegate.getAndIncrement(shift + pos);
    }

    @Override
    public void fill(long start, long end, int val) {
        delegate.fill(start + shift, end + shift, val);
    }

    @Override
    public long size() {
        return size;
    }

    /** Not supported for shifted views. */
    @Override
    public void write(Path file) throws IOException {
        throw new UnsupportedOperationException();
    }

    /** Returns a view shifted a further {@code offset} positions into the same delegate. */
    @Override
    public ShiftedIntArray shifted(long offset) {
        return new ShiftedIntArray(shift+offset, delegate);
    }

    /** Returns a view of the positions {@code start} to {@code end} of this view, backed by the same delegate. */
    @Override
    public ShiftedIntArray range(long start, long end) {
        return new ShiftedIntArray(shift + start, shift+end, delegate);
    }

    public ArrayRangeReference<IntArray> directRangeIfPossible(long start, long end) {
        return delegate.directRangeIfPossible(shift + start, shift + end);
    }

    /** Copies the contents of this view into a new Java array. */
    public int[] toArray() {
        int[] ret = new int[(int) size];
        for (int i = 0; i < size; i++) {
            ret[i] = delegate.get(shift + i);
        }
        return ret;
    }

    public boolean isSorted() {
        return isSorted(0, size);
    }

    public boolean isSorted(long start, long end) {
        return delegate.isSorted(shift + start, shift + end);
    }

    public void sortLargeSpan(SortingContext ctx, long start, long end) throws IOException {
        // start/end are positions in this view and must be translated like every
        // other range argument; forwarding them unshifted sorted the wrong span
        delegate.sortLargeSpan(ctx, shift + start, shift + end);
    }

    /** Searches the whole view, linearly for views shorter than 128 entries and by binary search otherwise. */
    public long search(int key) {
        if (size < 128) {
            return linearSearch(key);
        }
        else {
            return binarySearch(key);
        }
    }

    public long linearSearch(int key) {
        return linearSearch(key, 0, size);
    }

    public long binarySearch(int key) {
        return binarySearch(key, 0, size);
    }

    public long binarySearchUpperbound(int key) {
        return binarySearchUpperBound(key, 0, size);
    }

    public void retain(IntQueryBuffer buffer, long boundary) {
        retain(buffer, boundary, 0, size);
    }

    public void reject(IntQueryBuffer buffer, long boundary) {
        reject(buffer, boundary, 0, size);
    }

    @Override
    public long linearSearch(int key, long fromIndex, long toIndex) {
        return translateSearchResult(delegate.linearSearch(key, fromIndex + shift, toIndex+shift));
    }

    @Override
    public long binarySearch(int key, long fromIndex, long toIndex) {
        return translateSearchResult(delegate.binarySearch(key, fromIndex + shift, toIndex+shift));
    }

    @Override
    public long binarySearchUpperBound(int key, long fromIndex, long toIndex) {
        return translateSearchResult(delegate.binarySearchUpperBound(key, fromIndex + shift, toIndex+shift));
    }

    /**
     * Maps a search result expressed in delegate positions back into this view:
     * positive results have the shift subtracted, all other results have it added.
     * NOTE(review): presumably non-positive results encode a negated position - confirm
     * against the delegate's search contract.
     */
    private long translateSearchResult(long ret) {
        if (ret > 0) return ret - shift;
        return ret + shift;
    }

    @Override
    public void retain(IntQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
        delegate.retain(buffer, boundary, searchStart + shift, searchEnd + shift);
    }

    @Override
    public void reject(IntQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
        delegate.reject(buffer, boundary, searchStart + shift, searchEnd + shift);
    }

    // The callbacks below receive positions relative to this view, so the shift is
    // removed again before the caller's function is invoked.

    @Override
    public void forEach(long start, long end, LongIntConsumer consumer) {
        delegate.forEach(start + shift, end+shift, (pos, old) -> consumer.accept(pos-shift, old));
    }

    @Override
    public void transformEach(long start, long end, IntTransformer transformer) {
        delegate.transformEach(start + shift, end+shift, (pos, old) -> transformer.transform(pos-shift, old));
    }

    @Override
    public void transformEachIO(long start, long end, IntIOTransformer transformer) throws IOException {
        delegate.transformEachIO(start + shift, end+shift, (pos, old) -> transformer.transform(pos-shift, old));
    }

    @Override
    public int foldIO(int zero, long start, long end, IntBinaryIOOperation operator) throws IOException {
        return delegate.foldIO(zero, start + shift, end+shift, operator);
    }

    @Override
    public int fold(int zero, long start, long end, IntBinaryOperation operator){
        return delegate.fold(zero, start + shift, end+shift, operator);
    }

    @Override
    public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
        delegate.transferFrom(source, sourceStart, shift + arrayStart, shift + arrayEnd);
    }

    @Override
    public void force() {
        delegate.force();
    }

}

View File

@ -1,212 +0,0 @@
package nu.marginalia.array.delegate;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.LongArraySearch;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.functional.*;
import java.io.IOException;
import java.nio.LongBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
/**
 * A view onto another {@link LongArray} in which every position is offset by a
 * fixed {@link #shift}. Position {@code p} of this view corresponds to position
 * {@code p + shift} of the delegate; all range arguments are translated the same
 * way before being forwarded.
 */
public class ShiftedLongArray implements LongArray {
    /** Offset added to every position before it is passed to the delegate. */
    public final long shift;
    /** Number of positions exposed by this view. */
    public final long size;
    private final LongArray delegate;

    /** Creates a view starting at {@code shift} and extending to the end of {@code delegate}. */
    public ShiftedLongArray(long shift, LongArray delegate) {
        this.shift = shift;
        this.size = delegate.size() - shift;

        this.delegate = delegate;
    }

    /** Creates a view of the delegate positions {@code start} (inclusive) to {@code end} (exclusive). */
    public ShiftedLongArray(long start, long end, LongArray delegate) {
        this.shift = start;
        this.size = end - start;
        this.delegate = delegate;
    }

    @Override
    public long get(long pos) {
        return delegate.get(pos+shift);
    }

    @Override
    public void set(long pos, long value) {
        delegate.set(pos+shift, value);
    }

    @Override
    public void set(long start, long end, LongBuffer buffer, int bufferStart) {
        delegate.set(shift + start, shift + end, buffer, bufferStart);
    }

    @Override
    public void get(long start, long end, LongBuffer buffer, int bufferStart) {
        delegate.get(shift + start, shift + end, buffer, bufferStart);
    }

    /** Closes the delegate. */
    @Override
    public void close() {
        delegate.close();
    }

    @Override
    public void get(long start, LongBuffer buffer) {
        delegate.get(shift + start, buffer);
    }

    @Override
    public void get(long start, long end, long[] buffer) {
        delegate.get(shift+start, shift+end, buffer);
    }

    @Override
    public long getAndIncrement(long pos) {
        return delegate.getAndIncrement(shift + pos);
    }

    @Override
    public void fill(long start, long end, long val) {
        delegate.fill(start + shift, end + shift, val);
    }

    @Override
    public void quickSortNative(long start, long end) {
        delegate.quickSortNative(start + shift, end + shift);
    }

    @Override
    public void quickSortNative128(long start, long end) {
        // start/end are positions in this view; they must be translated exactly as
        // in quickSortNative, otherwise the wrong span of the delegate is sorted
        delegate.quickSortNative128(start + shift, end + shift);
    }

    @Override
    public long size() {
        return size;
    }

    /** Not supported for shifted views. */
    @Override
    public void write(Path file) throws IOException {
        throw new UnsupportedOperationException();
    }

    /** Returns a view shifted a further {@code offset} positions into the same delegate. */
    @Override
    public ShiftedLongArray shifted(long offset) {
        return new ShiftedLongArray(shift+offset, delegate);
    }

    /** Returns a view of the positions {@code start} to {@code end} of this view, backed by the same delegate. */
    @Override
    public ShiftedLongArray range(long start, long end) {
        return new ShiftedLongArray(shift + start, shift+end, delegate);
    }

    /** Copies the contents of this view into a new Java array. */
    public long[] toArray() {
        long[] ret = new long[(int) size];
        for (int i = 0; i < size; i++) {
            ret[i] = delegate.get(shift + i);
        }
        return ret;
    }

    public boolean isSorted() {
        return isSorted(0, size);
    }

    public boolean isSortedN(int sz) {
        return isSortedN(sz, 0, size);
    }

    public boolean isSorted(long start, long end) {
        return delegate.isSorted(shift + start, shift + end);
    }

    public void quickSort(long start, long end) {
        delegate.quickSort(shift + start, shift + end);
    }

    public void quickSortN(int sz, long start, long end) {
        delegate.quickSortN(sz, shift + start, shift + end);
    }

    public ArrayRangeReference<LongArray> directRangeIfPossible(long start, long end) {
        return delegate.directRangeIfPossible(shift + start, shift + end);
    }

    public boolean isSortedN(int sz, long start, long end) {
        return delegate.isSortedN(sz, shift + start, shift + end);
    }

    /** Binary-searches the whole view for {@code key}. */
    public long search(long key) {
        return binarySearch(key, 0, size);
    }

    public void retain(LongQueryBuffer buffer, long boundary) {
        retain(buffer, boundary, 0, size);
    }

    public void reject(LongQueryBuffer buffer, long boundary) {
        reject(buffer, boundary, 0, size);
    }

    // Search results come back in delegate positions; the shift is subtracted to
    // express them relative to this view.

    @Override
    public long binarySearchN(int sz, long key, long fromIndex, long toIndex) {
        return delegate.binarySearchN(sz, key, fromIndex + shift, toIndex+shift) - shift;
    }

    @Override
    public long binarySearch(long key, long fromIndex, long toIndex) {
        return delegate.binarySearch(key, fromIndex + shift, toIndex+shift) - shift;
    }

    public void retain(LongQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
        delegate.retain(buffer, boundary, searchStart + shift, searchEnd + shift);
    }

    public void retainN(LongQueryBuffer buffer, int sz, long boundary, long searchStart, long searchEnd) {
        delegate.retainN(buffer, sz, boundary, searchStart + shift, searchEnd + shift);
    }

    public void reject(LongQueryBuffer buffer, long boundary, long searchStart, long searchEnd) {
        delegate.reject(buffer, boundary, searchStart + shift, searchEnd + shift);
    }

    public void rejectN(LongQueryBuffer buffer, int sz, long boundary, long searchStart, long searchEnd) {
        delegate.rejectN(buffer, sz, boundary, searchStart + shift, searchEnd + shift);
    }

    // The callbacks below receive positions relative to this view, so the shift is
    // removed again before the caller's function is invoked.

    @Override
    public void forEach(long start, long end, LongLongConsumer consumer) {
        delegate.forEach(start + shift, end+shift, (pos, old) -> consumer.accept(pos-shift, old));
    }

    @Override
    public void transformEach(long start, long end, LongTransformer transformer) {
        delegate.transformEach(start + shift, end+shift, (pos, old) -> transformer.transform(pos-shift, old));
    }

    @Override
    public void transformEachIO(long start, long end, LongIOTransformer transformer) throws IOException {
        delegate.transformEachIO(start + shift, end+shift, (pos, old) -> transformer.transform(pos-shift, old));
    }

    @Override
    public long foldIO(long zero, long start, long end, LongBinaryIOOperation operator) throws IOException {
        return delegate.foldIO(zero, start + shift, end+shift, operator);
    }

    @Override
    public long fold(long zero, long start, long end, LongBinaryOperation operator) {
        return delegate.fold(zero, start + shift, end+shift, operator);
    }

    @Override
    public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
        delegate.transferFrom(source, sourceStart, shift + arrayStart, shift + arrayEnd);
    }

    @Override
    public void force() {
        delegate.force();
    }

}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/**
 * Callback invoked with an array and the two int bounds of a range within it.
 *
 * @param <T> the type of array the callback operates on
 */
@FunctionalInterface
public interface AddressRangeCall<T> {
    /**
     * @param array the array to operate on
     * @param start the start of the range
     * @param end   the end of the range
     */
    void apply(T array, int start, int end);
}

View File

@ -1,7 +0,0 @@
package nu.marginalia.array.functional;
import java.io.IOException;
/**
 * Callback invoked with an array and the two int bounds of a range within it;
 * the callback is allowed to fail with an {@link IOException}.
 *
 * @param <T> the type of array the callback operates on
 */
@FunctionalInterface
public interface AddressRangeCallIO<T> {
    /**
     * @param array the array to operate on
     * @param start the start of the range
     * @param end   the end of the range
     * @throws IOException if the operation performs I/O that fails
     */
    void apply(T array, int start, int end) throws IOException;
}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/**
 * Function of an array and the two int bounds of a range within it, yielding an int.
 *
 * @param <T> the type of array the function operates on
 */
@FunctionalInterface
public interface AddressRangeIntFunction<T> {
    /**
     * @param array the array to operate on
     * @param start the start of the range
     * @param end   the end of the range
     * @return the int result computed for the range
     */
    int apply(T array, int start, int end);
}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/**
 * Function of an array and the two int bounds of a range within it, yielding a long.
 *
 * @param <T> the type of array the function operates on
 */
@FunctionalInterface
public interface AddressRangeLongFunction<T> {
    /**
     * @param array the array to operate on
     * @param start the start of the range
     * @param end   the end of the range
     * @return the long result computed for the range
     */
    long apply(T array, int start, int end);
}

View File

@ -1,7 +0,0 @@
package nu.marginalia.array.functional;
import java.io.IOException;
/**
 * Operation combining two int operands into one int result; the operation is
 * allowed to fail with an {@link IOException}.
 */
@FunctionalInterface
public interface IntBinaryIOOperation {
    /**
     * @param left  the first operand
     * @param right the second operand
     * @return the combined value
     * @throws IOException if the operation performs I/O that fails
     */
    int apply(int left, int right) throws IOException;
}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/** Operation combining two int operands into one int result. */
@FunctionalInterface
public interface IntBinaryOperation {
    /**
     * @param left  the first operand
     * @param right the second operand
     * @return the combined value
     */
    int apply(int left, int right);
}

View File

@ -1,7 +0,0 @@
package nu.marginalia.array.functional;
import java.io.IOException;
/**
 * Computes an int from a position and the int value previously associated with
 * that position; the computation is allowed to fail with an {@link IOException}.
 */
@FunctionalInterface
public interface IntIOTransformer {
    /**
     * @param pos the position being transformed
     * @param old the previous value at {@code pos}
     * @return the transformed value
     * @throws IOException if the transformation performs I/O that fails
     */
    int transform(long pos, int old) throws IOException;
}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/** Computes an int from a position and the int value previously associated with that position. */
@FunctionalInterface
public interface IntTransformer {
    /**
     * @param pos the position being transformed
     * @param old the previous value at {@code pos}
     * @return the transformed value
     */
    int transform(long pos, int old);
}

View File

@ -1,7 +0,0 @@
package nu.marginalia.array.functional;
import java.io.IOException;
/**
 * Operation combining two long operands into one long result; the operation is
 * allowed to fail with an {@link IOException}.
 */
@FunctionalInterface
public interface LongBinaryIOOperation {
    /**
     * @param left  the first operand
     * @param right the second operand
     * @return the combined value
     * @throws IOException if the operation performs I/O that fails
     */
    long apply(long left, long right) throws IOException;
}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/** Operation combining two long operands into one long result. */
@FunctionalInterface
public interface LongBinaryOperation {
    /**
     * @param left  the first operand
     * @param right the second operand
     * @return the combined value
     */
    long apply(long left, long right);
}

View File

@ -1,7 +0,0 @@
package nu.marginalia.array.functional;
import java.io.IOException;
/**
 * Computes a long from a position and the long value previously associated with
 * that position; the computation is allowed to fail with an {@link IOException}.
 */
@FunctionalInterface
public interface LongIOTransformer {
    /**
     * @param pos the position being transformed
     * @param old the previous value at {@code pos}
     * @return the transformed value
     * @throws IOException if the transformation performs I/O that fails
     */
    long transform(long pos, long old) throws IOException;
}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/** Callback receiving a long position together with the int value at that position. */
@FunctionalInterface
public interface LongIntConsumer {
    /**
     * @param pos the position of the value
     * @param val the value at {@code pos}
     */
    void accept(long pos, int val);
}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/** Callback receiving a long position together with the long value at that position. */
@FunctionalInterface
public interface LongLongConsumer {
    /**
     * @param pos the position of the value
     * @param val the value at {@code pos}
     */
    void accept(long pos, long val);
}

View File

@ -1,5 +0,0 @@
package nu.marginalia.array.functional;
/** Computes a long from a position and the long value previously associated with that position. */
@FunctionalInterface
public interface LongTransformer {
    /**
     * @param pos the position being transformed
     * @param old the previous value at {@code pos}
     * @return the transformed value
     */
    long transform(long pos, long old);
}

View File

@ -1,4 +1,4 @@
package nu.marginalia.array.buffer;
package nu.marginalia.array.page;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;

View File

@ -1,22 +0,0 @@
package nu.marginalia.array.page;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
/**
 * A page of data that exposes its contents as a {@link ByteBuffer} and knows
 * how to write that buffer to a {@link FileChannel}.
 */
public interface PartitionPage {

    /** @return the byte buffer holding this page's contents */
    ByteBuffer getByteBuffer();

    /**
     * Writes the page's byte buffer to {@code channel}. The buffer is cleared
     * (position reset, limit set to capacity) before writing, written until
     * nothing remains, and cleared again afterwards.
     *
     * @param channel the channel to write to
     * @throws IOException if a write to the channel fails
     */
    default void write(FileChannel channel) throws IOException {
        final ByteBuffer page = getByteBuffer();

        page.clear();
        // A single write() call may be partial, so keep going until the buffer is drained
        while (page.hasRemaining()) {
            channel.write(page);
        }
        page.clear();
    }
}

View File

@ -1,176 +0,0 @@
package nu.marginalia.array.page;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.IntArray;
import javax.annotation.Nullable;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import static java.lang.foreign.ValueLayout.JAVA_INT;
/**
 * {@link IntArray} backed by a {@link MemorySegment}, either allocated from an
 * {@link Arena} or memory-mapped from a file.
 *
 * <p>Instances created through the static factories hold the arena they were
 * given and close it in {@link #close()}; views created by {@link #range} and
 * {@link #shifted} share the parent's segment and hold no arena.
 */
public class SegmentIntArray implements PartitionPage, IntArray {

    /** Arena closed by {@link #close()}; null for views that do not control the memory's lifetime. */
    @Nullable
    private final Arena arena;
    private final MemorySegment segment;
    private boolean closed;

    SegmentIntArray(MemorySegment segment,
                    @Nullable Arena arena) {
        this.segment = segment;
        this.arena = arena;
    }

    /** Allocates room for {@code size} entries from {@code arena}, 8-byte aligned. */
    public static SegmentIntArray onHeap(Arena arena, long size) {
        return new SegmentIntArray(arena.allocate(WORD_SIZE*size, 8), arena);
    }

    /** Maps {@code size} ints of {@code file}, starting {@code offset} ints in, for reading. */
    public static SegmentIntArray fromMmapReadOnly(Arena arena, Path file, long offset, long size) throws IOException {
        return new SegmentIntArray(
                mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_ONLY, StandardOpenOption.READ),
                arena);
    }

    /** Maps {@code size} ints of {@code file}, starting {@code offset} ints in, for reading and writing; the file is created if missing. */
    public static SegmentIntArray fromMmapReadWrite(Arena arena, Path file, long offset, long size) throws IOException {

        return new SegmentIntArray(
                mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_WRITE,
                        StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE),
                arena);
    }

    /**
     * Maps a region of {@code file} into memory.
     *
     * @param offset start of the region, in ints (not bytes)
     * @param size   length of the region, in ints (not bytes)
     * @throws IOException wrapping the original failure with the file and region in the message
     */
    private static MemorySegment mmapFile(Arena arena,
                                          Path file,
                                          long offset,
                                          long size,
                                          FileChannel.MapMode mode,
                                          OpenOption... openOptions) throws IOException
    {
        try (var channel = (FileChannel) Files.newByteChannel(file, openOptions)) {

            return channel.map(mode,
                            JAVA_INT.byteSize() * offset,
                            JAVA_INT.byteSize() * size,
                            arena);
        }
        catch (IOException ex) {
            throw new IOException("Failed to map file " + file + " (" + offset + ":" + size + ")", ex);
        }
    }

    @Override
    public IntArray range(long start, long end) {
        return new SegmentIntArray(
                segment.asSlice(
                        start * JAVA_INT.byteSize(),
                        (end-start) * JAVA_INT.byteSize()),
                null);
    }

    @Override
    public IntArray shifted(long start) {
        return new SegmentIntArray(
                segment.asSlice(start * JAVA_INT.byteSize()),
                null);
    }

    @Override
    public int get(long at) {
        try {
            return segment.getAtIndex(JAVA_INT, at);
        }
        catch (IndexOutOfBoundsException ex) {
            // Report the bound in ints; the previous message divided the byte size by 8 (the long size)
            throw new IndexOutOfBoundsException("@" + at + "(" + 0 + ":" + size() + ")");
        }
    }

    @Override
    public void get(long start, long end, int[] buffer) {
        for (int i = 0; i < end - start; i++) {
            buffer[i] = segment.getAtIndex(JAVA_INT, start + i);
        }
    }

    @Override
    public void set(long at, int val) {
        segment.setAtIndex(JAVA_INT, at, val);
    }

    @Override
    public void set(long start, long end, IntBuffer buffer, int bufferStart) {
        for (int i = 0; i < end - start; i++) {
            set(start + i, buffer.get(bufferStart + i));
        }
    }

    /** Closes the owning arena, if any, at most once. */
    @Override
    public synchronized void close() {
        if (arena != null && !closed) {
            arena.close();
        }
        closed = true;
    }

    /** @return the number of ints in the segment */
    @Override
    public long size() {
        return segment.byteSize() / JAVA_INT.byteSize();
    }

    @Override
    public ByteBuffer getByteBuffer() {
        return segment.asByteBuffer();
    }

    /** Copies the contents of this array into {@code filename} through a temporary read-write mapping. */
    @Override
    public void write(Path filename) throws IOException {
        try (var arena = Arena.ofConfined()) {
            // fromMmapReadWrite takes a size in ints and scales it to bytes itself;
            // passing the byte size mapped (and grew the file to) four times the needed length
            var destSegment = SegmentIntArray.fromMmapReadWrite(arena, filename, 0, size());

            destSegment.segment.copyFrom(segment);
            destSegment.force();
        }
    }

    /** Forces mapped contents to storage; a no-op for segments that are not file mappings. */
    @Override
    public void force() {
        if (segment.isMapped()) {
            segment.force();
        }
    }

    public ArrayRangeReference<IntArray> directRangeIfPossible(long start, long end) {
        return new ArrayRangeReference<>(this, start, end);
    }

    /**
     * Fills the positions {@code arrayStart} to {@code arrayEnd} of this array from
     * {@code source}, reading from int offset {@code sourceStart} onwards.
     *
     * @throws IOException if reading fails or the channel ends before the range is filled
     */
    @Override
    public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {

        // Copy in chunks of 128 Mi ints (512 MiB), since a ByteBuffer cannot address 2 GiB or more
        final int stride = 1024*1024*128;

        long ss = sourceStart;
        for (long as = arrayStart; as < arrayEnd; as += stride, ss += stride) {
            long ae = Math.min(as + stride, arrayEnd);

            long index = as * JAVA_INT.byteSize();
            long length = (ae - as) * JAVA_INT.byteSize();

            var bufferSlice = segment.asSlice(index, length).asByteBuffer();

            long startPos = ss * JAVA_INT.byteSize();
            while (bufferSlice.position() < bufferSlice.capacity()) {
                int bytesRead = source.read(bufferSlice, startPos + bufferSlice.position());
                if (bytesRead < 0) {
                    // read() reports end-of-file with -1 and never advances the buffer,
                    // so without this check the loop would spin forever on a short file
                    throw new IOException("Unexpected end of file at byte "
                            + (startPos + bufferSlice.position())
                            + " while filling positions " + as + ":" + ae);
                }
            }
        }

    }

}

View File

@ -1,14 +1,11 @@
package nu.marginalia.array.page;
import nu.marginalia.NativeAlgos;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.LongArray;
import javax.annotation.Nullable;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
import java.nio.LongBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
@ -18,7 +15,8 @@ import java.nio.file.StandardOpenOption;
import static java.lang.foreign.ValueLayout.JAVA_LONG;
public class SegmentLongArray implements PartitionPage, LongArray {
@SuppressWarnings("preview")
public class SegmentLongArray implements LongArray {
@Nullable
private final Arena arena;
@ -126,20 +124,6 @@ public class SegmentLongArray implements PartitionPage, LongArray {
return segment.byteSize() / JAVA_LONG.byteSize();
}
@Override
public void quickSortNative(long start, long end) {
NativeAlgos.sort(segment, start, end);
}
@Override
public void quickSortNative128(long start, long end) {
NativeAlgos.sort128(segment, start, end);
}
@Override
public ByteBuffer getByteBuffer() {
return segment.asByteBuffer();
}
@Override
public void write(Path filename) throws IOException {
try (var arena = Arena.ofConfined()) {
@ -158,10 +142,6 @@ public class SegmentLongArray implements PartitionPage, LongArray {
}
public ArrayRangeReference<LongArray> directRangeIfPossible(long start, long end) {
return new ArrayRangeReference<>(this, start, end);
}
@Override
public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
@ -188,4 +168,8 @@ public class SegmentLongArray implements PartitionPage, LongArray {
}
@Override
public MemorySegment getMemorySegment() {
return segment;
}
}

View File

@ -1,7 +1,5 @@
package nu.marginalia.array.page;
import nu.marginalia.NativeAlgos;
import nu.marginalia.array.ArrayRangeReference;
import nu.marginalia.array.LongArray;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -12,7 +10,6 @@ import javax.annotation.Nullable;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
import java.nio.LongBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
@ -23,7 +20,9 @@ import static java.lang.foreign.ValueLayout.JAVA_LONG;
/** Variant of SegmentLongArray that uses Unsafe to access the memory.
* */
public class UnsafeLongArray implements PartitionPage, LongArray {
@SuppressWarnings("preview")
public class UnsafeLongArray implements LongArray {
private static final Unsafe unsafe = UnsafeProvider.getUnsafe();
private static final Logger logger = LoggerFactory.getLogger(UnsafeLongArray.class);
@ -114,6 +113,11 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
unsafe.putLong(segment.address() + at * JAVA_LONG.byteSize(), val);
}
@Override
public MemorySegment getMemorySegment() {
return segment;
}
@Override
public void set(long start, long end, LongBuffer buffer, int bufferStart) {
for (int i = 0; i < end - start; i++) {
@ -143,11 +147,6 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
return segment.byteSize() / JAVA_LONG.byteSize();
}
@Override
public ByteBuffer getByteBuffer() {
return segment.asByteBuffer();
}
@Override
public void write(Path filename) throws IOException {
try (var arena = Arena.ofConfined()) {
@ -172,10 +171,6 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
}
}
public ArrayRangeReference<LongArray> directRangeIfPossible(long start, long end) {
return new ArrayRangeReference<>(this, start, end);
}
public void chanelChannelTransfer(FileChannel source,
long sourceStartL,
long arrayStartL,
@ -274,14 +269,4 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
}
}
@Override
public void quickSortNative(long start, long end) {
NativeAlgos.sort(segment, start, end);
}
@Override
public void quickSortNative128(long start, long end) {
NativeAlgos.sort128(segment, start, end);
}
}

View File

@ -1,51 +0,0 @@
package nu.marginalia.array.scheme;
/**
 * Strategy for splitting a long-indexed address space into fixed-size pages,
 * translating between absolute positions and (page, offset) pairs.
 */
public interface ArrayPartitioningScheme {

    /**
     * Picks an implementation for the given page size: the power-of-two scheme
     * when {@code size} equals its own highest one-bit, the sequential scheme otherwise.
     */
    static ArrayPartitioningScheme forPartitionSize(int size) {
        final boolean isPowerOfTwo = Integer.highestOneBit(size) == size;

        return isPowerOfTwo
                ? new PowerOf2PartitioningScheme(size)
                : new SequentialPartitioningScheme(size);
    }

    /**
     * Number of pages of {@code partitionSize} entries needed for {@code cardinality}
     * entries: the number of full pages, plus one if there is a remainder.
     */
    static int getRequiredPartitions(long cardinality, int partitionSize) {
        final long fullPages = cardinality / partitionSize;
        final int partialPage = Long.signum(cardinality % partitionSize);

        return (int) (fullPages + partialPage);
    }

    /** Number of pages needed to hold {@code cardinality} entries. */
    int getPartitions(long cardinality);

    /** Get the page containing the position {@code at}. */
    int getPage(long at);

    /** Tells whether the positions {@code a} and {@code b} fall on the same page. */
    boolean isSamePage(long a, long b);

    /** Get the offset within its page corresponding to the position {@code at}. */
    int getOffset(long at);

    /**
     * Variant of {@link #getOffset} for exclusive end positions. An exclusive end
     * that coincides with a page boundary would be translated by {@code getOffset}
     * to offset 0 of the next page; this method instead returns one past the offset
     * of the last included position, {@code end - 1}.
     *
     * <p>The start position is taken into account as well: when {@code end} is 0 or
     * does not lie beyond {@code start}, the plain {@code getOffset(end)} is returned.
     */
    default int getEndOffset(long start, long end) {
        final boolean emptyOrInverted = end == 0 || end <= start;

        if (emptyOrInverted) {
            return getOffset(end);
        }
        return 1 + getOffset(end - 1);
    }

    /** Get the end of the page containing {@code at}, or {@code endTotal}, whichever is smaller. */
    long getPageEnd(long at, long endTotal);

    /**
     * Inverse of the page/offset translation:
     * {@code toRealIndex(getPage(val), getOffset(val)) = val}
     */
    long toRealIndex(int buffer, int offset);

    /** Size of the page with index {@code buffer} when the total number of entries is {@code cardinality}. */
    int getRequiredPageSize(int buffer, long cardinality);
}

View File

@ -1,60 +0,0 @@
package nu.marginalia.array.scheme;
/**
 * {@link ArrayPartitioningScheme} for page sizes that are a power of two, where
 * page and offset are extracted with a shift and a mask.
 */
public class PowerOf2PartitioningScheme implements ArrayPartitioningScheme {
    final int partitionSize;
    /** Selects the bits of a position that form the offset within its page. */
    final long offsetMask;
    /** Complement of {@link #offsetMask}: selects the bits that identify the page. */
    final long bufferMask;
    /** Number of offset bits, i.e. log2 of the page size. */
    final int pageShift;

    /**
     * @param partitionSize page size; asserted to be equal to its own highest one-bit
     */
    public PowerOf2PartitioningScheme(int partitionSize) {
        assert partitionSize == Integer.highestOneBit(partitionSize);

        this.partitionSize = partitionSize;
        this.offsetMask = partitionSize - 1;
        this.bufferMask = ~this.offsetMask;
        this.pageShift = Integer.numberOfTrailingZeros(partitionSize);
    }

    @Override
    public int getPartitions(long cardinality) {
        return ArrayPartitioningScheme.getRequiredPartitions(cardinality, partitionSize);
    }

    @Override
    public int getPage(long at) { // very hot code
        final long page = at >>> pageShift;
        return (int) page;
    }

    @Override
    public int getOffset(long at) { // very hot code
        final long offset = at & offsetMask;
        return (int) offset;
    }

    @Override
    public boolean isSamePage(long a, long b) { // hot code
        // Positions share a page when they agree on every page-identifying bit
        final long differingPageBits = (a ^ b) & bufferMask;
        return differingPageBits == 0;
    }

    @Override
    public long getPageEnd(long at, long endTotal) {
        final long endOfPage = partitionSize * (1L + getPage(at));
        return Math.min(endTotal, endOfPage);
    }

    @Override
    public long toRealIndex(int buffer, int offset) {
        final long pageStart = (long) buffer * partitionSize;
        return offset + pageStart;
    }

    @Override
    public int getRequiredPageSize(int buffer, long cardinality) {
        final long endOfPage = (long) (1 + buffer) * partitionSize;

        // A page that ends within the data is full; otherwise only the remainder is needed
        return endOfPage <= cardinality
                ? partitionSize
                : (int) (cardinality % partitionSize);
    }
}

View File

@ -1,56 +0,0 @@
package nu.marginalia.array.scheme;
/**
 * {@link ArrayPartitioningScheme} for arbitrary page sizes, where page and
 * offset are computed by division and remainder.
 */
public class SequentialPartitioningScheme implements ArrayPartitioningScheme {

    final int partitionSize;

    /** @param partitionSize number of entries per page */
    public SequentialPartitioningScheme(int partitionSize) {
        this.partitionSize = partitionSize;
    }

    /**
     * Number of pages of {@code partitionSize} entries needed for {@code cardinality}
     * entries. Kept for callers of this class; delegates to the interface's
     * helper so the formula lives in one place.
     */
    public static int getRequiredPartitions(long cardinality, int partitionSize) {
        return ArrayPartitioningScheme.getRequiredPartitions(cardinality, partitionSize);
    }

    @Override
    public int getPartitions(long cardinality) {
        return getRequiredPartitions(cardinality, partitionSize);
    }

    @Override
    public int getPage(long at) {
        return (int) (at / partitionSize);
    }

    @Override
    public long getPageEnd(long at, long endTotal) {
        return Math.min(endTotal, partitionSize * (1L + getPage(at)));
    }

    @Override
    public boolean isSamePage(long a, long b) {
        return getPage(a) == getPage(b);
    }

    @Override
    public int getOffset(long at) {
        return (int) (at % partitionSize);
    }

    @Override
    public long toRealIndex(int buffer, int offset) {
        return offset + (long) buffer * partitionSize;
    }

    @Override
    public int getRequiredPageSize(int buffer, long cardinality) {
        // A page that ends within the data is full; otherwise only the remainder is needed
        if ((long) (1 + buffer) * partitionSize <= cardinality) {
            return partitionSize;
        }
        return (int) (cardinality % partitionSize);
    }

}

View File

@ -36,11 +36,9 @@ try (var array = LongArrayFactory.mmapForWritingConfined(Path.of("/tmp/test"), 1
## Query Buffers
The classes [IntQueryBuffer](java/nu/marginalia/array/buffer/IntQueryBuffer.java)
and [LongQueryBuffer](java/nu/marginalia/array/buffer/LongQueryBuffer.java) are used
heavily in the search engine's query processing.
The class [LongQueryBuffer](java/nu/marginalia/array/buffer/LongQueryBuffer.java) is used heavily in the search engine's query processing.
They are dual-pointer buffers that offer tools for filtering data.
It is a dual-pointer buffer that offers tools for filtering data.
```java
LongQueryBuffer buffer = new LongQueryBuffer(1000);
@ -78,8 +76,7 @@ buffer.finalizeFiltering();
```
Especially noteworthy are the operations `retain()` and `reject()` in
[IntArraySearch](java/nu/marginalia/array/algo/IntArraySearch.java) and [LongArraySearch](java/nu/marginalia/array/algo/LongArraySearch.java).
Especially noteworthy are the operations `retain()` and `reject()` in [LongArraySearch](java/nu/marginalia/array/algo/LongArraySearch.java).
They keep or remove all items in the buffer that exist in the referenced range of the array,
which must be sorted.

View File

@ -10,14 +10,14 @@ import java.util.Random;
public class SearchBenchmark {
@State(Scope.Benchmark)
public static class SortState {
public static class SearchState {
public SortState()
public SearchState()
{
msArray.transformEach(0, size, (pos,old) -> ~pos);
usArray.transformEach(0, size, (pos,old) -> ~pos);
msArray.quickSortJava(0, size);
usArray.quickSortJava(0, size);
msArray.sort(0, size);
usArray.sort(0, size);
keys = new long[1000];
Random r = new Random();
for (int i = 0; i < 1000; i++) {
@ -36,27 +36,12 @@ public class SearchBenchmark {
@Warmup(iterations = 5)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public long msSort64(SortState state) {
public long msSearch64(SearchState state) {
var array = state.usArray;
long ret = 0;
for (var key : state.keys) {
ret += array.binarySearchNJava(2, key, 0, array.size());
}
return ret;
}
@Fork(value = 3, warmups = 5)
@Warmup(iterations = 5)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public long msSort64_2(SortState state) {
var array = state.usArray;
long ret = 0;
for (var key : state.keys) {
ret += array.binarySearchNJava2(2, key, 0, array.size());
ret += array.binarySearch(key, 0, array.size());
}
return ret;
@ -66,12 +51,12 @@ public class SearchBenchmark {
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public long msSort128(SortState state) {
public long msSearch128(SearchState state) {
var array = state.msArray;
long ret = 0;
for (var key : state.keys) {
ret += array.binarySearchNJava(2, 0, array.size(), key);
ret += array.binarySearchN(2, 0, array.size(), key);
}
return ret;
@ -81,12 +66,12 @@ public class SearchBenchmark {
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public long usSort64(SortState state) {
public long usSearch64(SearchState state) {
var array = state.usArray;
long ret = 0;
for (var key : state.keys) {
ret += array.binarySearchUpperBoundJava(0, array.size(), key);
ret += array.binarySearch(0, array.size(), key);
}
return ret;
@ -96,12 +81,12 @@ public class SearchBenchmark {
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public long usSort128(SortState state) {
public long usSearch128(SearchState state) {
var array = state.usArray;
long ret = 0;
for (var key : state.keys) {
ret += array.binarySearchNJava(2, 0, array.size(), key);
ret += array.binarySearchN(2, 0, array.size(), key);
}
return ret;

View File

@ -1,6 +1,8 @@
package nu.marginalia.array.page;
import nu.marginalia.NativeAlgos;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.algo.LongArraySort;
import org.openjdk.jmh.annotations.*;
import java.lang.foreign.Arena;
@ -30,31 +32,7 @@ public class SortBenchmark {
public LongArray msSort64(BenchState state) {
var array = state.msArray;
array.quickSortJavaN(2, 0, array.size());
return array;
}
@Fork(value = 5, warmups = 5)
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public LongArray msSort128(BenchState state) {
var array = state.msArray;
array.quickSortJavaN(2, 0, array.size());
return array;
}
@Fork(value = 5, warmups = 5)
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public LongArray usSort128(BenchState state) {
var array = state.usArray;
array.quickSortJavaN(2, 0, array.size());
LongArraySort.quickSortJava(array, 0, array.size());
return array;
}
@ -66,7 +44,55 @@ public class SortBenchmark {
public LongArray usSort64(BenchState state) {
var array = state.usArray;
array.quickSortJavaN(2, 0, array.size());
LongArraySort.quickSortJava(array,0, array.size());
return array;
}
@Fork(value = 5, warmups = 5)
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public LongArray msSort128(BenchState state) {
var array = state.msArray;
LongArraySort.quickSortJavaN(array,2, 0, array.size());
return array;
}
@Fork(value = 5, warmups = 5)
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public LongArray usSort128(BenchState state) {
var array = state.usArray;
LongArraySort.quickSortJavaN(array,2, 0, array.size());
return array;
}
@Fork(value = 5, warmups = 5)
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public LongArray msSort128_2(BenchState state) {
var array = state.msArray;
LongArraySort.quickSortJava2(array, 0, array.size());
return array;
}
@Fork(value = 5, warmups = 5)
@Warmup(iterations = 1)
@Benchmark
@BenchmarkMode(Mode.Throughput)
public LongArray usSort128_2(BenchState state) {
var array = state.usArray;
LongArraySort.quickSortJava2(array,0, array.size());
return array;
}
@ -82,7 +108,7 @@ public class SortBenchmark {
var array = state.usArray; // realistically doesn't matter
array.quickSortNative128(0, array.size());
NativeAlgos.sort128(array.getMemorySegment(), 0, array.size());
return array;
}
@ -95,7 +121,7 @@ public class SortBenchmark {
var array = state.usArray; // realistically doesn't matter
array.quickSortNative(0, array.size());
NativeAlgos.sort(array.getMemorySegment(), 0, array.size());
return array;
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.array;
import nu.marginalia.array.scheme.SequentialPartitioningScheme;
import org.junit.jupiter.api.Test;
/**
 * Smoke test for {@link SequentialPartitioningScheme}.
 * <p>
 * The test makes no assertions: it prints what the scheme reports and can only
 * fail if one of the calls throws.
 */
class IntLowBitPartitioningSchemeTest {

    @Test
    public void testLBPT() {
        var scheme = new SequentialPartitioningScheme(18);

        // Required page size with first argument 0..3 and second argument 51.
        // NOTE(review): presumably (buffer index, total size) - confirm against the scheme.
        for (int first = 0; first < 4; first++) {
            System.out.println(scheme.getRequiredPageSize(first, 51));
        }

        // Dump the page:offset pair the scheme reports for the first 100 indices.
        int index = 0;
        while (index < 100) {
            System.out.println(scheme.getPage(index) + ":" + scheme.getOffset(index));
            index++;
        }
    }
}

View File

@ -1,132 +0,0 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.IntArray;
import nu.marginalia.array.buffer.IntQueryBuffer;
import nu.marginalia.array.page.SegmentIntArray;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.lang.foreign.Arena;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Tests the search and buffer-filtering operations of {@link IntArray}:
 * linear search, binary search, upper-bound binary search, {@code retain()}
 * and {@code reject()}.
 * <p>
 * Every array under test is filled so that index {@code i} holds {@code 3*i},
 * which means exactly the multiples of three are present.
 */
class IntArraySearchTest {
    IntArray basicArray = IntArray.allocate(1024);
    IntArray pagingArray = SegmentIntArray.onHeap(Arena.global(), 1024);
    IntArray shiftedArray = IntArray.allocate(1054).range(30, 1054);

    @BeforeEach
    public void setUp() {
        for (int i = 0; i < basicArray.size(); i++) {
            basicArray.set(i, 3*i);
            pagingArray.set(i, 3*i);
            shiftedArray.set(i, 3*i);
        }
    }

    @Test
    void linearSearch() {
        linearSearchTester(basicArray);
        linearSearchTester(pagingArray);
        linearSearchTester(shiftedArray);
    }

    @Test
    void binarySearch() {
        binarySearchTester(basicArray);
        binarySearchTester(pagingArray);
        binarySearchTester(shiftedArray);
    }

    @Test
    void binarySearchUpperbound() {
        binarySearchUpperBoundTester(basicArray);
        binarySearchUpperBoundTester(pagingArray);
        binarySearchUpperBoundTester(shiftedArray);
    }

    /** Searches for every value in [0, 3*size): multiples of three must be found, others must miss. */
    void linearSearchTester(IntArray array) {
        for (int i = 0; i < array.size() * 3; i++) {
            long ret = array.linearSearch(i, 0, array.size());

            if ((i % 3) == 0) {
                assertTrue(ret >= 0);
                assertEquals(i, array.get(ret));
            }
            else {
                // NOTE(review): decodeSearchMiss presumably turns the encoded miss into
                // an array position - confirm against LongArraySearch.
                long higher = LongArraySearch.decodeSearchMiss(1, ret);
                if (i > 0 && higher < array.size()) {
                    assertTrue(array.get(higher) < i);
                }
            }
        }
    }

    /** Same contract as {@link #linearSearchTester(IntArray)}, but for {@code binarySearch}. */
    void binarySearchTester(IntArray array) {
        for (int i = 0; i < array.size() * 3; i++) {
            long ret = array.binarySearch(i, 0, array.size());

            if ((i % 3) == 0) {
                assertTrue(ret >= 0);
                assertEquals(i, array.get(ret));
            }
            else {
                long higher = LongArraySearch.decodeSearchMiss(1, ret);
                if (i > 0 && higher+1 < array.size()) {
                    assertTrue(array.get(higher) < i);
                }
            }
        }
    }

    /** On a hit the returned index holds the key; on a miss the preceding element must be smaller. */
    void binarySearchUpperBoundTester(IntArray array) {
        for (int i = 0; i < array.size() * 3; i++) {
            long ret = array.binarySearchUpperBound(i, 0, array.size());

            if ((i % 3) == 0) {
                assertTrue(ret >= 0);
                assertEquals(i, array.get(ret));
            }
            else {
                if (i > 0 && ret > 0 && ret < array.size()) {
                    assertTrue(array.get(ret-1) < i);
                }
            }
        }
    }

    @Test
    void retain() {
        int[] vals = new int[128];
        for (int i = 0; i < vals.length; i++) { vals[i] = i; }
        var buffer = new IntQueryBuffer(vals, 128);

        basicArray.retain(buffer, 128, 0, basicArray.size());
        buffer.finalizeFiltering();

        // 0, 3, ..., 126 are the 43 multiples of three below 128
        assertEquals(43, buffer.size());
        for (int i = 0; i < 43; i++) {
            assertEquals(i*3, buffer.data[i]);
        }
    }

    @Test
    void reject() {
        int[] vals = new int[128];
        for (int i = 0; i < vals.length; i++) { vals[i] = i; }
        var buffer = new IntQueryBuffer(vals, 128);

        basicArray.reject(buffer, 128, 0, basicArray.size());
        buffer.finalizeFiltering();

        int remaining = 128 - 43;
        assertEquals(remaining, buffer.size());

        // Verify every surviving entry, not just the first 43: the survivors are
        // the values below 128 that are not multiples of three (1, 2, 4, 5, ..., 127).
        int j = 0;
        for (int i = 0; i < remaining; i++) {
            if (++j % 3 == 0) j++;
            assertEquals(j, buffer.data[i]);
        }
    }
}

View File

@ -1,149 +0,0 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.IntArray;
import nu.marginalia.util.test.TestUtil;
import org.apache.commons.lang3.ArrayUtils;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Random;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Tests the sorting algorithms available for {@link IntArray}: insertion sort,
 * quick sort and merge sort.
 * <p>
 * The stress tests sort sub-ranges of increasing length and guard against
 * out-of-range writes and lost values; the plain tests sort three arrays
 * filled with the same random values.
 */
@Tag("slow")
class IntArraySortTest {
    IntArray basic;
    IntArray paged;   // allocated the same way as basic
    IntArray shifted;

    final int size = 1026;

    @BeforeEach
    public void setUp() {
        basic = IntArray.allocate(size);
        paged = IntArray.allocate(size);
        shifted = IntArray.allocate(size+30).shifted(30);

        var random = new Random();
        int[] values = new int[size];
        for (int i = 0; i < size; i++) {
            values[i] = random.nextInt(0, 1000);
        }

        basic.transformEach(0, size, (i, old) -> values[(int) i]);
        paged.transformEach(0, size, (i, old) -> values[(int) i]);
        shifted.transformEach(0, size, (i, old) -> values[(int) i]);
    }

    /** A sort of the range [start, end) of an array; may perform I/O. */
    interface SortOperation {
        void sort(IntArray array, long start, long end) throws IOException;
    }

    @Test
    public void quickSortStressTest() throws IOException {
        IntArray array = IntArray.allocate(65536);
        sortAlgorithmTester(array, IntArraySort::quickSort);
    }

    @Test
    public void insertionSortStressTest() throws IOException {
        IntArray array = IntArray.allocate(8192);
        sortAlgorithmTester(array, IntArraySort::insertionSort);
    }

    @Test
    public void mergeSortStressTest() throws IOException {
        IntArray array = IntArray.allocate(65536);
        Path tempDir = Files.createTempDirectory(getClass().getSimpleName());
        try {
            sortAlgorithmTester(array, (a, s, e) -> a.mergeSort(s, e, tempDir));
        }
        finally {
            // Clean up even when an assertion fails
            TestUtil.clearTempDir(tempDir);
        }
    }

    /**
     * Sorts ranges {@code [6, end)} of growing length with the given operation and
     * verifies that each range is sorted, that the elements just outside the range
     * are untouched, and that the XOR checksum of the contents is unchanged.
     */
    void sortAlgorithmTester(IntArray array, SortOperation operation) throws IOException {
        int[] values = new int[(int) array.size()];
        for (int i = 0; i < values.length; i++) {
            values[i] = i;
        }
        ArrayUtils.shuffle(values);

        int sentinelA = 0xFEEDBEEF;
        int sentinelB = 0xB000B000;

        int start = 6;
        for (int end = start + 1; end < values.length - 1; end+=97) {

            // Use sentinel values to catch if the sort algorithm overwrites end values
            array.set(start - 1, sentinelA);
            array.set(end, sentinelB);

            long orderInvariantChecksum = 0;
            for (long i = 0; i < end - start; i++) {
                array.set(start + i, values[start + (int)i]);

                // Try to checksum the contents to catch bugs where the result is sorted
                // but a value has been duplicated, overwriting another
                orderInvariantChecksum ^= values[start + (int)i];
            }

            operation.sort(array, start, end);

            assertTrue(array.isSorted(start, end), "Array wasn't sorted");

            assertEquals(sentinelA, array.get(start - 1), "Start position sentinel overwritten");
            assertEquals(sentinelB, array.get(end), "End position sentinel overwritten");

            long actualChecksum = 0;
            for (long i = start; i < end; i++) {
                actualChecksum ^= array.get(i);
            }

            assertEquals(orderInvariantChecksum, actualChecksum, "Checksum validation failed");
        }
    }

    @Test
    void insertionSort() {
        // The whole range was sorted, so the whole range is verified
        basic.insertionSort(0, size);
        assertTrue(basic.isSorted(0, size));

        paged.insertionSort(0, size);
        assertTrue(paged.isSorted(0, size));

        shifted.insertionSort(0, size);
        assertTrue(shifted.isSorted(0, size));
    }

    @Test
    void quickSort() {
        basic.quickSort(0, size);
        assertTrue(basic.isSorted(0, size));

        paged.quickSort(0, size);
        assertTrue(paged.isSorted(0, size));

        shifted.quickSort(0, size);
        assertTrue(shifted.isSorted(0, size));
    }

    @Test
    void mergeSort() throws IOException {
        // Use a dedicated temp directory instead of a hard-coded /tmp, which
        // does not exist on every platform
        Path tempDir = Files.createTempDirectory(getClass().getSimpleName());
        try {
            basic.mergeSort(0, size, tempDir);
            assertTrue(basic.isSorted(0, size));

            paged.mergeSort(0, size, tempDir);
            assertTrue(paged.isSorted(0, size));

            shifted.mergeSort(0, size, tempDir);
            assertTrue(shifted.isSorted(0, size));
        }
        finally {
            TestUtil.clearTempDir(tempDir);
        }
    }
}

View File

@ -1,94 +0,0 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.IntArray;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.ArrayList;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests {@code forEach}, {@code transformEach} and {@code transformEachIO} on
 * {@link IntArray}.
 * <p>
 * Every array is initialised so that index {@code i} holds {@code i+10}. Each
 * operation is expected to hand the callback the matching (index, value) pair
 * and to visit the indices {@code 0..size-1} in ascending order.
 */
class IntArrayTransformations2Test {
    IntArray basic;
    IntArray paged;   // allocated the same way as basic
    IntArray shifted;

    final int size = 1026;

    @BeforeEach
    public void setUp() {
        basic = IntArray.allocate(size);
        paged = IntArray.allocate(size);
        shifted = IntArray.allocate(size+30).shifted(30);

        int[] vals = new int[size];
        for (int i = 0; i < vals.length; i++) {
            vals[i] = i+10;
        }
        basic.set(0, vals);
        paged.set(0, vals);
        shifted.set(0, vals);
    }

    @Test
    void forEach() {
        foreachTester(basic);
        foreachTester(paged);
        foreachTester(shifted);
    }

    @Test
    void transformEach() {
        transformTester(basic);
        transformTester(paged);
        transformTester(shifted);
    }

    @Test
    void transformEachIO() throws IOException {
        transformTesterIO(basic);
        transformTesterIO(paged);
        transformTesterIO(shifted);
    }

    /** Negates every element via {@code transformEach} and verifies values and visiting order. */
    private void transformTester(IntArray array) {
        ArrayList<Long> offsets = new ArrayList<>();

        array.transformEach(0, size, (i, val) -> {
            assertEquals(i+10, val);
            offsets.add(i);
            return -val;
        });

        for (int i = 0; i < size; i++) {
            assertEquals(-(i+10), array.get(i));
        }

        // Previously the offsets were collected but never checked
        assertVisitedInOrder(offsets);
    }

    /** Negates every element via {@code transformEachIO} and verifies values and visiting order. */
    private void transformTesterIO(IntArray array) throws IOException {
        ArrayList<Long> offsets = new ArrayList<>();

        array.transformEachIO(0, size, (i, val) -> {
            assertEquals(i+10, val);
            offsets.add(i);
            return -val;
        });

        for (int i = 0; i < size; i++) {
            assertEquals(-(i+10), array.get(i));
        }

        assertVisitedInOrder(offsets);
    }

    /** Verifies that {@code forEach} reports every (index, value) pair, in index order. */
    private void foreachTester(IntArray array) {
        ArrayList<Long> offsets = new ArrayList<>();

        array.forEach(0, size, (i, val) -> {
            assertEquals(i+10, val);
            offsets.add(i);
        });

        assertVisitedInOrder(offsets);
    }

    /** Asserts that the recorded callback indices are exactly 0, 1, ..., size-1. */
    private void assertVisitedInOrder(ArrayList<Long> offsets) {
        assertEquals(size, offsets.size());
        for (int i = 0; i < size; i++) {
            assertEquals(i, (long) offsets.get(i));
        }
    }
}

View File

@ -1,84 +0,0 @@
package nu.marginalia.array.algo;
import nu.marginalia.array.IntArray;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests range transformations and folds on {@link IntArray}.
 * <p>
 * Every array is initialised so that index {@code i} holds {@code 3*i}.
 */
class IntArrayTransformationsTest {
    IntArray basic;
    IntArray paged;
    IntArray shifted;

    final int size = 1026;

    @BeforeEach
    public void setUp() {
        basic = IntArray.allocate(size);
        paged = IntArray.allocate(size);
        shifted = IntArray.allocate(size+30).shifted(30);

        for (int idx = 0; idx < basic.size(); idx++) {
            int tripled = 3*idx;
            basic.set(idx, tripled);
            paged.set(idx, tripled);
            shifted.set(idx, tripled);
        }
    }

    @Test
    void transformEach() {
        transformTester(basic);
        transformTester(paged);
        transformTester(shifted);
    }

    @Test
    void transformEachIO() throws IOException {
        transformTesterIO(basic);
        transformTesterIO(paged);
        transformTesterIO(shifted);
    }

    @Test
    void foldIO() throws IOException {
        // Elements 5..9 hold 15, 18, 21, 24, 27
        int expectedSum = 3*(5+6+7+8+9);

        assertEquals(expectedSum, basic.foldIO(0, 5, 10, Integer::sum));
        assertEquals(expectedSum, paged.foldIO(0, 5, 10, Integer::sum));
        assertEquals(expectedSum, shifted.foldIO(0, 5, 10, Integer::sum));
    }

    @Test
    void fold() {
        int expectedSum = 3*(5+6+7+8+9);

        assertEquals(expectedSum, basic.fold(0, 5, 10, Integer::sum));
        assertEquals(expectedSum, paged.fold(0, 5, 10, Integer::sum));
        assertEquals(expectedSum, shifted.fold(0, 5, 10, Integer::sum));
    }

    /** Replaces each element in [5, 15) with (index - value) and checks the result. */
    private void transformTester(IntArray array) {
        array.transformEach(5, 15, (pos, old) -> (int) (pos - old));
        assertOnlyMiddleRangeTransformed(array);
    }

    /** Same as {@link #transformTester(IntArray)}, using the IO variant of the operation. */
    private void transformTesterIO(IntArray array) throws IOException {
        array.transformEachIO(5, 15, (pos, old) -> (int) (pos - old));
        assertOnlyMiddleRangeTransformed(array);
    }

    /**
     * Checks indices 0..19: those in [5, 15) must hold i - 3*i = -2*i, while the
     * neighbours on either side must still hold their original 3*i.
     */
    private void assertOnlyMiddleRangeTransformed(IntArray array) {
        for (int idx = 0; idx < 20; idx++) {
            boolean insideRange = idx >= 5 && idx < 15;
            int expected = insideRange ? -2*idx : 3*idx;
            assertEquals(expected, array.get(idx));
        }
    }
}

View File

@ -2,7 +2,7 @@ package nu.marginalia.array.algo;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

View File

@ -22,7 +22,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
class LongArraySortNTest {
LongArray basic;
LongArray paged;
LongArray shifted;
LongArray segment;
@ -33,7 +32,6 @@ class LongArraySortNTest {
@BeforeEach
public void setUp() {
basic = LongArray.allocate(size);
paged = LongArray.allocate(size);
shifted = LongArray.allocate(size+30).shifted(30);
segment = LongArrayFactory.onHeapShared(size + 30).shifted(30);
@ -47,7 +45,6 @@ class LongArraySortNTest {
}
basic.set(0, values);
paged.set(0, values);
shifted.set(0, values);
segment.set(0, values);
@ -62,7 +59,7 @@ class LongArraySortNTest {
@Test
public void quickSortStressTest() throws IOException {
LongArray array = LongArray.allocate(65536);
sortAlgorithmTester(array, LongArraySort::quickSort);
sortAlgorithmTester(array, LongArraySort::sort);
}
@ -132,20 +129,16 @@ class LongArraySortNTest {
@Test
void insertionSortN() {
basic.insertionSortN(2, 0, size);
LongArraySort.insertionSortN(basic, 2, 0, size);
assertTrue(basic.isSortedN(2, 0, size));
paged.insertionSortN(2, 0, size);
assertTrue(paged.isSortedN(2, 0, size));
shifted.insertionSortN(2, 0, size);
LongArraySort.insertionSortN(shifted, 2, 0, size);
assertTrue(shifted.isSortedN(2, 0, size));
segment.insertionSortN(2, 0, size);
LongArraySort.insertionSortN(segment, 2, 0, size);
assertTrue(segment.isSortedN(2, 0, size));
compare(basic, dataAsPairs);
compare(paged, dataAsPairs);
compare(shifted, dataAsPairs);
compare(segment, dataAsPairs);
}
@ -155,9 +148,6 @@ class LongArraySortNTest {
basic.quickSortN(2, 0, size);
assertTrue(basic.isSortedN(2, 0, size));
paged.quickSortN(2, 0, size);
assertTrue(paged.isSortedN(2, 0, size));
shifted.quickSortN(2, 0, size);
assertTrue(shifted.isSortedN(2, 0, size));
@ -165,7 +155,6 @@ class LongArraySortNTest {
assertTrue(segment.isSortedN(2, 0, size));
compare(basic, dataAsPairs);
compare(paged, dataAsPairs);
compare(shifted, dataAsPairs);
compare(segment, dataAsPairs);
}
@ -176,9 +165,6 @@ class LongArraySortNTest {
basic.mergeSortN(2, 0, size, Path.of("/tmp"));
assertTrue(basic.isSortedN(2, 0, size));
paged.mergeSortN(2, 0, size, Path.of("/tmp"));
assertTrue(paged.isSortedN(2, 0, size));
shifted.mergeSortN(2, 0, size, Path.of("/tmp"));
assertTrue(shifted.isSortedN(2, 0, size));
@ -186,7 +172,6 @@ class LongArraySortNTest {
assertTrue(segment.isSortedN(2, 0, size));
compare(basic, dataAsPairs);
compare(paged, dataAsPairs);
compare(shifted, dataAsPairs);
compare(segment, dataAsPairs);
}

View File

@ -21,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue;
class LongArraySortTest {
LongArray basic;
LongArray paged;
LongArray shifted;
LongArray segment;
@ -32,7 +31,6 @@ class LongArraySortTest {
@BeforeEach
public void setUp() {
basic = LongArray.allocate(size);
paged = LongArray.allocate(size);
shifted = LongArray.allocate(size+30).shifted(30);
segment = LongArrayFactory.onHeapConfined(size + 30).shifted(30);
@ -46,12 +44,10 @@ class LongArraySortTest {
}
basic.set(0, values);
paged.set(0, values);
shifted.set(0, values);
segment.set(0, values);
basic.transformEach(0, size, (i, old) -> values[(int) i]);
paged.transformEach(0, size, (i, old) -> values[(int) i]);
shifted.transformEach(0, size, (i, old) -> values[(int) i]);
segment.transformEach(0, size, (i, old) -> values[(int) i]);
}
@ -68,7 +64,7 @@ class LongArraySortTest {
array.set(1, 4);
array.set(2, 3);
array.set(3, 2);
array.quickSortNative(0, 4);
array.sort(0, 4);
assertTrue(array.isSorted(0, 4));
assertEquals(1, array.get(0));
assertEquals(2, array.get(1));
@ -76,13 +72,14 @@ class LongArraySortTest {
assertEquals(4, array.get(3));
array.set(2, 5);
array.quickSortNative(2, 4);
array.sort(2, 4);
assertEquals(4, array.get(2));
assertEquals(5, array.get(3));
assertTrue(array.isSorted(2, 4));
}
@Test
public void quickSortStressTest() throws IOException {
LongArray array = LongArray.allocate(65536);
@ -92,7 +89,7 @@ class LongArraySortTest {
@Test
public void nativeSortTest() throws IOException {
LongArray array = LongArray.allocate(65536);
sortAlgorithmTester(array, LongArraySort::quickSortNative);
sortAlgorithmTester(array, LongArraySort::sort);
}
@ -166,41 +163,33 @@ class LongArraySortTest {
@Test
void insertionSort() {
basic.insertionSort(0, size);
LongArraySort.insertionSort(basic, 0, size);
assertTrue(basic.isSorted(0, 128));
paged.insertionSort(0, size);
assertTrue(paged.isSorted(0, 128));
shifted.insertionSort(0, size);
LongArraySort.insertionSort(shifted, 0, size);
assertTrue(shifted.isSorted(0, 128));
segment.insertionSort(0, size);
LongArraySort.insertionSort(segment, 0, size);
assertTrue(segment.isSorted(0, 128));
verifyValuesPresent(basic);
verifyValuesPresent(paged);
verifyValuesPresent(shifted);
verifyValuesPresent(segment);
}
@Test
void quickSort() {
basic.quickSort(0, size);
basic.sort(0, size);
assertTrue(basic.isSorted(0, size));
paged.quickSort(0, size);
assertTrue(paged.isSorted(0, size));
shifted.quickSort(0, size);
shifted.sort(0, size);
assertTrue(shifted.isSorted(0, size));
segment.quickSort(0, size);
segment.sort(0, size);
assertTrue(segment.isSorted(0, size));
verifyValuesPresent(basic);
verifyValuesPresent(paged);
verifyValuesPresent(shifted);
verifyValuesPresent(segment);
@ -211,9 +200,6 @@ class LongArraySortTest {
basic.mergeSort(0, size, Path.of("/tmp"));
assertTrue(basic.isSorted(0, size));
paged.mergeSort(0, size, Path.of("/tmp"));
assertTrue(paged.isSorted(0, size));
shifted.mergeSort(0, size, Path.of("/tmp"));
assertTrue(shifted.isSorted(0, size));
@ -221,7 +207,6 @@ class LongArraySortTest {
assertTrue(segment.isSorted(0, size));
verifyValuesPresent(basic);
verifyValuesPresent(paged);
verifyValuesPresent(shifted);
verifyValuesPresent(segment);
}
@ -231,7 +216,7 @@ class LongArraySortTest {
var array = LongArray.allocate(1000);
var random = new Random();
array.transformEach(0, 1000, (i, val) -> random.nextInt(0, 2000));
array.quickSort(0, 1000);
array.sort(0, 1000);
Set<Long> expectedValues = new HashSet<>();
array.forEach(0, 1000, (i, v) -> expectedValues.add(v));

View File

@ -11,7 +11,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
class LongArrayTransformations2Test {
LongArray basic;
LongArray paged;
LongArray shifted;
final int size = 1026;
@ -19,7 +18,6 @@ class LongArrayTransformations2Test {
@BeforeEach
public void setUp() {
basic = LongArray.allocate(size);
paged = LongArray.allocate(size);
shifted = LongArray.allocate(size+30).shifted(30);
long[] vals = new long[size];
@ -27,26 +25,22 @@ class LongArrayTransformations2Test {
vals[i] = i+10;
}
basic.set(0, vals);
paged.set(0, vals);
shifted.set(0, vals);
}
@Test
void forEach() {
foreachTester(basic);
foreachTester(paged);
foreachTester(shifted);
}
@Test
void transformEach() {
transformTester(basic);
transformTester(paged);
transformTester(shifted);
}
@Test
void transformEachIO() throws IOException {
transformTesterIO(basic);
transformTesterIO(paged);
transformTesterIO(shifted);
}

View File

@ -10,7 +10,6 @@ import static org.junit.jupiter.api.Assertions.assertEquals;
class LongArrayTransformationsTest {
LongArray basic;
LongArray paged;
LongArray shifted;
final int size = 1026;
@ -18,12 +17,10 @@ class LongArrayTransformationsTest {
@BeforeEach
public void setUp() {
basic = LongArray.allocate(size);
paged = LongArray.allocate(size);
shifted = LongArray.allocate(size+30).shifted(30);
for (int i = 0; i < basic.size(); i++) {
basic.set(i, 3L*i);
paged.set(i, 3L*i);
shifted.set(i, 3L*i);
}
}
@ -31,28 +28,24 @@ class LongArrayTransformationsTest {
@Test
void transformEach() {
transformTester(basic);
transformTester(paged);
transformTester(shifted);
}
@Test
void transformEachIO() throws IOException {
transformTesterIO(basic);
transformTesterIO(paged);
transformTesterIO(shifted);
}
@Test
void fold() {
assertEquals(3*(5+6+7+8+9), basic.fold(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), paged.fold(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), shifted.fold(0, 5, 10, Long::sum));
}
@Test
void foldIO() throws IOException {
assertEquals(3*(5+6+7+8+9), basic.foldIO(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), paged.foldIO(0, 5, 10, Long::sum));
assertEquals(3*(5+6+7+8+9), shifted.foldIO(0, 5, 10, Long::sum));
}

View File

@ -1,19 +0,0 @@
package nu.marginalia.array.scheme;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
/**
 * Cross-checks {@link PowerOf2PartitioningScheme} against
 * {@link SequentialPartitioningScheme}: when both are constructed with 64, they
 * must report the same page, offset and same-page answer for indices 0..511.
 */
class ArrayPartitioningSchemeTest {

    @Test
    public void testPo2() {
        var powerOf2 = new PowerOf2PartitioningScheme(64);
        var sequential = new SequentialPartitioningScheme(64);

        int idx = 0;
        while (idx < 512) {
            Assertions.assertEquals(powerOf2.getPage(idx), sequential.getPage(idx), "Unexpected buffer @ " + idx);
            Assertions.assertEquals(powerOf2.getOffset(idx), sequential.getOffset(idx), "Unexpected offset @ " + idx);
            Assertions.assertEquals(powerOf2.isSamePage(idx, idx+1), sequential.isSamePage(idx, idx+1), "Unexpected value @ " + idx);
            idx++;
        }
    }
}

View File

@ -1,7 +1,7 @@
package nu.marginalia.btree;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.model.BTreeContext;
import nu.marginalia.btree.model.BTreeHeader;

View File

@ -1,7 +1,7 @@
package nu.marginalia.btree;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
import nu.marginalia.util.NextPrimeUtil;
@ -9,7 +9,6 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;

View File

@ -1,7 +1,7 @@
package nu.marginalia.btree;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.buffer.LongQueryBuffer;
import nu.marginalia.array.page.LongQueryBuffer;
import nu.marginalia.btree.model.BTreeBlockSize;
import nu.marginalia.btree.model.BTreeContext;
import nu.marginalia.util.NextPrimeUtil;
@ -9,7 +9,6 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.io.IOException;
import java.util.Arrays;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;