mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(array) Attempting to debug strange errors
This commit is contained in:
parent
5604e9f531
commit
67aa20ea2c
@ -30,7 +30,7 @@ public class ReverseIndexBTreeTransformer implements LongIOTransformer {
|
|||||||
@Override
|
@Override
|
||||||
public long transform(long pos, long end) throws IOException {
|
public long transform(long pos, long end) throws IOException {
|
||||||
|
|
||||||
final int size = (int) (end - start) / entrySize;
|
final int size = (int) ((end - start) / entrySize);
|
||||||
|
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -92,7 +92,8 @@ public class ReversePreindex {
|
|||||||
|
|
||||||
LongArray wordIds = segments.wordIds;
|
LongArray wordIds = segments.wordIds;
|
||||||
|
|
||||||
assert offsets.size() == wordIds.size() : "Offsets and word-ids of different size";
|
if (offsets.size() != wordIds.size())
|
||||||
|
throw new IllegalStateException("Offsets and word-ids of different size");
|
||||||
if (offsets.size() > Integer.MAX_VALUE) {
|
if (offsets.size() > Integer.MAX_VALUE) {
|
||||||
throw new IllegalStateException("offsets.size() too big!");
|
throw new IllegalStateException("offsets.size() too big!");
|
||||||
}
|
}
|
||||||
@ -137,7 +138,7 @@ public class ReversePreindex {
|
|||||||
|
|
||||||
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
|
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
|
||||||
|
|
||||||
LongArray mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, 2 * (left.documents.size() + right.documents.size()));
|
LongArray mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, left.documents.size() + right.documents.size());
|
||||||
|
|
||||||
leftIter.next();
|
leftIter.next();
|
||||||
rightIter.next();
|
rightIter.next();
|
||||||
@ -180,9 +181,15 @@ public class ReversePreindex {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
assert !leftIter.isPositionBeforeEnd() : "Left has more to go";
|
if (leftIter.isPositionBeforeEnd())
|
||||||
assert !rightIter.isPositionBeforeEnd() : "Right has more to go";
|
throw new IllegalStateException("Left has more to go");
|
||||||
assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter";
|
if (rightIter.isPositionBeforeEnd())
|
||||||
|
throw new IllegalStateException("Right has more to go");
|
||||||
|
if (mergingIter.canPutMore())
|
||||||
|
throw new IllegalStateException("Source iters ran dry before merging iter");
|
||||||
|
|
||||||
|
|
||||||
|
mergingSegment.force();
|
||||||
|
|
||||||
// We may have overestimated the size of the merged docs size in the case there were
|
// We may have overestimated the size of the merged docs size in the case there were
|
||||||
// duplicates in the data, so we need to shrink it to the actual size we wrote.
|
// duplicates in the data, so we need to shrink it to the actual size we wrote.
|
||||||
@ -190,8 +197,6 @@ public class ReversePreindex {
|
|||||||
mergedDocuments = shrinkMergedDocuments(mergedDocuments,
|
mergedDocuments = shrinkMergedDocuments(mergedDocuments,
|
||||||
docsFile, 2 * mergingSegment.totalSize());
|
docsFile, 2 * mergingSegment.totalSize());
|
||||||
|
|
||||||
mergingSegment.force();
|
|
||||||
|
|
||||||
return new ReversePreindex(
|
return new ReversePreindex(
|
||||||
mergingSegment,
|
mergingSegment,
|
||||||
new ReversePreindexDocuments(mergedDocuments, docsFile)
|
new ReversePreindexDocuments(mergedDocuments, docsFile)
|
||||||
@ -233,16 +238,15 @@ public class ReversePreindex {
|
|||||||
mergedDocuments.force();
|
mergedDocuments.force();
|
||||||
|
|
||||||
long beforeSize = mergedDocuments.size();
|
long beforeSize = mergedDocuments.size();
|
||||||
|
long afterSize = sizeLongs * 8;
|
||||||
|
if (beforeSize != afterSize) {
|
||||||
|
mergedDocuments.close();
|
||||||
try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) {
|
try (var bc = Files.newByteChannel(docsFile, StandardOpenOption.WRITE)) {
|
||||||
bc.truncate(sizeLongs * 8);
|
bc.truncate(sizeLongs * 8);
|
||||||
}
|
}
|
||||||
long afterSize = mergedDocuments.size();
|
|
||||||
mergedDocuments.close();
|
|
||||||
|
|
||||||
mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, sizeLongs);
|
|
||||||
|
|
||||||
if (beforeSize != afterSize) {
|
|
||||||
logger.info("Shrunk {} from {}b to {}b", docsFile, beforeSize, afterSize);
|
logger.info("Shrunk {} from {}b to {}b", docsFile, beforeSize, afterSize);
|
||||||
|
mergedDocuments = LongArrayFactory.mmapForWritingConfined(docsFile, sizeLongs);
|
||||||
}
|
}
|
||||||
|
|
||||||
return mergedDocuments;
|
return mergedDocuments;
|
||||||
@ -291,7 +295,8 @@ public class ReversePreindex {
|
|||||||
boolean putNext = mergingIter.putNext(size / 2);
|
boolean putNext = mergingIter.putNext(size / 2);
|
||||||
boolean iterNext = sourceIter.next();
|
boolean iterNext = sourceIter.next();
|
||||||
|
|
||||||
assert putNext || !iterNext : "Source iterator ran out before dest iterator?!";
|
if (!putNext && iterNext)
|
||||||
|
throw new IllegalStateException("Source iterator ran out before dest iterator?!");
|
||||||
|
|
||||||
return iterNext;
|
return iterNext;
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,9 @@ public class ReversePreindexWordSegments {
|
|||||||
* and each value is the start offset of the data.
|
* and each value is the start offset of the data.
|
||||||
*/
|
*/
|
||||||
public Long2LongOpenHashMap asMap(int recordSize) {
|
public Long2LongOpenHashMap asMap(int recordSize) {
|
||||||
|
if (wordIds.size() > Integer.MAX_VALUE)
|
||||||
|
throw new IllegalArgumentException("Cannot create a map with more than Integer.MAX_VALUE entries");
|
||||||
|
|
||||||
Long2LongOpenHashMap ret = new Long2LongOpenHashMap((int) wordIds.size(), 0.75f);
|
Long2LongOpenHashMap ret = new Long2LongOpenHashMap((int) wordIds.size(), 0.75f);
|
||||||
var iter = iterator(recordSize);
|
var iter = iterator(recordSize);
|
||||||
|
|
||||||
@ -62,7 +65,7 @@ public class ReversePreindexWordSegments {
|
|||||||
|
|
||||||
// Create the words file by iterating over the map and inserting them into
|
// Create the words file by iterating over the map and inserting them into
|
||||||
// the words file in whatever bizarro hash table order they appear in
|
// the words file in whatever bizarro hash table order they appear in
|
||||||
int i = 0;
|
long i = 0;
|
||||||
LongIterator iter = countsMap.keySet().iterator();
|
LongIterator iter = countsMap.keySet().iterator();
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
words.set(i++, iter.nextLong());
|
words.set(i++, iter.nextLong());
|
||||||
@ -120,8 +123,8 @@ public class ReversePreindexWordSegments {
|
|||||||
this.fileSize = wordIds.size();
|
this.fileSize = wordIds.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
private int i = -1;
|
private long i = -1;
|
||||||
public int idx() {
|
public long idx() {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
public boolean next() {
|
public boolean next() {
|
||||||
@ -166,8 +169,8 @@ public class ReversePreindexWordSegments {
|
|||||||
this.wordId = wordIds.get(0);
|
this.wordId = wordIds.get(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private int i = 0;
|
private long i = 0;
|
||||||
public int idx() {
|
public long idx() {
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -154,7 +154,11 @@ public class SegmentLongArray implements PartitionPage, LongArray {
|
|||||||
@Override
|
@Override
|
||||||
public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
|
public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
|
||||||
|
|
||||||
final int stride = 1024*1204*128; // Copy 1 GB at a time 'cause byte buffers are 'a byte buffering
|
final int stride = 1024*1024*128; // Copy 1 GB at a time 'cause byte buffers are 'a byte buffering
|
||||||
|
|
||||||
|
if (source.size() / 8 < sourceStart + (arrayEnd - arrayStart)) {
|
||||||
|
throw new IndexOutOfBoundsException(STR."Source channel too small: \{source.size()} < \{sourceStart + (arrayEnd - arrayStart)}");
|
||||||
|
}
|
||||||
|
|
||||||
long ss = sourceStart;
|
long ss = sourceStart;
|
||||||
for (long as = arrayStart; as < arrayEnd; as += stride, ss += stride) {
|
for (long as = arrayStart; as < arrayEnd; as += stride, ss += stride) {
|
||||||
|
@ -2,8 +2,11 @@ package nu.marginalia.array.page;
|
|||||||
|
|
||||||
import nu.marginalia.array.ArrayRangeReference;
|
import nu.marginalia.array.ArrayRangeReference;
|
||||||
import nu.marginalia.array.LongArray;
|
import nu.marginalia.array.LongArray;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import sun.misc.Unsafe;
|
import sun.misc.Unsafe;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.lang.foreign.Arena;
|
import java.lang.foreign.Arena;
|
||||||
@ -12,7 +15,6 @@ import java.nio.ByteBuffer;
|
|||||||
import java.nio.LongBuffer;
|
import java.nio.LongBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.OpenOption;
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
|
|
||||||
@ -23,9 +25,13 @@ import static java.lang.foreign.ValueLayout.JAVA_LONG;
|
|||||||
public class UnsafeLongArray implements PartitionPage, LongArray {
|
public class UnsafeLongArray implements PartitionPage, LongArray {
|
||||||
|
|
||||||
private static final Unsafe unsafe = UnsafeProvider.getUnsafe();
|
private static final Unsafe unsafe = UnsafeProvider.getUnsafe();
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(UnsafeLongArray.class);
|
||||||
|
|
||||||
@Nullable
|
@Nullable
|
||||||
private final Arena arena;
|
private final Arena arena;
|
||||||
|
@Nullable
|
||||||
|
private final FileChannel channel;
|
||||||
|
|
||||||
private final MemorySegment segment;
|
private final MemorySegment segment;
|
||||||
private boolean closed;
|
private boolean closed;
|
||||||
|
|
||||||
@ -33,6 +39,15 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
|
|||||||
@Nullable Arena arena) {
|
@Nullable Arena arena) {
|
||||||
this.segment = segment;
|
this.segment = segment;
|
||||||
this.arena = arena;
|
this.arena = arena;
|
||||||
|
this.channel = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
UnsafeLongArray(MemorySegment segment,
|
||||||
|
@Nonnull FileChannel channel,
|
||||||
|
@Nullable Arena arena) {
|
||||||
|
this.segment = segment;
|
||||||
|
this.arena = arena;
|
||||||
|
this.channel = channel;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static UnsafeLongArray onHeap(Arena arena, long size) {
|
public static UnsafeLongArray onHeap(Arena arena, long size) {
|
||||||
@ -40,38 +55,26 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public static UnsafeLongArray fromMmapReadOnly(Arena arena, Path file, long offset, long size) throws IOException {
|
public static UnsafeLongArray fromMmapReadOnly(Arena arena, Path file, long offset, long size) throws IOException {
|
||||||
return new UnsafeLongArray(
|
try (var channel = (FileChannel) Files.newByteChannel(file, StandardOpenOption.READ)) {
|
||||||
mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_ONLY, StandardOpenOption.READ),
|
return new UnsafeLongArray(channel.map(FileChannel.MapMode.READ_ONLY,
|
||||||
arena);
|
JAVA_LONG.byteSize() * offset, JAVA_LONG.byteSize() * size,
|
||||||
}
|
arena), arena);
|
||||||
|
|
||||||
public static UnsafeLongArray fromMmapReadWrite(Arena arena, Path file, long offset, long size) throws IOException {
|
|
||||||
|
|
||||||
return new UnsafeLongArray(
|
|
||||||
mmapFile(arena, file, offset, size, FileChannel.MapMode.READ_WRITE,
|
|
||||||
StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE),
|
|
||||||
arena);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static MemorySegment mmapFile(Arena arena,
|
|
||||||
Path file,
|
|
||||||
long offset,
|
|
||||||
long size,
|
|
||||||
FileChannel.MapMode mode,
|
|
||||||
OpenOption... openOptions) throws IOException
|
|
||||||
{
|
|
||||||
try (var channel = (FileChannel) Files.newByteChannel(file, openOptions)) {
|
|
||||||
|
|
||||||
return channel.map(mode,
|
|
||||||
JAVA_LONG.byteSize() * offset,
|
|
||||||
JAVA_LONG.byteSize() * size,
|
|
||||||
arena);
|
|
||||||
}
|
}
|
||||||
catch (IOException ex) {
|
catch (IOException ex) {
|
||||||
throw new IOException("Failed to map file " + file + " (" + offset + ":" + size + ")", ex);
|
throw new IOException("Failed to map file " + file + " (" + offset + ":" + size + ")", ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static UnsafeLongArray fromMmapReadWrite(Arena arena, Path file, long offset, long size) throws IOException {
|
||||||
|
var channel = (FileChannel) Files.newByteChannel(file,
|
||||||
|
StandardOpenOption.READ, StandardOpenOption.WRITE, StandardOpenOption.CREATE);
|
||||||
|
var segment = channel.map(FileChannel.MapMode.READ_WRITE,
|
||||||
|
JAVA_LONG.byteSize() * offset, JAVA_LONG.byteSize() * size,
|
||||||
|
arena);
|
||||||
|
|
||||||
|
return new UnsafeLongArray(segment, channel, arena);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public LongArray range(long start, long end) {
|
public LongArray range(long start, long end) {
|
||||||
return new UnsafeLongArray(
|
return new UnsafeLongArray(
|
||||||
@ -122,6 +125,15 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
|
|||||||
if (arena != null && !closed) {
|
if (arena != null && !closed) {
|
||||||
arena.close();
|
arena.close();
|
||||||
}
|
}
|
||||||
|
if (channel != null && !closed) {
|
||||||
|
try {
|
||||||
|
channel.close();
|
||||||
|
}
|
||||||
|
catch (IOException ex) {
|
||||||
|
throw new RuntimeException("Failed to close channel", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
closed = true;
|
closed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -149,6 +161,13 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
|
|||||||
public void force() {
|
public void force() {
|
||||||
if (segment.isMapped()) {
|
if (segment.isMapped()) {
|
||||||
segment.force();
|
segment.force();
|
||||||
|
try {
|
||||||
|
if (channel != null) {
|
||||||
|
channel.force(false);
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Failed to force channel", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -156,26 +175,102 @@ public class UnsafeLongArray implements PartitionPage, LongArray {
|
|||||||
return new ArrayRangeReference<>(this, start, end);
|
return new ArrayRangeReference<>(this, start, end);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void chanelChannelTransfer(FileChannel source,
|
||||||
|
long sourceStartL,
|
||||||
|
long arrayStartL,
|
||||||
|
long arrayEndL) throws IOException {
|
||||||
|
|
||||||
|
assert channel != null;
|
||||||
|
|
||||||
|
final int B_per_L = (int) JAVA_LONG.byteSize();
|
||||||
|
|
||||||
|
final int strideB = 128*1024*1024; // Copy in 128 MB chunks
|
||||||
|
|
||||||
|
final long destStartB = arrayStartL * B_per_L;
|
||||||
|
final long destEndB = arrayEndL * B_per_L;
|
||||||
|
final long lengthB = destEndB - destStartB;
|
||||||
|
|
||||||
|
final long sourceStartB = sourceStartL * B_per_L;
|
||||||
|
final long sourceEndB = sourceStartB + lengthB;
|
||||||
|
|
||||||
|
|
||||||
|
if (sourceStartB > sourceEndB)
|
||||||
|
throw new IndexOutOfBoundsException("Source start after end");
|
||||||
|
if (sourceStartB > source.size())
|
||||||
|
throw new IndexOutOfBoundsException("Source channel too small, start " + sourceStartB + " < input size " + source.size());
|
||||||
|
if (sourceEndB > source.size())
|
||||||
|
throw new IndexOutOfBoundsException("Source channel too small, end " + sourceEndB + " < input size " + source.size());
|
||||||
|
|
||||||
|
long destIndexB = destStartB;
|
||||||
|
|
||||||
|
source.position(sourceStartB);
|
||||||
|
|
||||||
|
while (destIndexB < destEndB)
|
||||||
|
{
|
||||||
|
long stepSizeB = Math.min(destIndexB + strideB, destEndB);
|
||||||
|
long copyLengthB = (stepSizeB - destIndexB);
|
||||||
|
|
||||||
|
long transferred = channel.transferFrom(source, destIndexB, copyLengthB);
|
||||||
|
if (transferred != copyLengthB) {
|
||||||
|
logger.warn("Less than {} bytes were copied: {}", copyLengthB, transferred);
|
||||||
|
}
|
||||||
|
|
||||||
|
destIndexB += copyLengthB;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException {
|
public void transferFrom(FileChannel source,
|
||||||
|
long sourceStartL,
|
||||||
|
long arrayStartL,
|
||||||
|
long arrayEndL) throws IOException {
|
||||||
|
|
||||||
final int stride = 1024*1204*128; // Copy 1 GB at a time 'cause byte buffers are 'a byte buffering
|
|
||||||
|
|
||||||
long ss = sourceStart;
|
if (channel != null) {
|
||||||
for (long as = arrayStart; as < arrayEnd; as += stride, ss += stride) {
|
chanelChannelTransfer(source, sourceStartL, arrayStartL, arrayEndL);
|
||||||
long ae = Math.min(as + stride, arrayEnd);
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
long index = as * JAVA_LONG.byteSize();
|
final int B_per_L = (int) JAVA_LONG.byteSize();
|
||||||
long length = (ae - as) * JAVA_LONG.byteSize();
|
|
||||||
|
|
||||||
var bufferSlice = segment.asSlice(index, length).asByteBuffer();
|
final int strideB = 1024*1024*1024; // Copy 1 GB at a time
|
||||||
|
|
||||||
|
final long arrayStartB = arrayStartL * B_per_L;
|
||||||
|
final long arrayEndB = arrayEndL * B_per_L;
|
||||||
|
final long arrayLengthB = arrayEndB - arrayStartB;
|
||||||
|
|
||||||
|
final long sourceStartB = sourceStartL * B_per_L;
|
||||||
|
final long sourceEndB = sourceStartB + arrayLengthB;
|
||||||
|
|
||||||
|
|
||||||
|
if (sourceStartB > sourceEndB)
|
||||||
|
throw new IndexOutOfBoundsException("Source start after end");
|
||||||
|
if (sourceStartB > source.size())
|
||||||
|
throw new IndexOutOfBoundsException("Source channel too small, start " + sourceStartB + " < input size " + source.size());
|
||||||
|
if (sourceEndB > source.size())
|
||||||
|
throw new IndexOutOfBoundsException("Source channel too small, end " + sourceEndB + " < input size " + source.size());
|
||||||
|
|
||||||
|
long channelIndexB = sourceStartB;
|
||||||
|
long segmentIndexB = arrayStartB;
|
||||||
|
|
||||||
|
while (segmentIndexB < arrayEndB)
|
||||||
|
{
|
||||||
|
long segmentEndB = Math.min(segmentIndexB + strideB, arrayEndB);
|
||||||
|
long lengthB = (segmentEndB - segmentIndexB);
|
||||||
|
|
||||||
|
var bufferSlice = segment.asSlice(segmentIndexB, lengthB).asByteBuffer();
|
||||||
|
|
||||||
long startPos = ss * JAVA_LONG.byteSize();
|
|
||||||
while (bufferSlice.position() < bufferSlice.capacity()) {
|
while (bufferSlice.position() < bufferSlice.capacity()) {
|
||||||
source.read(bufferSlice, startPos + bufferSlice.position());
|
if (source.position() + bufferSlice.capacity() > sourceEndB)
|
||||||
}
|
throw new IndexOutOfBoundsException("Source channel too small");
|
||||||
|
|
||||||
|
if (source.read(bufferSlice, channelIndexB + bufferSlice.position()) < 0)
|
||||||
|
throw new IOException("Failed to read from source");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
channelIndexB += lengthB;
|
||||||
|
segmentIndexB += lengthB;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user