diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexBTreeTransformer.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexBTreeTransformer.java index ccf21331..0af6165e 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexBTreeTransformer.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullIndexBTreeTransformer.java @@ -6,14 +6,12 @@ import nu.marginalia.btree.BTreeWriter; import nu.marginalia.btree.model.BTreeContext; import java.io.IOException; -import java.nio.channels.FileChannel; /** Constructs the BTrees in a reverse index */ public class FullIndexBTreeTransformer implements LongArrayTransformations.LongIOTransformer { private final BTreeWriter writer; - private final FileChannel intermediateChannel; - private final int entrySize; + private final LongArray documentsArray; long start = 0; long writeOffset = 0; @@ -21,10 +19,10 @@ public class FullIndexBTreeTransformer implements LongArrayTransformations.LongI public FullIndexBTreeTransformer(LongArray urlsFileMap, int entrySize, BTreeContext bTreeContext, - FileChannel intermediateChannel) { + LongArray documentsArray) { + this.documentsArray = documentsArray; this.writer = new BTreeWriter(urlsFileMap, bTreeContext); this.entrySize = entrySize; - this.intermediateChannel = intermediateChannel; } @Override @@ -39,7 +37,7 @@ public class FullIndexBTreeTransformer implements LongArrayTransformations.LongI final long offsetForBlock = writeOffset; writeOffset += writer.write(writeOffset, size, - mapRegion -> mapRegion.transferFrom(intermediateChannel, start, 0, end - start) + mapRegion -> mapRegion.transferFrom(documentsArray, start, 0, end - start) ); start = end; diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindex.java b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindex.java index 50f3a4bb..4774519e 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindex.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/full/FullPreindex.java @@ -13,7 +13,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.channels.FileChannel; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; @@ -87,13 +86,10 @@ public class FullPreindex { // Write the docs file LongArray finalDocs = LongArrayFactory.mmapForWritingConfined(outputFileDocs, sizeEstimator.size); - try (var intermediateDocChannel = documents.createDocumentsFileChannel()) { - offsets.transformEachIO(0, offsets.size(), - new FullIndexBTreeTransformer(finalDocs, 2, - ReverseIndexParameters.fullDocsBTreeContext, - intermediateDocChannel)); - intermediateDocChannel.force(false); - } + offsets.transformEachIO(0, offsets.size(), + new FullIndexBTreeTransformer(finalDocs, 2, + ReverseIndexParameters.fullDocsBTreeContext, + documents.documents)); LongArray wordIds = segments.wordIds; @@ -148,42 +144,36 @@ public class FullPreindex { leftIter.next(); rightIter.next(); - try (FileChannel leftChannel = left.documents.createDocumentsFileChannel(); - FileChannel rightChannel = right.documents.createDocumentsFileChannel()) + while (mergingIter.canPutMore() + && leftIter.isPositionBeforeEnd() + && rightIter.isPositionBeforeEnd()) { + final long currentWord = mergingIter.wordId; - while (mergingIter.canPutMore() - && leftIter.isPositionBeforeEnd() - && rightIter.isPositionBeforeEnd()) + if (leftIter.wordId == currentWord && rightIter.wordId == currentWord) { - final long currentWord = mergingIter.wordId; - - if (leftIter.wordId == currentWord && rightIter.wordId == currentWord) - { - // both inputs have documents for the current word - mergeSegments(leftIter, rightIter, - left.documents, right.documents, - mergedDocuments, mergingIter); - } - else if (leftIter.wordId == currentWord) { - if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)) - break; - } - else if (rightIter.wordId == currentWord) { - if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)) - break; - } - else assert false : "This should never happen"; // the helvetica scenario + // both inputs have documents for the current word + mergeSegments(leftIter, rightIter, + left.documents, right.documents, + mergedDocuments, mergingIter); } - - if (leftIter.isPositionBeforeEnd()) { - while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)); + else if (leftIter.wordId == currentWord) { + if (!copySegment(leftIter, left.documents, mergingIter, mergedDocuments)) + break; } - - if (rightIter.isPositionBeforeEnd()) { - while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)); + else if (rightIter.wordId == currentWord) { + if (!copySegment(rightIter, right.documents, mergingIter, mergedDocuments)) + break; } + else assert false : "This should never happen"; // the helvetica scenario + } + if (leftIter.isPositionBeforeEnd()) { + while (copySegment(leftIter, left.documents, mergingIter, mergedDocuments)); + } + + if (rightIter.isPositionBeforeEnd()) { + while (copySegment(rightIter, right.documents, mergingIter, mergedDocuments)); } if (leftIter.isPositionBeforeEnd()) @@ -284,15 +274,15 @@ public class FullPreindex { * into the destination segment, and advance the construction iterator. */ private static boolean copySegment(FullPreindexWordSegments.SegmentIterator sourceIter, - LongArray dest, - FileChannel sourceChannel, - FullPreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException { + FullPreindexDocuments srcDocuments, + FullPreindexWordSegments.SegmentConstructionIterator mergingIter, + LongArray dest) throws IOException { long size = sourceIter.endOffset - sourceIter.startOffset; long start = mergingIter.startOffset; long end = start + size; - dest.transferFrom(sourceChannel, + dest.transferFrom(srcDocuments.documents, sourceIter.startOffset, mergingIter.startOffset, end); diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java index ee1ab3ac..e0a8db92 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/prio/PrioPreindex.java @@ -139,44 +139,39 @@ public class PrioPreindex { leftIter.next(); rightIter.next(); - try (FileChannel leftChannel = left.documents.createDocumentsFileChannel(); - FileChannel rightChannel = right.documents.createDocumentsFileChannel()) + while (mergingIter.canPutMore() + && leftIter.isPositionBeforeEnd() + && rightIter.isPositionBeforeEnd()) { + final long currentWord = mergingIter.wordId; - while (mergingIter.canPutMore() - && leftIter.isPositionBeforeEnd() - && rightIter.isPositionBeforeEnd()) + if (leftIter.wordId == currentWord && rightIter.wordId == currentWord) { - final long currentWord = mergingIter.wordId; - - if (leftIter.wordId == currentWord && rightIter.wordId == currentWord) - { - // both inputs have documents for the current word - mergeSegments(leftIter, rightIter, - left.documents, right.documents, - mergedDocuments, mergingIter); - } - else if (leftIter.wordId == currentWord) { - if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)) - break; - } - else if (rightIter.wordId == currentWord) { - if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)) - break; - } - else assert false : "This should never happen"; // the helvetica scenario + // both inputs have documents for the current word + mergeSegments(leftIter, rightIter, + left.documents, right.documents, + mergedDocuments, mergingIter); } - - if (leftIter.isPositionBeforeEnd()) { - while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)); + else if (leftIter.wordId == currentWord) { + if (!copySegment(leftIter, left.documents, mergingIter, mergedDocuments)) + break; } - - if (rightIter.isPositionBeforeEnd()) { - while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)); + else if (rightIter.wordId == currentWord) { + if (!copySegment(rightIter, right.documents, mergingIter, mergedDocuments)) + break; } - + else assert false : "This should never happen"; // the helvetica scenario } + if (leftIter.isPositionBeforeEnd()) { + while (copySegment(leftIter, left.documents, mergingIter, mergedDocuments)); + } + + if (rightIter.isPositionBeforeEnd()) { + while (copySegment(rightIter, right.documents, mergingIter, mergedDocuments)); + } + + if (leftIter.isPositionBeforeEnd()) throw new IllegalStateException("Left has more to go"); if (rightIter.isPositionBeforeEnd()) @@ -270,24 +265,27 @@ public class PrioPreindex { rightIter.next(); } + /** Copy the data from the source segment at the position and length indicated by sourceIter, + * into the destination segment, and advance the construction iterator. + */ /** Copy the data from the source segment at the position and length indicated by sourceIter, * into the destination segment, and advance the construction iterator. */ private static boolean copySegment(PrioPreindexWordSegments.SegmentIterator sourceIter, - LongArray dest, - FileChannel sourceChannel, - PrioPreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException { + PrioPreindexDocuments srcDocuments, + PrioPreindexWordSegments.SegmentConstructionIterator mergingIter, + LongArray dest) throws IOException { long size = sourceIter.endOffset - sourceIter.startOffset; long start = mergingIter.startOffset; long end = start + size; - dest.transferFrom(sourceChannel, + dest.transferFrom(srcDocuments.documents, sourceIter.startOffset, mergingIter.startOffset, end); - boolean putNext = mergingIter.putNext(size); + boolean putNext = mergingIter.putNext(size / 2); boolean iterNext = sourceIter.next(); if (!putNext && iterNext) diff --git a/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java b/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java index b5ef03da..5ce59973 100644 --- a/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java +++ b/code/libraries/array/java/nu/marginalia/array/algo/LongArrayBase.java @@ -108,4 +108,5 @@ public interface LongArrayBase extends BulkTransferArray { void write(Path file) throws IOException; void transferFrom(FileChannel source, long sourceStart, long arrayStart, long arrayEnd) throws IOException; + void transferFrom(LongArray source, long sourceStart, long arrayStart, long arrayEnd) throws IOException; } diff --git a/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java b/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java index ac420de9..5c63e5c3 100644 --- a/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java +++ b/code/libraries/array/java/nu/marginalia/array/page/SegmentLongArray.java @@ -167,6 +167,25 @@ public class SegmentLongArray implements LongArray { } } + + @Override + public void transferFrom(LongArray source, + long sourceStartL, + long destStartL, + long destEndL) + { + if (destStartL > destEndL) + throw new IndexOutOfBoundsException("Source start after end"); + + if (sourceStartL + (destEndL - destStartL) > source.size()) + throw new IndexOutOfBoundsException("Source array too small"); + if (destEndL > size()) + throw new IndexOutOfBoundsException("Destination array too small"); + + for (long i = destStartL; i < destEndL; i++) { + set(i, source.get(sourceStartL + i - destStartL)); + } + } @Override public MemorySegment getMemorySegment() { diff --git a/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java b/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java index 04ea42d4..509fb829 100644 --- a/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java +++ b/code/libraries/array/java/nu/marginalia/array/page/UnsafeLongArray.java @@ -269,4 +269,23 @@ public class UnsafeLongArray implements LongArray { } } + @Override + public void transferFrom(LongArray source, + long sourceStartL, + long destStartL, + long destEndL) + { + if (destStartL > destEndL) + throw new IndexOutOfBoundsException("Source start after end"); + + if (sourceStartL + (destEndL - destStartL) > source.size()) + throw new IndexOutOfBoundsException("Source array too small"); + if (destEndL > size()) + throw new IndexOutOfBoundsException("Destination array too small"); + + for (long i = destStartL; i < destEndL; i++) { + set(i, source.get(sourceStartL + i - destStartL)); + } + } + }