diff --git a/code/features-index/index-reverse/index.svg b/code/features-index/index-reverse/index.svg new file mode 100644 index 00000000..8c0184ea --- /dev/null +++ b/code/features-index/index-reverse/index.svg @@ -0,0 +1,4 @@ + + + +
Words
Words
Static BTree Index
Static BTree Index
word1, offset1
word1, offset1
word2, offset2
word2, offset2
...
...
wordN, offsetN
wordN, offsetN
Documents
Documents
Static BTree Index 1
Static BTree Index 1
doc1, meta1
doc1, meta1
doc2, meta2
doc2, meta2
doc3, meta3
doc3, meta3
Static BTree Index 2
Static BTree Index 2
doc1, meta1
doc1, meta1
doc2, meta2
doc2, meta2
Static BTree Index N
Static BTree Index N
doc2, meta2
doc2, meta2
doc3, meta3
doc3, meta3
Reverse Index
Reverse Index
Text is not SVG - cannot display
\ No newline at end of file diff --git a/code/features-index/index-reverse/merging.svg b/code/features-index/index-reverse/merging.svg new file mode 100644 index 00000000..ed023d52 --- /dev/null +++ b/code/features-index/index-reverse/merging.svg @@ -0,0 +1,4 @@ + + + +
journal1
journal1
journal2
journal2
journal3
journal3
journalN
journalN
preindex1
preindex1
preindex2
preindex2
journal3
journal3
preindexN
preindexN
Merge
Merge
Process
Process
Input
Input
partial merge preindex 1
partial merge pre...
partial merge
preindex N
partial merge...
final merged preindex
(100s of GB)
final merged prei...
Text is not SVG - cannot display
\ No newline at end of file diff --git a/code/features-index/index-reverse/preindex.svg b/code/features-index/index-reverse/preindex.svg new file mode 100644 index 00000000..456f56a4 --- /dev/null +++ b/code/features-index/index-reverse/preindex.svg @@ -0,0 +1,4 @@ + + + +
WordIds
WordIds
Foo
Foo
Bar
Bar
Baz
Baz
Counts
Counts
3
3
2
2
2
2
Documents
Documents
doc1
doc1
doc2
doc2
doc3
doc3
doc1
doc1
doc2
doc2
doc2
doc2
doc3
doc3
0
0
3
3
5
5
Offsets
Offsets
0
0
0+3 = 3
0+3 = 3
0+3+2 = 5
0+3+2 = 5
Offset(n) = sum(Counts; 0,n-1)
Offset(n) = sum(Counts; 0,n-1)
Preindex Segment
Preindex Segment
Preindex Documents
Preindex Documents
Text is not SVG - cannot display
\ No newline at end of file diff --git a/code/features-index/index-reverse/readme.md b/code/features-index/index-reverse/readme.md index 5a9db1e5..a27371d6 100644 --- a/code/features-index/index-reverse/readme.md +++ b/code/features-index/index-reverse/readme.md @@ -12,9 +12,35 @@ The full index also provides access to term-level metadata, while the priority i [1] See WordFlags in [common/model](../../common/model/) and KeywordMetadata in [features-convert/keyword-extraction](../../features-convert/keyword-extraction). +## Construction + +The reverse index is constructed by first building a series of preindexes. +Preindexes consist of a Segment and a Documents object. The segment contains +information about which word identifiers are present and how many, and the +documents contain information about which documents the words can be found in. + +![Memory layout illustrations](./preindex.svg) + +These would typically not fit in RAM, so the index journal is paged +and the preindexes are constructed small enough to fit in memory, and +then merged. Merging sorted arrays is a very fast operation that does +not require additional RAM. + +![Illustration of successively merged preindex files](./merging.svg) + +Once merged into one large preindex, indexes are added to the preindex data +to form a finalized reverse index. + +![Illustration of the data layout of the finalized index](index.svg) ## Central Classes -* [ReverseIndexFullConverter](src/main/java/nu/marginalia/index/full/ReverseIndexFullConverter.java) constructs the full index. -* [ReverseIndexFullReader](src/main/java/nu/marginalia/index/full/ReverseIndexFullReader.java) interrogates the full index. -* [ReverseIndexPriorityConverter](src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityConverter.java) constructs the priority index. -* [ReverseIndexPriorityReader](src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityReader.java) interrogates the priority index. 
+* [ReversePreindex](src/main/java/nu/marginalia/index/construction/ReversePreindex.java) intermediate reverse index state. +* [ReverseIndexConstructor](src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java) constructs the index. +* [ReverseIndexReader](src/main/java/nu/marginalia/index/ReverseIndexReader.java) interrogates the index. + +## See Also + +* [index-journal](../index-journal) +* [index-forward](../index-forward) +* [libraries/btree](../../libraries/btree) +* [libraries/array](../../libraries/array) \ No newline at end of file diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java index 91e6e60b..c7e42e98 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java +++ b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java @@ -32,7 +32,7 @@ public class ReverseIndexConstructor { for (var input : inputs) { logger.info("Construcing preindex from {}", input); - var preindex = ReversePreindex.constructPreindex(readerSource.construct(input), docIdRewriter, tmpDir, tmpDir); + var preindex = ReversePreindex.constructPreindex(readerSource.construct(input), docIdRewriter, tmpDir); preindexes.add(preindex); } diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java index 19d3ad99..284f7df7 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java +++ b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindex.java @@ -16,9 +16,18 @@ import java.nio.file.StandardOpenOption; import static 
nu.marginalia.array.algo.TwoArrayOperations.*; +/** Contains the data that would go into a reverse index, + * that is, a mapping from words to documents, minus the actual + * index structure that makes the data quick to access while + * searching. + *

+ * Two preindexes can be merged into a third preindex containing + * the union of their data. This operation requires no additional + * RAM. + */ public class ReversePreindex { - public final ReversePreindexWordSegments segments; - public final ReversePreindexDocuments documents; + final ReversePreindexWordSegments segments; + final ReversePreindexDocuments documents; private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class); @@ -27,6 +36,26 @@ public class ReversePreindex { this.documents = documents; } + /** Constructs a new preindex with the data associated with reader. The backing files + * will have randomly assigned names. + */ + public static ReversePreindex constructPreindex(IndexJournalReader reader, + DocIdRewriter docIdRewriter, + Path destDir) throws IOException + { + Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat"); + Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat"); + Path docsFile = Files.createTempFile(destDir, "docs", ".dat"); + + logger.info("Segmenting"); + var segments = ReversePreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile); + logger.info("Mapping docs"); + var docs = ReversePreindexDocuments.construct(docsFile, reader, docIdRewriter, segments); + logger.info("Done"); + return new ReversePreindex(segments, docs); + } + + /** Transform the preindex into a reverse index */ public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException { var offsets = segments.counts; @@ -72,30 +101,87 @@ public class ReversePreindex { segments.delete(); documents.delete(); } - public static ReversePreindex constructPreindex(IndexJournalReader reader, - DocIdRewriter docIdRewriter, - Path tempDir, - Path destDir) throws IOException - { - Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat"); - Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat"); + + public static 
ReversePreindex merge(Path destDir, + ReversePreindex left, + ReversePreindex right) throws IOException { + + ReversePreindexWordSegments mergingSegment = + createMergedSegmentWordFile(destDir, left.segments, right.segments); + + var mergingIter = mergingSegment.constructionIterator(2); + var leftIter = left.segments.iterator(2); + var rightIter = right.segments.iterator(2); + Path docsFile = Files.createTempFile(destDir, "docs", ".dat"); - SortingContext ctx = new SortingContext(tempDir, 1<<31); - logger.info("Segmenting"); - var segments = ReversePreindexWordSegments.construct(reader, ctx, segmentWordsFile, segmentCountsFile); - logger.info("Mapping docs"); - var docs = ReversePreindexDocuments.construct(docsFile, reader, docIdRewriter, ctx, segments); - logger.info("Done"); - return new ReversePreindex(segments, docs); + LongArray mergedDocuments = LongArray.mmapForWriting(docsFile, 8 * (left.documents.size() + right.documents.size())); + + leftIter.next(); + rightIter.next(); + + try (FileChannel leftChannel = left.documents.createDocumentsFileChannel(); + FileChannel rightChannel = right.documents.createDocumentsFileChannel()) + { + + while (mergingIter.canPutMore() + && leftIter.isPositionBeforeEnd() + && rightIter.isPositionBeforeEnd()) + { + final long currentWord = mergingIter.wordId; + + if (leftIter.wordId == currentWord && rightIter.wordId == currentWord) + { + // both inputs have documents for the current word + mergeSegments(leftIter, rightIter, + left.documents, right.documents, + mergedDocuments, mergingIter); + } + else if (leftIter.wordId == currentWord) { + if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)) + break; + } + else if (rightIter.wordId == currentWord) { + if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)) + break; + } + else assert false : "This should never happen"; // the helvetica scenario + } + + if (leftIter.isPositionBeforeEnd()) { + while (copySegment(leftIter, mergedDocuments, 
leftChannel, mergingIter)); + } + + if (rightIter.isPositionBeforeEnd()) { + while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)); + } + + } + + assert !leftIter.isPositionBeforeEnd() : "Left has more to go"; + assert !rightIter.isPositionBeforeEnd() : "Right has more to go"; + assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter"; + + // We may have overestimated the size of the merged docs size in the case there were + // duplicates in the data, so we need to shrink it to the actual size we wrote. + + mergedDocuments = shrinkMergedDocuments(mergedDocuments, + docsFile, 2 * mergingSegment.totalSize()); + + mergingSegment.force(); + + return new ReversePreindex( + mergingSegment, + new ReversePreindexDocuments(mergedDocuments, docsFile) + ); } /** Create a segment word file with each word from both inputs, with zero counts for all the data. * This is an intermediate product in merging. */ static ReversePreindexWordSegments createMergedSegmentWordFile(Path destDir, - ReversePreindexWordSegments left, - ReversePreindexWordSegments right) throws IOException { + ReversePreindexWordSegments left, + ReversePreindexWordSegments right) throws IOException { Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat"); Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat"); @@ -114,79 +200,10 @@ public class ReversePreindex { return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile); } - public static ReversePreindex merge(Path destDir, - ReversePreindex left, - ReversePreindex right) throws IOException { - - ReversePreindexWordSegments mergingSegment = createMergedSegmentWordFile(destDir, - left.segments, - right.segments); - - var mergingIter = mergingSegment.constructionIterator(2); - var leftIter = left.segments.iterator(2); - var rightIter = right.segments.iterator(2); - - Path docsFile = Files.createTempFile(destDir, "docs", ".dat"); - - 
LongArray mergedDocuments = LongArray.mmapForWriting(docsFile, 8 * (left.documents.size() + right.documents.size())); - - leftIter.next(); - rightIter.next(); - - FileChannel leftChannel = left.documents.createDocumentsFileChannel(); - FileChannel rightChannel = right.documents.createDocumentsFileChannel(); - - while (mergingIter.canPutMore() - && leftIter.isPositionBeforeEnd() - && rightIter.isPositionBeforeEnd()) - { - if (leftIter.wordId == mergingIter.wordId - && rightIter.wordId == mergingIter.wordId) { - mergeSegments(leftIter, - rightIter, - left.documents, - right.documents, - mergedDocuments, - mergingIter); - } - else if (leftIter.wordId == mergingIter.wordId) { - if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)) - break; - } - else if (rightIter.wordId == mergingIter.wordId) { - if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)) - break; - } - else { - assert false : "This should never happen"; - } - } - - if (leftIter.isPositionBeforeEnd()) { - while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter)); - - } - if (rightIter.isPositionBeforeEnd()) { - while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter)); - } - - assert !leftIter.isPositionBeforeEnd() : "Left has more to go"; - assert !rightIter.isPositionBeforeEnd() : "Right has more to go"; - assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter"; - - // We may have overestimated the size of the merged docs size in the case there were - // duplicates in the data, so we need to shrink it to the actual size we wrote. 
- + mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize()); - - mergingSegment.force(); - - return new ReversePreindex( - mergingSegment, - new ReversePreindexDocuments(mergedDocuments, docsFile) - ); - } + /** It's possible we overestimated the necessary size of the documents file, + * this will permit us to shrink it down to the smallest necessary size. + */ private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException { mergedDocuments.force(); @@ -205,12 +222,15 @@ public class ReversePreindex { return mergedDocuments; } + /** Merge contents of the segments indicated by leftIter and rightIter into the destination + * segment, and advance the construction iterator with the appropriate size. + */ private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter, ReversePreindexWordSegments.SegmentIterator rightIter, ReversePreindexDocuments left, ReversePreindexDocuments right, - LongArray documentsFile, - ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) + LongArray dest, + ReversePreindexWordSegments.SegmentConstructionIterator destIter) { long distinct = countDistinctElementsN(2, left.documents, @@ -218,29 +238,32 @@ public class ReversePreindex { leftIter.startOffset, leftIter.endOffset, rightIter.startOffset, rightIter.endOffset); - mergeArrays2(documentsFile, + mergeArrays2(dest, left.documents, right.documents, - mergingIter.startOffset, - mergingIter.startOffset + 2*distinct, + destIter.startOffset, + destIter.startOffset + 2*distinct, leftIter.startOffset, leftIter.endOffset, rightIter.startOffset, rightIter.endOffset); - mergingIter.putNext(distinct); + destIter.putNext(distinct); leftIter.next(); rightIter.next(); } + /** Copy the data from the source segment at the position and length indicated by sourceIter, + * into the destination segment, and advance the construction iterator. 
+ */ private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter, - LongArray documentsFile, - FileChannel leftChannel, + LongArray dest, + FileChannel sourceChannel, ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException { long size = sourceIter.endOffset - sourceIter.startOffset; long start = mergingIter.startOffset; long end = start + size; - documentsFile.transferFrom(leftChannel, + dest.transferFrom(sourceChannel, sourceIter.startOffset, mergingIter.startOffset, end); @@ -248,12 +271,9 @@ public class ReversePreindex { boolean putNext = mergingIter.putNext(size / 2); boolean iterNext = sourceIter.next(); - if (!putNext) { - assert !iterNext: "Source iterator ran out before dest iterator?!"; - } + assert putNext || !iterNext : "Source iterator ran out before dest iterator?!"; return iterNext; - } diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexDocuments.java b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexDocuments.java index 4f5d0c61..c51a977d 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexDocuments.java +++ b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexDocuments.java @@ -34,7 +34,6 @@ public class ReversePreindexDocuments { Path docsFile, IndexJournalReader reader, DocIdRewriter docIdRewriter, - SortingContext sortingContext, ReversePreindexWordSegments segments) throws IOException { @@ -43,7 +42,7 @@ public class ReversePreindexDocuments { LongArray docsFileMap = LongArray.mmapForWriting(docsFile, 8 * Files.size(docsFile)); logger.info("Sorting data"); - sortDocsFile(docsFileMap, segments, sortingContext); + sortDocsFile(docsFileMap, segments); return new ReversePreindexDocuments(docsFileMap, docsFile); } @@ -90,7 +89,7 @@ public class ReversePreindexDocuments { } 
@SneakyThrows - private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments, SortingContext sortingContext) throws IOException { + private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments) throws IOException { var iter = segments.iterator(RECORD_SIZE_LONGS); diff --git a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java index 5a0e8f2d..5acd2219 100644 --- a/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java +++ b/code/features-index/index-reverse/src/main/java/nu/marginalia/index/construction/ReversePreindexWordSegments.java @@ -51,7 +51,6 @@ public class ReversePreindexWordSegments { } public static ReversePreindexWordSegments construct(IndexJournalReader reader, - SortingContext ctx, Path wordIdsFile, Path countsFile) throws IOException @@ -73,7 +72,7 @@ public class ReversePreindexWordSegments { } // Sort the words file - words.sortLargeSpan(ctx, 0, counts.size()); + words.quickSort(0, counts.size()); // Populate the counts for (i = 0; i < countsMap.size(); i++) { diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/ReverseIndexReaderTest.java b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/ReverseIndexReaderTest.java index 3963fd2d..e05fdf78 100644 --- a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/ReverseIndexReaderTest.java +++ b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/ReverseIndexReaderTest.java @@ -94,7 +94,7 @@ class ReverseIndexReaderTest { private ReverseIndexReader createIndex(EntryDataWithWordMeta... 
scenario) throws IOException { var reader = journalFactory.createReader(scenario); - var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir); + var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir); Path docsFile = tempDir.resolve("docs.dat"); diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexDocsTest.java b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexDocsTest.java index 517c1ae6..6d3b7bf4 100644 --- a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexDocsTest.java +++ b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexDocsTest.java @@ -20,7 +20,6 @@ class ReversePreindexDocsTest { Path wordsIdFile; Path docsFile; Path tempDir; - SortingContext sortingContext; TestJournalFactory journalFactory; @@ -32,7 +31,6 @@ class ReversePreindexDocsTest { wordsIdFile = Files.createTempFile("words", ".dat"); docsFile = Files.createTempFile("docs", ".dat"); tempDir = Files.createTempDirectory("sort"); - sortingContext = new SortingContext(Path.of("invalid"), 1<<20); } @AfterEach @@ -55,8 +53,8 @@ class ReversePreindexDocsTest { new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33) ); - var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments); + var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments); List expected = List.of( new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }), @@ -84,8 +82,8 @@ class ReversePreindexDocsTest { new EntryData(-0xF00BA3L, 0, 4, 4) ); - var segments = 
ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments); + var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments); List expected = List.of( new TestSegmentData(4, 0, 4, new long[] { -0xF00BA3L, 0, -0xF00BA3L, 0 }) @@ -110,8 +108,8 @@ class ReversePreindexDocsTest { new EntryData(0xF00BA4L, 0, 15, 30, -100, 33) ); - var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments); + var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); + var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments); List expected = List.of( new TestSegmentData(-100, 0, 4, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0 }), diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java index 7cf22065..cc79ebac 100644 --- a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java +++ b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java @@ -54,7 +54,7 @@ class ReversePreindexFinalizeTest { @Test public void testFinalizeSimple() throws IOException { var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51))); - var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir); + var preindex = ReversePreindex.constructPreindex(reader, 
DocIdRewriter.identity(), tempDir); preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat")); @@ -92,7 +92,7 @@ class ReversePreindexFinalizeTest { new EntryDataWithWordMeta(101, 101, wm(51, 52)) ); - var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir); + var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir); preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat")); preindex.delete(); diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexMergeTest.java b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexMergeTest.java index 0a772b12..5d9d42f2 100644 --- a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexMergeTest.java +++ b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexMergeTest.java @@ -54,8 +54,8 @@ class ReversePreindexMergeTest { var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new)); var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new)); - var left = ReversePreindex.constructPreindex(reader1, DocIdRewriter.identity(), tempDir, tempDir); - var right = ReversePreindex.constructPreindex(reader2, DocIdRewriter.identity(), tempDir, tempDir); + var left = ReversePreindex.constructPreindex(reader1, DocIdRewriter.identity(), tempDir); + var right = ReversePreindex.constructPreindex(reader2, DocIdRewriter.identity(), tempDir); return ReversePreindex.merge(tempDir, left, right); } diff --git a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java index 72b97996..1f5556ac 100644 --- 
a/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java +++ b/code/features-index/index-reverse/src/test/java/nu/marginalia/index/construction/ReversePreindexWordSegmentsTest.java @@ -22,7 +22,6 @@ class ReversePreindexWordSegmentsTest { Path tempDir; TestJournalFactory journalFactory; - SortingContext sortingContext; @BeforeEach public void setUp() throws IOException { @@ -32,7 +31,6 @@ class ReversePreindexWordSegmentsTest { wordsIdFile = Files.createTempFile("words", ".dat"); docsFile = Files.createTempFile("docs", ".dat"); tempDir = Files.createTempDirectory("sort"); - sortingContext = new SortingContext(Path.of("invalid"), 1<<20); } @AfterEach @@ -54,7 +52,7 @@ class ReversePreindexWordSegmentsTest { new EntryData(-0xF00BA3L, 0, 1L<<33) ); - var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); + var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var iter = segments.iterator(1); List expected = List.of( @@ -75,7 +73,7 @@ class ReversePreindexWordSegmentsTest { new EntryData(-0xF00BA3L, 0, 5, 5) ); - var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); + var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var iter = segments.iterator(1); List expected = List.of( @@ -97,7 +95,7 @@ class ReversePreindexWordSegmentsTest { new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33) ); - var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); + var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var iter = segments.iterator(1); List expected = List.of( @@ -123,7 +121,7 @@ class ReversePreindexWordSegmentsTest { new EntryData(0xF00BA4L, 0, 15, 30, -100, 33) ); - var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); + var 
segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); var iter = segments.iterator(1); List expected = List.of(