(index-reverse) Add documentation and clean up code.

This commit is contained in:
Viktor Lofgren 2023-08-29 11:35:54 +02:00
parent ba4513e82c
commit a2e6616100
13 changed files with 184 additions and 132 deletions

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 21 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 21 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -12,9 +12,35 @@ The full index also provides access to term-level metadata, while the priority i
[1] See WordFlags in [common/model](../../common/model/) and [1] See WordFlags in [common/model](../../common/model/) and
KeywordMetadata in [features-convert/keyword-extraction](../../features-convert/keyword-extraction). KeywordMetadata in [features-convert/keyword-extraction](../../features-convert/keyword-extraction).
## Construction
The reverse index is constructed by first building a series of preindexes.
Preindexes consist of a Segment and a Documents object. The segment contains
information about which word identifiers are present and how many, and the
documents contain information about in which documents the words can be found.
![Memory layout illustrations](./preindex.svg)
A single preindex covering all the data would typically not fit in RAM,
so the index journal is paged, and preindexes small enough to fit in memory
are constructed and then merged. Merging sorted arrays is a very fast
operation that does not require additional RAM.
![Illustration of successively merged preindex files](./merging.svg)
Once merged into one large preindex, indexes are added to the preindex data
to form a finalized reverse index.
![Illustration of the data layout of the finalized index](index.svg)
## Central Classes ## Central Classes
* [ReverseIndexFullConverter](src/main/java/nu/marginalia/index/full/ReverseIndexFullConverter.java) constructs the full index. * [ReversePreindex](src/main/java/nu/marginalia/index/construction/ReversePreindex.java) intermediate reverse index state.
* [ReverseIndexFullReader](src/main/java/nu/marginalia/index/full/ReverseIndexFullReader.java) interrogates the full index. * [ReverseIndexConstructor](src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java) constructs the index.
* [ReverseIndexPriorityConverter](src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityConverter.java) constructs the priority index. * [ReverseIndexReader](src/main/java/nu/marginalia/index/ReverseIndexReader.java) interrogates the index.
* [ReverseIndexPriorityReader](src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityReader.java) interrogates the priority index.
## See Also
* [index-journal](../index-journal)
* [index-forward](../index-forward)
* [libraries/btree](../../libraries/btree)
* [libraries/array](../../libraries/array)

View File

@ -32,7 +32,7 @@ public class ReverseIndexConstructor {
for (var input : inputs) { for (var input : inputs) {
logger.info("Construcing preindex from {}", input); logger.info("Construcing preindex from {}", input);
var preindex = ReversePreindex.constructPreindex(readerSource.construct(input), docIdRewriter, tmpDir, tmpDir); var preindex = ReversePreindex.constructPreindex(readerSource.construct(input), docIdRewriter, tmpDir);
preindexes.add(preindex); preindexes.add(preindex);
} }

View File

@ -16,9 +16,18 @@ import java.nio.file.StandardOpenOption;
import static nu.marginalia.array.algo.TwoArrayOperations.*; import static nu.marginalia.array.algo.TwoArrayOperations.*;
/** Contains the data that would go into a reverse index,
* that is, a mapping from words to documents, minus the actual
* index structure that makes the data quick to access while
* searching.
* <p>
* Two preindexes can be merged into a third preindex containing
* the union of their data. This operation requires no additional
* RAM.
*/
public class ReversePreindex { public class ReversePreindex {
public final ReversePreindexWordSegments segments; final ReversePreindexWordSegments segments;
public final ReversePreindexDocuments documents; final ReversePreindexDocuments documents;
private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class); private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class);
@ -27,6 +36,26 @@ public class ReversePreindex {
this.documents = documents; this.documents = documents;
} }
/** Constructs a new preindex with the data associated with reader. The backing files
* will have randomly assigned names.
*/
public static ReversePreindex constructPreindex(IndexJournalReader reader,
DocIdRewriter docIdRewriter,
Path destDir) throws IOException
{
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
logger.info("Segmenting");
var segments = ReversePreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
logger.info("Mapping docs");
var docs = ReversePreindexDocuments.construct(docsFile, reader, docIdRewriter, segments);
logger.info("Done");
return new ReversePreindex(segments, docs);
}
/** Transform the preindex into a reverse index */
public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException { public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException {
var offsets = segments.counts; var offsets = segments.counts;
@ -72,22 +101,79 @@ public class ReversePreindex {
segments.delete(); segments.delete();
documents.delete(); documents.delete();
} }
public static ReversePreindex constructPreindex(IndexJournalReader reader,
DocIdRewriter docIdRewriter, public static ReversePreindex merge(Path destDir,
Path tempDir, ReversePreindex left,
Path destDir) throws IOException ReversePreindex right) throws IOException {
{
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat"); ReversePreindexWordSegments mergingSegment =
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat"); createMergedSegmentWordFile(destDir, left.segments, right.segments);
var mergingIter = mergingSegment.constructionIterator(2);
var leftIter = left.segments.iterator(2);
var rightIter = right.segments.iterator(2);
Path docsFile = Files.createTempFile(destDir, "docs", ".dat"); Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
SortingContext ctx = new SortingContext(tempDir, 1<<31); LongArray mergedDocuments = LongArray.mmapForWriting(docsFile, 8 * (left.documents.size() + right.documents.size()));
logger.info("Segmenting");
var segments = ReversePreindexWordSegments.construct(reader, ctx, segmentWordsFile, segmentCountsFile); leftIter.next();
logger.info("Mapping docs"); rightIter.next();
var docs = ReversePreindexDocuments.construct(docsFile, reader, docIdRewriter, ctx, segments);
logger.info("Done"); try (FileChannel leftChannel = left.documents.createDocumentsFileChannel();
return new ReversePreindex(segments, docs); FileChannel rightChannel = right.documents.createDocumentsFileChannel())
{
while (mergingIter.canPutMore()
&& leftIter.isPositionBeforeEnd()
&& rightIter.isPositionBeforeEnd())
{
final long currentWord = mergingIter.wordId;
if (leftIter.wordId == currentWord && rightIter.wordId == currentWord)
{
// both inputs have documents for the current word
mergeSegments(leftIter, rightIter,
left.documents, right.documents,
mergedDocuments, mergingIter);
}
else if (leftIter.wordId == currentWord) {
if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter))
break;
}
else if (rightIter.wordId == currentWord) {
if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter))
break;
}
else assert false : "This should never happen"; // the helvetica scenario
}
if (leftIter.isPositionBeforeEnd()) {
while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter));
}
if (rightIter.isPositionBeforeEnd()) {
while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter));
}
}
assert !leftIter.isPositionBeforeEnd() : "Left has more to go";
assert !rightIter.isPositionBeforeEnd() : "Right has more to go";
assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter";
// We may have overestimated the size of the merged docs file in the case there were
// duplicates in the data, so we need to shrink it to the actual size we wrote.
mergedDocuments = shrinkMergedDocuments(mergedDocuments,
docsFile, 2 * mergingSegment.totalSize());
mergingSegment.force();
return new ReversePreindex(
mergingSegment,
new ReversePreindexDocuments(mergedDocuments, docsFile)
);
} }
/** Create a segment word file with each word from both inputs, with zero counts for all the data. /** Create a segment word file with each word from both inputs, with zero counts for all the data.
@ -114,79 +200,10 @@ public class ReversePreindex {
return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile); return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
} }
public static ReversePreindex merge(Path destDir,
ReversePreindex left,
ReversePreindex right) throws IOException {
ReversePreindexWordSegments mergingSegment = createMergedSegmentWordFile(destDir,
left.segments,
right.segments);
var mergingIter = mergingSegment.constructionIterator(2);
var leftIter = left.segments.iterator(2);
var rightIter = right.segments.iterator(2);
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
LongArray mergedDocuments = LongArray.mmapForWriting(docsFile, 8 * (left.documents.size() + right.documents.size()));
leftIter.next();
rightIter.next();
FileChannel leftChannel = left.documents.createDocumentsFileChannel();
FileChannel rightChannel = right.documents.createDocumentsFileChannel();
while (mergingIter.canPutMore()
&& leftIter.isPositionBeforeEnd()
&& rightIter.isPositionBeforeEnd())
{
if (leftIter.wordId == mergingIter.wordId
&& rightIter.wordId == mergingIter.wordId) {
mergeSegments(leftIter,
rightIter,
left.documents,
right.documents,
mergedDocuments,
mergingIter);
}
else if (leftIter.wordId == mergingIter.wordId) {
if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter))
break;
}
else if (rightIter.wordId == mergingIter.wordId) {
if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter))
break;
}
else {
assert false : "This should never happen";
}
}
if (leftIter.isPositionBeforeEnd()) {
while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter));
}
if (rightIter.isPositionBeforeEnd()) {
while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter));
}
assert !leftIter.isPositionBeforeEnd() : "Left has more to go";
assert !rightIter.isPositionBeforeEnd() : "Right has more to go";
assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter";
// We may have overestimated the size of the merged docs size in the case there were
// duplicates in the data, so we need to shrink it to the actual size we wrote.
mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize());
mergingSegment.force();
return new ReversePreindex(
mergingSegment,
new ReversePreindexDocuments(mergedDocuments, docsFile)
);
}
/** It's possible we overestimated the necessary size of the documents file;
* this will permit us to shrink it down to the smallest necessary size.
*/
private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException { private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException {
mergedDocuments.force(); mergedDocuments.force();
@ -205,12 +222,15 @@ public class ReversePreindex {
return mergedDocuments; return mergedDocuments;
} }
/** Merge contents of the segments indicated by leftIter and rightIter into the destination
* segment, and advance the construction iterator with the appropriate size.
*/
private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter, private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter,
ReversePreindexWordSegments.SegmentIterator rightIter, ReversePreindexWordSegments.SegmentIterator rightIter,
ReversePreindexDocuments left, ReversePreindexDocuments left,
ReversePreindexDocuments right, ReversePreindexDocuments right,
LongArray documentsFile, LongArray dest,
ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) ReversePreindexWordSegments.SegmentConstructionIterator destIter)
{ {
long distinct = countDistinctElementsN(2, long distinct = countDistinctElementsN(2,
left.documents, left.documents,
@ -218,29 +238,32 @@ public class ReversePreindex {
leftIter.startOffset, leftIter.endOffset, leftIter.startOffset, leftIter.endOffset,
rightIter.startOffset, rightIter.endOffset); rightIter.startOffset, rightIter.endOffset);
mergeArrays2(documentsFile, mergeArrays2(dest,
left.documents, left.documents,
right.documents, right.documents,
mergingIter.startOffset, destIter.startOffset,
mergingIter.startOffset + 2*distinct, destIter.startOffset + 2*distinct,
leftIter.startOffset, leftIter.endOffset, leftIter.startOffset, leftIter.endOffset,
rightIter.startOffset, rightIter.endOffset); rightIter.startOffset, rightIter.endOffset);
mergingIter.putNext(distinct); destIter.putNext(distinct);
leftIter.next(); leftIter.next();
rightIter.next(); rightIter.next();
} }
/** Copy the data from the source segment at the position and length indicated by sourceIter,
* into the destination segment, and advance the construction iterator.
*/
private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter, private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter,
LongArray documentsFile, LongArray dest,
FileChannel leftChannel, FileChannel sourceChannel,
ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException { ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
long size = sourceIter.endOffset - sourceIter.startOffset; long size = sourceIter.endOffset - sourceIter.startOffset;
long start = mergingIter.startOffset; long start = mergingIter.startOffset;
long end = start + size; long end = start + size;
documentsFile.transferFrom(leftChannel, dest.transferFrom(sourceChannel,
sourceIter.startOffset, sourceIter.startOffset,
mergingIter.startOffset, mergingIter.startOffset,
end); end);
@ -248,12 +271,9 @@ public class ReversePreindex {
boolean putNext = mergingIter.putNext(size / 2); boolean putNext = mergingIter.putNext(size / 2);
boolean iterNext = sourceIter.next(); boolean iterNext = sourceIter.next();
if (!putNext) { assert putNext || !iterNext : "Source iterator ran out before dest iterator?!";
assert !iterNext: "Source iterator ran out before dest iterator?!";
}
return iterNext; return iterNext;
} }

View File

@ -34,7 +34,6 @@ public class ReversePreindexDocuments {
Path docsFile, Path docsFile,
IndexJournalReader reader, IndexJournalReader reader,
DocIdRewriter docIdRewriter, DocIdRewriter docIdRewriter,
SortingContext sortingContext,
ReversePreindexWordSegments segments) throws IOException { ReversePreindexWordSegments segments) throws IOException {
@ -43,7 +42,7 @@ public class ReversePreindexDocuments {
LongArray docsFileMap = LongArray.mmapForWriting(docsFile, 8 * Files.size(docsFile)); LongArray docsFileMap = LongArray.mmapForWriting(docsFile, 8 * Files.size(docsFile));
logger.info("Sorting data"); logger.info("Sorting data");
sortDocsFile(docsFileMap, segments, sortingContext); sortDocsFile(docsFileMap, segments);
return new ReversePreindexDocuments(docsFileMap, docsFile); return new ReversePreindexDocuments(docsFileMap, docsFile);
} }
@ -90,7 +89,7 @@ public class ReversePreindexDocuments {
} }
@SneakyThrows @SneakyThrows
private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments, SortingContext sortingContext) throws IOException { private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments) throws IOException {
var iter = segments.iterator(RECORD_SIZE_LONGS); var iter = segments.iterator(RECORD_SIZE_LONGS);

View File

@ -51,7 +51,6 @@ public class ReversePreindexWordSegments {
} }
public static ReversePreindexWordSegments construct(IndexJournalReader reader, public static ReversePreindexWordSegments construct(IndexJournalReader reader,
SortingContext ctx,
Path wordIdsFile, Path wordIdsFile,
Path countsFile) Path countsFile)
throws IOException throws IOException
@ -73,7 +72,7 @@ public class ReversePreindexWordSegments {
} }
// Sort the words file // Sort the words file
words.sortLargeSpan(ctx, 0, counts.size()); words.quickSort(0, counts.size());
// Populate the counts // Populate the counts
for (i = 0; i < countsMap.size(); i++) { for (i = 0; i < countsMap.size(); i++) {

View File

@ -94,7 +94,7 @@ class ReverseIndexReaderTest {
private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException { private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException {
var reader = journalFactory.createReader(scenario); var reader = journalFactory.createReader(scenario);
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir); var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir);
Path docsFile = tempDir.resolve("docs.dat"); Path docsFile = tempDir.resolve("docs.dat");

View File

@ -20,7 +20,6 @@ class ReversePreindexDocsTest {
Path wordsIdFile; Path wordsIdFile;
Path docsFile; Path docsFile;
Path tempDir; Path tempDir;
SortingContext sortingContext;
TestJournalFactory journalFactory; TestJournalFactory journalFactory;
@ -32,7 +31,6 @@ class ReversePreindexDocsTest {
wordsIdFile = Files.createTempFile("words", ".dat"); wordsIdFile = Files.createTempFile("words", ".dat");
docsFile = Files.createTempFile("docs", ".dat"); docsFile = Files.createTempFile("docs", ".dat");
tempDir = Files.createTempDirectory("sort"); tempDir = Files.createTempDirectory("sort");
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
} }
@AfterEach @AfterEach
@ -55,8 +53,8 @@ class ReversePreindexDocsTest {
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33) new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
); );
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments); var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }), new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }),
@ -84,8 +82,8 @@ class ReversePreindexDocsTest {
new EntryData(-0xF00BA3L, 0, 4, 4) new EntryData(-0xF00BA3L, 0, 4, 4)
); );
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments); var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
new TestSegmentData(4, 0, 4, new long[] { -0xF00BA3L, 0, -0xF00BA3L, 0 }) new TestSegmentData(4, 0, 4, new long[] { -0xF00BA3L, 0, -0xF00BA3L, 0 })
@ -110,8 +108,8 @@ class ReversePreindexDocsTest {
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33) new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
); );
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments); var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
new TestSegmentData(-100, 0, 4, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0 }), new TestSegmentData(-100, 0, 4, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0 }),

View File

@ -54,7 +54,7 @@ class ReversePreindexFinalizeTest {
@Test @Test
public void testFinalizeSimple() throws IOException { public void testFinalizeSimple() throws IOException {
var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51))); var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51)));
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir); var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir);
preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat")); preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat"));
@ -92,7 +92,7 @@ class ReversePreindexFinalizeTest {
new EntryDataWithWordMeta(101, 101, wm(51, 52)) new EntryDataWithWordMeta(101, 101, wm(51, 52))
); );
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir); var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir);
preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat")); preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat"));
preindex.delete(); preindex.delete();

View File

@ -54,8 +54,8 @@ class ReversePreindexMergeTest {
var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new)); var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new));
var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new)); var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new));
var left = ReversePreindex.constructPreindex(reader1, DocIdRewriter.identity(), tempDir, tempDir); var left = ReversePreindex.constructPreindex(reader1, DocIdRewriter.identity(), tempDir);
var right = ReversePreindex.constructPreindex(reader2, DocIdRewriter.identity(), tempDir, tempDir); var right = ReversePreindex.constructPreindex(reader2, DocIdRewriter.identity(), tempDir);
return ReversePreindex.merge(tempDir, left, right); return ReversePreindex.merge(tempDir, left, right);
} }

View File

@ -22,7 +22,6 @@ class ReversePreindexWordSegmentsTest {
Path tempDir; Path tempDir;
TestJournalFactory journalFactory; TestJournalFactory journalFactory;
SortingContext sortingContext;
@BeforeEach @BeforeEach
public void setUp() throws IOException { public void setUp() throws IOException {
@ -32,7 +31,6 @@ class ReversePreindexWordSegmentsTest {
wordsIdFile = Files.createTempFile("words", ".dat"); wordsIdFile = Files.createTempFile("words", ".dat");
docsFile = Files.createTempFile("docs", ".dat"); docsFile = Files.createTempFile("docs", ".dat");
tempDir = Files.createTempDirectory("sort"); tempDir = Files.createTempDirectory("sort");
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
} }
@AfterEach @AfterEach
@ -54,7 +52,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 1L<<33) new EntryData(-0xF00BA3L, 0, 1L<<33)
); );
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -75,7 +73,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 5, 5) new EntryData(-0xF00BA3L, 0, 5, 5)
); );
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -97,7 +95,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33) new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
); );
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(
@ -123,7 +121,7 @@ class ReversePreindexWordSegmentsTest {
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33) new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
); );
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
var iter = segments.iterator(1); var iter = segments.iterator(1);
List<TestSegmentData> expected = List.of( List<TestSegmentData> expected = List.of(