mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index-reverse) Add documentation and clean up code.
This commit is contained in:
parent
ba4513e82c
commit
a2e6616100
4
code/features-index/index-reverse/index.svg
Normal file
4
code/features-index/index-reverse/index.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 21 KiB |
4
code/features-index/index-reverse/merging.svg
Normal file
4
code/features-index/index-reverse/merging.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 21 KiB |
4
code/features-index/index-reverse/preindex.svg
Normal file
4
code/features-index/index-reverse/preindex.svg
Normal file
File diff suppressed because one or more lines are too long
After Width: | Height: | Size: 29 KiB |
@ -12,9 +12,35 @@ The full index also provides access to term-level metadata, while the priority i
|
||||
[1] See WordFlags in [common/model](../../common/model/) and
|
||||
KeywordMetadata in [features-convert/keyword-extraction](../../features-convert/keyword-extraction).
|
||||
|
||||
## Construction
|
||||
|
||||
The reverse index is constructed by first building a series of preindexes.
|
||||
Preindexes consist of a Segment and a Documents object. The segment contains
|
||||
information about which word identifiers are present and how many, and the
|
||||
documents contain information about which documents each word can be found in.
|
||||
|
||||

|
||||
|
||||
These would typically not fit in RAM, so the index journal is paged
|
||||
and the preindexes are constructed small enough to fit in memory, and
|
||||
then merged. Merging sorted arrays is a very fast operation that does
|
||||
not require additional RAM.
|
||||
|
||||

|
||||
|
||||
Once merged into one large preindex, indexes are added to the preindex data
|
||||
to form a finalized reverse index.
|
||||
|
||||

|
||||
## Central Classes
|
||||
|
||||
* [ReverseIndexFullConverter](src/main/java/nu/marginalia/index/full/ReverseIndexFullConverter.java) constructs the full index.
|
||||
* [ReverseIndexFullReader](src/main/java/nu/marginalia/index/full/ReverseIndexFullReader.java) interrogates the full index.
|
||||
* [ReverseIndexPriorityConverter](src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityConverter.java) constructs the priority index.
|
||||
* [ReverseIndexPriorityReader](src/main/java/nu/marginalia/index/priority/ReverseIndexPriorityReader.java) interrogates the priority index.
|
||||
* [ReversePreindex](src/main/java/nu/marginalia/index/construction/ReversePreindex.java) holds the intermediate reverse index state.
|
||||
* [ReverseIndexConstructor](src/main/java/nu/marginalia/index/construction/ReverseIndexConstructor.java) constructs the index.
|
||||
* [ReverseIndexReader](src/main/java/nu/marginalia/index/ReverseIndexReader.java) interrogates the index.
|
||||
|
||||
## See Also
|
||||
|
||||
* [index-journal](../index-journal)
|
||||
* [index-forward](../index-forward)
|
||||
* [libraries/btree](../../libraries/btree)
|
||||
* [libraries/array](../../libraries/array)
|
@ -32,7 +32,7 @@ public class ReverseIndexConstructor {
|
||||
|
||||
for (var input : inputs) {
|
||||
logger.info("Construcing preindex from {}", input);
|
||||
var preindex = ReversePreindex.constructPreindex(readerSource.construct(input), docIdRewriter, tmpDir, tmpDir);
|
||||
var preindex = ReversePreindex.constructPreindex(readerSource.construct(input), docIdRewriter, tmpDir);
|
||||
preindexes.add(preindex);
|
||||
}
|
||||
|
||||
|
@ -16,9 +16,18 @@ import java.nio.file.StandardOpenOption;
|
||||
|
||||
import static nu.marginalia.array.algo.TwoArrayOperations.*;
|
||||
|
||||
/** Contains the data that would go into a reverse index,
|
||||
* that is, a mapping from words to documents, minus the actual
|
||||
* index structure that makes the data quick to access while
|
||||
* searching.
|
||||
* <p>
|
||||
* Two preindexes can be merged into a third preindex containing
|
||||
* the union of their data. This operation requires no additional
|
||||
* RAM.
|
||||
*/
|
||||
public class ReversePreindex {
|
||||
public final ReversePreindexWordSegments segments;
|
||||
public final ReversePreindexDocuments documents;
|
||||
final ReversePreindexWordSegments segments;
|
||||
final ReversePreindexDocuments documents;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(ReversePreindex.class);
|
||||
|
||||
@ -27,6 +36,26 @@ public class ReversePreindex {
|
||||
this.documents = documents;
|
||||
}
|
||||
|
||||
/** Constructs a new preindex with the data associated with reader. The backing files
|
||||
* will have randomly assigned names.
|
||||
*/
|
||||
public static ReversePreindex constructPreindex(IndexJournalReader reader,
|
||||
DocIdRewriter docIdRewriter,
|
||||
Path destDir) throws IOException
|
||||
{
|
||||
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
|
||||
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
|
||||
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
|
||||
|
||||
logger.info("Segmenting");
|
||||
var segments = ReversePreindexWordSegments.construct(reader, segmentWordsFile, segmentCountsFile);
|
||||
logger.info("Mapping docs");
|
||||
var docs = ReversePreindexDocuments.construct(docsFile, reader, docIdRewriter, segments);
|
||||
logger.info("Done");
|
||||
return new ReversePreindex(segments, docs);
|
||||
}
|
||||
|
||||
/** Transform the preindex into a reverse index */
|
||||
public void finalizeIndex(Path outputFileDocs, Path outputFileWords) throws IOException {
|
||||
var offsets = segments.counts;
|
||||
|
||||
@ -72,30 +101,87 @@ public class ReversePreindex {
|
||||
segments.delete();
|
||||
documents.delete();
|
||||
}
|
||||
public static ReversePreindex constructPreindex(IndexJournalReader reader,
|
||||
DocIdRewriter docIdRewriter,
|
||||
Path tempDir,
|
||||
Path destDir) throws IOException
|
||||
{
|
||||
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
|
||||
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
|
||||
|
||||
public static ReversePreindex merge(Path destDir,
|
||||
ReversePreindex left,
|
||||
ReversePreindex right) throws IOException {
|
||||
|
||||
ReversePreindexWordSegments mergingSegment =
|
||||
createMergedSegmentWordFile(destDir, left.segments, right.segments);
|
||||
|
||||
var mergingIter = mergingSegment.constructionIterator(2);
|
||||
var leftIter = left.segments.iterator(2);
|
||||
var rightIter = right.segments.iterator(2);
|
||||
|
||||
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
|
||||
|
||||
SortingContext ctx = new SortingContext(tempDir, 1<<31);
|
||||
logger.info("Segmenting");
|
||||
var segments = ReversePreindexWordSegments.construct(reader, ctx, segmentWordsFile, segmentCountsFile);
|
||||
logger.info("Mapping docs");
|
||||
var docs = ReversePreindexDocuments.construct(docsFile, reader, docIdRewriter, ctx, segments);
|
||||
logger.info("Done");
|
||||
return new ReversePreindex(segments, docs);
|
||||
LongArray mergedDocuments = LongArray.mmapForWriting(docsFile, 8 * (left.documents.size() + right.documents.size()));
|
||||
|
||||
leftIter.next();
|
||||
rightIter.next();
|
||||
|
||||
try (FileChannel leftChannel = left.documents.createDocumentsFileChannel();
|
||||
FileChannel rightChannel = right.documents.createDocumentsFileChannel())
|
||||
{
|
||||
|
||||
while (mergingIter.canPutMore()
|
||||
&& leftIter.isPositionBeforeEnd()
|
||||
&& rightIter.isPositionBeforeEnd())
|
||||
{
|
||||
final long currentWord = mergingIter.wordId;
|
||||
|
||||
if (leftIter.wordId == currentWord && rightIter.wordId == currentWord)
|
||||
{
|
||||
// both inputs have documents for the current word
|
||||
mergeSegments(leftIter, rightIter,
|
||||
left.documents, right.documents,
|
||||
mergedDocuments, mergingIter);
|
||||
}
|
||||
else if (leftIter.wordId == currentWord) {
|
||||
if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter))
|
||||
break;
|
||||
}
|
||||
else if (rightIter.wordId == currentWord) {
|
||||
if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter))
|
||||
break;
|
||||
}
|
||||
else assert false : "This should never happen"; // the helvetica scenario
|
||||
}
|
||||
|
||||
if (leftIter.isPositionBeforeEnd()) {
|
||||
while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter));
|
||||
}
|
||||
|
||||
if (rightIter.isPositionBeforeEnd()) {
|
||||
while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
assert !leftIter.isPositionBeforeEnd() : "Left has more to go";
|
||||
assert !rightIter.isPositionBeforeEnd() : "Right has more to go";
|
||||
assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter";
|
||||
|
||||
// We may have overestimated the size of the merged docs in the case there were
|
||||
// duplicates in the data, so we need to shrink it to the actual size we wrote.
|
||||
|
||||
mergedDocuments = shrinkMergedDocuments(mergedDocuments,
|
||||
docsFile, 2 * mergingSegment.totalSize());
|
||||
|
||||
mergingSegment.force();
|
||||
|
||||
return new ReversePreindex(
|
||||
mergingSegment,
|
||||
new ReversePreindexDocuments(mergedDocuments, docsFile)
|
||||
);
|
||||
}
|
||||
|
||||
/** Create a segment word file with each word from both inputs, with zero counts for all the data.
|
||||
* This is an intermediate product in merging.
|
||||
*/
|
||||
static ReversePreindexWordSegments createMergedSegmentWordFile(Path destDir,
|
||||
ReversePreindexWordSegments left,
|
||||
ReversePreindexWordSegments right) throws IOException {
|
||||
ReversePreindexWordSegments left,
|
||||
ReversePreindexWordSegments right) throws IOException {
|
||||
Path segmentWordsFile = Files.createTempFile(destDir, "segment_words", ".dat");
|
||||
Path segmentCountsFile = Files.createTempFile(destDir, "segment_counts", ".dat");
|
||||
|
||||
@ -114,79 +200,10 @@ public class ReversePreindex {
|
||||
|
||||
return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
|
||||
}
|
||||
public static ReversePreindex merge(Path destDir,
|
||||
ReversePreindex left,
|
||||
ReversePreindex right) throws IOException {
|
||||
|
||||
ReversePreindexWordSegments mergingSegment = createMergedSegmentWordFile(destDir,
|
||||
left.segments,
|
||||
right.segments);
|
||||
|
||||
var mergingIter = mergingSegment.constructionIterator(2);
|
||||
var leftIter = left.segments.iterator(2);
|
||||
var rightIter = right.segments.iterator(2);
|
||||
|
||||
Path docsFile = Files.createTempFile(destDir, "docs", ".dat");
|
||||
|
||||
LongArray mergedDocuments = LongArray.mmapForWriting(docsFile, 8 * (left.documents.size() + right.documents.size()));
|
||||
|
||||
leftIter.next();
|
||||
rightIter.next();
|
||||
|
||||
FileChannel leftChannel = left.documents.createDocumentsFileChannel();
|
||||
FileChannel rightChannel = right.documents.createDocumentsFileChannel();
|
||||
|
||||
while (mergingIter.canPutMore()
|
||||
&& leftIter.isPositionBeforeEnd()
|
||||
&& rightIter.isPositionBeforeEnd())
|
||||
{
|
||||
if (leftIter.wordId == mergingIter.wordId
|
||||
&& rightIter.wordId == mergingIter.wordId) {
|
||||
mergeSegments(leftIter,
|
||||
rightIter,
|
||||
left.documents,
|
||||
right.documents,
|
||||
mergedDocuments,
|
||||
mergingIter);
|
||||
}
|
||||
else if (leftIter.wordId == mergingIter.wordId) {
|
||||
if (!copySegment(leftIter, mergedDocuments, leftChannel, mergingIter))
|
||||
break;
|
||||
}
|
||||
else if (rightIter.wordId == mergingIter.wordId) {
|
||||
if (!copySegment(rightIter, mergedDocuments, rightChannel, mergingIter))
|
||||
break;
|
||||
}
|
||||
else {
|
||||
assert false : "This should never happen";
|
||||
}
|
||||
}
|
||||
|
||||
if (leftIter.isPositionBeforeEnd()) {
|
||||
while (copySegment(leftIter, mergedDocuments, leftChannel, mergingIter));
|
||||
|
||||
}
|
||||
if (rightIter.isPositionBeforeEnd()) {
|
||||
while (copySegment(rightIter, mergedDocuments, rightChannel, mergingIter));
|
||||
}
|
||||
|
||||
assert !leftIter.isPositionBeforeEnd() : "Left has more to go";
|
||||
assert !rightIter.isPositionBeforeEnd() : "Right has more to go";
|
||||
assert !mergingIter.canPutMore() : "Source iters ran dry before merging iter";
|
||||
|
||||
// We may have overestimated the size of the merged docs size in the case there were
|
||||
// duplicates in the data, so we need to shrink it to the actual size we wrote.
|
||||
|
||||
mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize());
|
||||
|
||||
mergingSegment.force();
|
||||
|
||||
return new ReversePreindex(
|
||||
mergingSegment,
|
||||
new ReversePreindexDocuments(mergedDocuments, docsFile)
|
||||
);
|
||||
}
|
||||
|
||||
/** It's possible we overestimated the necessary size of the documents file;
|
||||
* this will permit us to shrink it down to the smallest necessary size.
|
||||
*/
|
||||
private static LongArray shrinkMergedDocuments(LongArray mergedDocuments, Path docsFile, long sizeLongs) throws IOException {
|
||||
|
||||
mergedDocuments.force();
|
||||
@ -205,12 +222,15 @@ public class ReversePreindex {
|
||||
return mergedDocuments;
|
||||
}
|
||||
|
||||
/** Merge contents of the segments indicated by leftIter and rightIter into the destination
|
||||
* segment, and advance the construction iterator with the appropriate size.
|
||||
*/
|
||||
private static void mergeSegments(ReversePreindexWordSegments.SegmentIterator leftIter,
|
||||
ReversePreindexWordSegments.SegmentIterator rightIter,
|
||||
ReversePreindexDocuments left,
|
||||
ReversePreindexDocuments right,
|
||||
LongArray documentsFile,
|
||||
ReversePreindexWordSegments.SegmentConstructionIterator mergingIter)
|
||||
LongArray dest,
|
||||
ReversePreindexWordSegments.SegmentConstructionIterator destIter)
|
||||
{
|
||||
long distinct = countDistinctElementsN(2,
|
||||
left.documents,
|
||||
@ -218,29 +238,32 @@ public class ReversePreindex {
|
||||
leftIter.startOffset, leftIter.endOffset,
|
||||
rightIter.startOffset, rightIter.endOffset);
|
||||
|
||||
mergeArrays2(documentsFile,
|
||||
mergeArrays2(dest,
|
||||
left.documents,
|
||||
right.documents,
|
||||
mergingIter.startOffset,
|
||||
mergingIter.startOffset + 2*distinct,
|
||||
destIter.startOffset,
|
||||
destIter.startOffset + 2*distinct,
|
||||
leftIter.startOffset, leftIter.endOffset,
|
||||
rightIter.startOffset, rightIter.endOffset);
|
||||
|
||||
mergingIter.putNext(distinct);
|
||||
destIter.putNext(distinct);
|
||||
leftIter.next();
|
||||
rightIter.next();
|
||||
}
|
||||
|
||||
/** Copy the data from the source segment at the position and length indicated by sourceIter,
|
||||
* into the destination segment, and advance the construction iterator.
|
||||
*/
|
||||
private static boolean copySegment(ReversePreindexWordSegments.SegmentIterator sourceIter,
|
||||
LongArray documentsFile,
|
||||
FileChannel leftChannel,
|
||||
LongArray dest,
|
||||
FileChannel sourceChannel,
|
||||
ReversePreindexWordSegments.SegmentConstructionIterator mergingIter) throws IOException {
|
||||
|
||||
long size = sourceIter.endOffset - sourceIter.startOffset;
|
||||
long start = mergingIter.startOffset;
|
||||
long end = start + size;
|
||||
|
||||
documentsFile.transferFrom(leftChannel,
|
||||
dest.transferFrom(sourceChannel,
|
||||
sourceIter.startOffset,
|
||||
mergingIter.startOffset,
|
||||
end);
|
||||
@ -248,12 +271,9 @@ public class ReversePreindex {
|
||||
boolean putNext = mergingIter.putNext(size / 2);
|
||||
boolean iterNext = sourceIter.next();
|
||||
|
||||
if (!putNext) {
|
||||
assert !iterNext: "Source iterator ran out before dest iterator?!";
|
||||
}
|
||||
assert putNext || !iterNext : "Source iterator ran out before dest iterator?!";
|
||||
|
||||
return iterNext;
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
@ -34,7 +34,6 @@ public class ReversePreindexDocuments {
|
||||
Path docsFile,
|
||||
IndexJournalReader reader,
|
||||
DocIdRewriter docIdRewriter,
|
||||
SortingContext sortingContext,
|
||||
ReversePreindexWordSegments segments) throws IOException {
|
||||
|
||||
|
||||
@ -43,7 +42,7 @@ public class ReversePreindexDocuments {
|
||||
|
||||
LongArray docsFileMap = LongArray.mmapForWriting(docsFile, 8 * Files.size(docsFile));
|
||||
logger.info("Sorting data");
|
||||
sortDocsFile(docsFileMap, segments, sortingContext);
|
||||
sortDocsFile(docsFileMap, segments);
|
||||
|
||||
return new ReversePreindexDocuments(docsFileMap, docsFile);
|
||||
}
|
||||
@ -90,7 +89,7 @@ public class ReversePreindexDocuments {
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments, SortingContext sortingContext) throws IOException {
|
||||
private static void sortDocsFile(LongArray docsFileMap, ReversePreindexWordSegments segments) throws IOException {
|
||||
|
||||
var iter = segments.iterator(RECORD_SIZE_LONGS);
|
||||
|
||||
|
@ -51,7 +51,6 @@ public class ReversePreindexWordSegments {
|
||||
}
|
||||
|
||||
public static ReversePreindexWordSegments construct(IndexJournalReader reader,
|
||||
SortingContext ctx,
|
||||
Path wordIdsFile,
|
||||
Path countsFile)
|
||||
throws IOException
|
||||
@ -73,7 +72,7 @@ public class ReversePreindexWordSegments {
|
||||
}
|
||||
|
||||
// Sort the words file
|
||||
words.sortLargeSpan(ctx, 0, counts.size());
|
||||
words.quickSort(0, counts.size());
|
||||
|
||||
// Populate the counts
|
||||
for (i = 0; i < countsMap.size(); i++) {
|
||||
|
@ -94,7 +94,7 @@ class ReverseIndexReaderTest {
|
||||
|
||||
private ReverseIndexReader createIndex(EntryDataWithWordMeta... scenario) throws IOException {
|
||||
var reader = journalFactory.createReader(scenario);
|
||||
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir);
|
||||
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir);
|
||||
|
||||
|
||||
Path docsFile = tempDir.resolve("docs.dat");
|
||||
|
@ -20,7 +20,6 @@ class ReversePreindexDocsTest {
|
||||
Path wordsIdFile;
|
||||
Path docsFile;
|
||||
Path tempDir;
|
||||
SortingContext sortingContext;
|
||||
|
||||
TestJournalFactory journalFactory;
|
||||
|
||||
@ -32,7 +31,6 @@ class ReversePreindexDocsTest {
|
||||
wordsIdFile = Files.createTempFile("words", ".dat");
|
||||
docsFile = Files.createTempFile("docs", ".dat");
|
||||
tempDir = Files.createTempDirectory("sort");
|
||||
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
@ -55,8 +53,8 @@ class ReversePreindexDocsTest {
|
||||
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
|
||||
);
|
||||
|
||||
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments);
|
||||
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
|
||||
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
|
||||
|
||||
List<TestSegmentData> expected = List.of(
|
||||
new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }),
|
||||
@ -84,8 +82,8 @@ class ReversePreindexDocsTest {
|
||||
new EntryData(-0xF00BA3L, 0, 4, 4)
|
||||
);
|
||||
|
||||
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments);
|
||||
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
|
||||
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
|
||||
|
||||
List<TestSegmentData> expected = List.of(
|
||||
new TestSegmentData(4, 0, 4, new long[] { -0xF00BA3L, 0, -0xF00BA3L, 0 })
|
||||
@ -110,8 +108,8 @@ class ReversePreindexDocsTest {
|
||||
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
|
||||
);
|
||||
|
||||
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), sortingContext, segments);
|
||||
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
|
||||
var docs = ReversePreindexDocuments.construct(docsFile, reader, DocIdRewriter.identity(), segments);
|
||||
|
||||
List<TestSegmentData> expected = List.of(
|
||||
new TestSegmentData(-100, 0, 4, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0 }),
|
||||
|
@ -54,7 +54,7 @@ class ReversePreindexFinalizeTest {
|
||||
@Test
|
||||
public void testFinalizeSimple() throws IOException {
|
||||
var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51)));
|
||||
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir);
|
||||
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir);
|
||||
|
||||
|
||||
preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat"));
|
||||
@ -92,7 +92,7 @@ class ReversePreindexFinalizeTest {
|
||||
new EntryDataWithWordMeta(101, 101, wm(51, 52))
|
||||
);
|
||||
|
||||
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir, tempDir);
|
||||
var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir);
|
||||
|
||||
preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat"));
|
||||
preindex.delete();
|
||||
|
@ -54,8 +54,8 @@ class ReversePreindexMergeTest {
|
||||
var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new));
|
||||
var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new));
|
||||
|
||||
var left = ReversePreindex.constructPreindex(reader1, DocIdRewriter.identity(), tempDir, tempDir);
|
||||
var right = ReversePreindex.constructPreindex(reader2, DocIdRewriter.identity(), tempDir, tempDir);
|
||||
var left = ReversePreindex.constructPreindex(reader1, DocIdRewriter.identity(), tempDir);
|
||||
var right = ReversePreindex.constructPreindex(reader2, DocIdRewriter.identity(), tempDir);
|
||||
return ReversePreindex.merge(tempDir, left, right);
|
||||
}
|
||||
|
||||
|
@ -22,7 +22,6 @@ class ReversePreindexWordSegmentsTest {
|
||||
Path tempDir;
|
||||
|
||||
TestJournalFactory journalFactory;
|
||||
SortingContext sortingContext;
|
||||
|
||||
@BeforeEach
|
||||
public void setUp() throws IOException {
|
||||
@ -32,7 +31,6 @@ class ReversePreindexWordSegmentsTest {
|
||||
wordsIdFile = Files.createTempFile("words", ".dat");
|
||||
docsFile = Files.createTempFile("docs", ".dat");
|
||||
tempDir = Files.createTempDirectory("sort");
|
||||
sortingContext = new SortingContext(Path.of("invalid"), 1<<20);
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
@ -54,7 +52,7 @@ class ReversePreindexWordSegmentsTest {
|
||||
new EntryData(-0xF00BA3L, 0, 1L<<33)
|
||||
);
|
||||
|
||||
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
|
||||
var iter = segments.iterator(1);
|
||||
|
||||
List<TestSegmentData> expected = List.of(
|
||||
@ -75,7 +73,7 @@ class ReversePreindexWordSegmentsTest {
|
||||
new EntryData(-0xF00BA3L, 0, 5, 5)
|
||||
);
|
||||
|
||||
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
|
||||
var iter = segments.iterator(1);
|
||||
|
||||
List<TestSegmentData> expected = List.of(
|
||||
@ -97,7 +95,7 @@ class ReversePreindexWordSegmentsTest {
|
||||
new EntryData(-0xF00BA3L, 0, 10, 40, -100, 33)
|
||||
);
|
||||
|
||||
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
|
||||
var iter = segments.iterator(1);
|
||||
|
||||
List<TestSegmentData> expected = List.of(
|
||||
@ -123,7 +121,7 @@ class ReversePreindexWordSegmentsTest {
|
||||
new EntryData(0xF00BA4L, 0, 15, 30, -100, 33)
|
||||
);
|
||||
|
||||
var segments = ReversePreindexWordSegments.construct(reader, sortingContext, wordsIdFile, countsFile);
|
||||
var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile);
|
||||
var iter = segments.iterator(1);
|
||||
|
||||
List<TestSegmentData> expected = List.of(
|
||||
|
Loading…
Reference in New Issue
Block a user