From dcbec9414f3f4e49ddc3fe75156b13414fea647f Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 6 Jun 2024 16:35:09 +0200 Subject: [PATCH] (index) Fix non-compiling tests --- .../construction/PositionsFileConstructor.java | 14 ++++++++++++++ .../construction/ReversePreindexDocsTest.java | 15 ++++++++++++--- .../construction/ReversePreindexFinalizeTest.java | 11 ++++++++--- .../construction/ReversePreindexMergeTest.java | 6 ++++-- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/code/index/index-reverse/java/nu/marginalia/index/construction/PositionsFileConstructor.java b/code/index/index-reverse/java/nu/marginalia/index/construction/PositionsFileConstructor.java index 180976e1..80225e06 100644 --- a/code/index/index-reverse/java/nu/marginalia/index/construction/PositionsFileConstructor.java +++ b/code/index/index-reverse/java/nu/marginalia/index/construction/PositionsFileConstructor.java @@ -8,6 +8,20 @@ import java.nio.channels.FileChannel; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +/** A class for constructing a positions file. This class is thread-safe. + * + *

+ * + * The positions data is concatenated in the file, with each term's metadata + * followed by its positions. The metadata is a single byte, and the positions + * are encoded using the Elias Gamma code, with zero padded bits at the end to + * get octet alignment. + * + *

+ * + * It is the responsibility of the caller to keep track of the byte offset of + * each posting in the file. + */ public class PositionsFileConstructor implements AutoCloseable { private final Path file; private final FileChannel channel; diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java index ca3b49a3..e12dbad6 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexDocsTest.java @@ -19,6 +19,7 @@ class ReversePreindexDocsTest { Path wordsIdFile; Path docsFile; Path tempDir; + Path positionsFile; TestJournalFactory journalFactory; @@ -30,6 +31,7 @@ class ReversePreindexDocsTest { wordsIdFile = Files.createTempFile("words", ".dat"); docsFile = Files.createTempFile("docs", ".dat"); tempDir = Files.createTempDirectory("sort"); + positionsFile = tempDir.resolve("positions.dat"); } @AfterEach @@ -38,6 +40,9 @@ class ReversePreindexDocsTest { Files.deleteIfExists(countsFile); Files.deleteIfExists(wordsIdFile); + Files.deleteIfExists(positionsFile); + Files.deleteIfExists(docsFile); + List contents = new ArrayList<>(); Files.list(tempDir).forEach(contents::add); for (var tempFile : contents) { @@ -53,7 +58,7 @@ class ReversePreindexDocsTest { ); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), segments); + var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), new PositionsFileConstructor(positionsFile), segments); List expected = List.of( new TestSegmentData(-100, 0, 2, new long[] { -0xF00BA3L, 0 }), @@ -82,7 +87,9 @@ class ReversePreindexDocsTest { ); var segments = 
ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), segments); + var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), + new PositionsFileConstructor(positionsFile), + segments); List expected = List.of( new TestSegmentData(4, 0, 4, new long[] { -0xF00BA3L, 0, -0xF00BA3L, 0 }) @@ -109,7 +116,9 @@ class ReversePreindexDocsTest { ); var segments = ReversePreindexWordSegments.construct(reader, wordsIdFile, countsFile); - var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), segments); + var docs = ReversePreindexDocuments.construct(docsFile, tempDir, reader, DocIdRewriter.identity(), + new PositionsFileConstructor(positionsFile), + segments); List expected = List.of( new TestSegmentData(-100, 0, 4, new long[] { -0xF00BA3L, 0, 0xF00BA4L, 0 }), diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java index 1ef2df4e..d9f3cddc 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexFinalizeTest.java @@ -2,7 +2,6 @@ package nu.marginalia.index.construction; import nu.marginalia.array.LongArrayFactory; -import nu.marginalia.btree.BTreeReader; import nu.marginalia.btree.model.BTreeHeader; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -19,6 +18,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; class ReversePreindexFinalizeTest { TestJournalFactory journalFactory; + Path positionsFile; Path countsFile; Path wordsIdFile; Path docsFile; @@ -28,6 +28,7 @@ class ReversePreindexFinalizeTest { public void setUp() throws IOException { 
journalFactory = new TestJournalFactory(); + positionsFile = Files.createTempFile("positions", ".dat"); countsFile = Files.createTempFile("counts", ".dat"); wordsIdFile = Files.createTempFile("words", ".dat"); docsFile = Files.createTempFile("docs", ".dat"); @@ -51,7 +52,9 @@ class ReversePreindexFinalizeTest { @Test public void testFinalizeSimple() throws IOException { var reader = journalFactory.createReader(new EntryDataWithWordMeta(100, 101, wm(50, 51))); - var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir); + var preindex = ReversePreindex.constructPreindex(reader, + new PositionsFileConstructor(positionsFile), + DocIdRewriter.identity(), tempDir); preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat")); @@ -89,7 +92,9 @@ class ReversePreindexFinalizeTest { new EntryDataWithWordMeta(101, 101, wm(51, 52)) ); - var preindex = ReversePreindex.constructPreindex(reader, DocIdRewriter.identity(), tempDir); + var preindex = ReversePreindex.constructPreindex(reader, + new PositionsFileConstructor(positionsFile), + DocIdRewriter.identity(), tempDir); preindex.finalizeIndex(tempDir.resolve( "docs.dat"), tempDir.resolve("words.dat")); preindex.delete(); diff --git a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java index 1a173d9a..2bfa6556 100644 --- a/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java +++ b/code/index/index-reverse/test/nu/marginalia/index/construction/ReversePreindexMergeTest.java @@ -19,11 +19,13 @@ class ReversePreindexMergeTest { Path wordsIdFile; Path docsFile; Path tempDir; + Path positionsFile; @BeforeEach public void setUp() throws IOException { journalFactory = new TestJournalFactory(); + positionsFile = Files.createTempFile("positions", ".dat"); countsFile = Files.createTempFile("counts", ".dat"); 
wordsIdFile = Files.createTempFile("words", ".dat"); docsFile = Files.createTempFile("docs", ".dat"); @@ -51,8 +53,8 @@ class ReversePreindexMergeTest { var reader1 = journalFactory.createReader(leftData.toArray(EntryDataWithWordMeta[]::new)); var reader2 = journalFactory.createReader(rightData.toArray(EntryDataWithWordMeta[]::new)); - var left = ReversePreindex.constructPreindex(reader1, DocIdRewriter.identity(), tempDir); - var right = ReversePreindex.constructPreindex(reader2, DocIdRewriter.identity(), tempDir); + var left = ReversePreindex.constructPreindex(reader1, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); + var right = ReversePreindex.constructPreindex(reader2, new PositionsFileConstructor(positionsFile), DocIdRewriter.identity(), tempDir); return ReversePreindex.merge(tempDir, left, right); }