From a6f1335375b605c77ccbd334ef29d279fbeab90a Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Thu, 31 Aug 2023 17:48:08 +0200
Subject: [PATCH] (loader) Fix bugfix where the loader would omit some meta and words.

---
 .../IndexJournalWriterSingleFileImpl.java     |  6 ++
 .../loader/LoaderIndexJournalWriter.java      |  2 +-
 .../loader/LoaderIndexJournalWriterTest.java  | 83 +++++++++++++++++++
 3 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 code/processes/loading-process/src/test/java/nu/marginalia/loading/loader/LoaderIndexJournalWriterTest.java

diff --git a/code/features-index/index-journal/src/main/java/nu.marginalia.index/journal/writer/IndexJournalWriterSingleFileImpl.java b/code/features-index/index-journal/src/main/java/nu.marginalia.index/journal/writer/IndexJournalWriterSingleFileImpl.java
index bb49b62b..f6188319 100644
--- a/code/features-index/index-journal/src/main/java/nu.marginalia.index/journal/writer/IndexJournalWriterSingleFileImpl.java
+++ b/code/features-index/index-journal/src/main/java/nu.marginalia.index/journal/writer/IndexJournalWriterSingleFileImpl.java
@@ -28,6 +28,7 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
     private final FileChannel fileChannel;
 
     private int numEntries = 0;
+    private boolean closed = false;
 
     private final Logger logger = LoggerFactory.getLogger(getClass());
 
@@ -99,6 +100,11 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
     }
 
     public void close() throws IOException {
+        if (closed)
+            return;
+        else
+            closed = true;
+
         dataBuffer.flip();
         compressingStream.compress(dataBuffer);
         dataBuffer.clear();
diff --git a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/LoaderIndexJournalWriter.java b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/LoaderIndexJournalWriter.java
index 05f02798..5bbce444 100644
--- a/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/LoaderIndexJournalWriter.java
+++ b/code/processes/loading-process/src/main/java/nu/marginalia/loading/loader/LoaderIndexJournalWriter.java
@@ -70,7 +70,7 @@ public class LoaderIndexJournalWriter {
                 buffer[2*i + 1] = meta[start+i];
             }
 
-            var entry = new IndexJournalEntryData(end-start, buffer);
+            var entry = new IndexJournalEntryData(2 * (end-start), buffer);
             var header = new IndexJournalEntryHeader(combinedId, features, metadata.encode());
 
             indexWriter.put(header, entry);
diff --git a/code/processes/loading-process/src/test/java/nu/marginalia/loading/loader/LoaderIndexJournalWriterTest.java b/code/processes/loading-process/src/test/java/nu/marginalia/loading/loader/LoaderIndexJournalWriterTest.java
new file mode 100644
index 00000000..0fe79093
--- /dev/null
+++ b/code/processes/loading-process/src/test/java/nu/marginalia/loading/loader/LoaderIndexJournalWriterTest.java
@@ -0,0 +1,83 @@
+package nu.marginalia.loading.loader;
+
+import nu.marginalia.db.storage.FileStorageService;
+import nu.marginalia.db.storage.model.FileStorage;
+import nu.marginalia.db.storage.model.FileStorageType;
+import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
+import nu.marginalia.keyword.model.DocumentKeywords;
+import nu.marginalia.model.idx.DocumentMetadata;
+import nu.marginallia.index.journal.IndexJournalFileNames;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.Mockito;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.LongStream;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class LoaderIndexJournalWriterTest {
+
+    Path tempDir;
+    LoaderIndexJournalWriter writer;
+    @BeforeEach
+    public void setUp() throws IOException, SQLException {
+        tempDir = Files.createTempDirectory(getClass().getSimpleName());
+        FileStorageService storageService = Mockito.mock(FileStorageService.class);
+        Mockito.when(storageService.getStorageByType(FileStorageType.INDEX_STAGING)).
+                thenReturn(new FileStorage(null, null, null, tempDir.toString(),
+                        "test"));
+        writer = new LoaderIndexJournalWriter(storageService);
+    }
+
+    @AfterEach
+    public void tearDown() throws Exception {
+        writer.close();
+        List<Path> junk = Files.list(tempDir).toList();
+        for (var item : junk)
+            Files.delete(item);
+        Files.delete(tempDir);
+    }
+
+    @Test
+    public void testBreakup() throws Exception {
+        String[] keywords = new String[2000];
+        long[] metadata = new long[2000];
+        for (int i = 0; i < 2000; i++) {
+            keywords[i] = Integer.toString(i);
+            metadata[i] = i+1;
+        }
+        DocumentKeywords words = new DocumentKeywords(keywords, metadata);
+        writer.putWords(1, 0, new DocumentMetadata(0),
+                words);
+
+        writer.close();
+
+        List<Path> journalFiles =IndexJournalFileNames.findJournalFiles(tempDir);
+        assertEquals(1, journalFiles.size());
+
+        var reader = new IndexJournalReaderSingleCompressedFile(journalFiles.get(0));
+        List<Long> docIds = new ArrayList<>();
+        reader.forEachDocId(docIds::add);
+        assertEquals(List.of(1L, 1L), docIds);
+
+        List<Long> metas = new ArrayList<Long>();
+        reader.forEach(r -> {
+            var entry = r.readEntry();
+            for (int i = 0; i + 1 < entry.size(); i+=2) {
+                entry.get(i);
+                metas.add(entry.get(i+1));
+            }
+        });
+
+        assertEquals(LongStream.of(metadata).boxed().toList(), metas);
+    }
+}
\ No newline at end of file