From 10a74f45eaffaef9de5daf784dd098ddbad867ba Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Fri, 1 Sep 2023 11:28:02 +0200 Subject: [PATCH] (index journal; minor) Even cleaner separation of concerns. --- .../journal/reader/IndexJournalReader.java | 9 +-------- .../java/nu/marginalia/index/IndexConstructorMain.java | 9 +++++++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/code/features-index/index-journal/src/main/java/nu.marginalia.index/journal/reader/IndexJournalReader.java b/code/features-index/index-journal/src/main/java/nu.marginalia.index/journal/reader/IndexJournalReader.java index 7f06c588..e1a8cff6 100644 --- a/code/features-index/index-journal/src/main/java/nu.marginalia.index/journal/reader/IndexJournalReader.java +++ b/code/features-index/index-journal/src/main/java/nu.marginalia.index/journal/reader/IndexJournalReader.java @@ -19,10 +19,6 @@ public interface IndexJournalReader { static IndexJournalReader paging(Path baseDir) throws IOException { return new IndexJournalReaderPagingImpl(baseDir); } - static IndexJournalReader filteringSingleFile(Path path, LongPredicate wordMetaFilter) throws IOException { - return new IndexJournalReaderSingleFile(path) - .filtering(wordMetaFilter); - } default void forEachWordId(LongConsumer consumer) { var ptr = this.newPointer(); @@ -32,6 +28,7 @@ public interface IndexJournalReader { } } } + default void forEachDocId(LongConsumer consumer) { var ptr = this.newPointer(); while (ptr.nextDocument()) { @@ -46,10 +43,6 @@ public interface IndexJournalReader { return new FilteringIndexJournalReader(this, termMetaFilter); } - interface LongObjectConsumer { - void accept(long left, T right); - } - } class FilteringIndexJournalReader implements IndexJournalReader { diff --git a/code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorMain.java b/code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorMain.java index 67bdefe1..6d104d4e 100644 --- a/code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorMain.java +++ b/code/processes/index-constructor-process/src/main/java/nu/marginalia/index/IndexConstructorMain.java @@ -13,6 +13,7 @@ import nu.marginalia.index.journal.reader.IndexJournalReader; import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.model.id.UrlIdCodec; import nu.marginalia.model.idx.WordFlags; +import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.mq.MessageQueueFactory; import nu.marginalia.mq.MqMessage; import nu.marginalia.mq.inbox.MqInboxResponse; @@ -125,11 +126,15 @@ public class IndexConstructorMain { Path tmpDir = indexStaging.asPath().resolve("tmp"); if (!Files.isDirectory(tmpDir)) Files.createDirectories(tmpDir); + // The priority index only includes words that have bits indicating they are + // important to the document. This filter will act on the encoded {@see WordMetadata} LongPredicate wordMetaFilter = getPriorityIndexWordMetaFilter(); ReverseIndexConstructor. createReverseIndex(heartbeat, - (path) -> IndexJournalReader.filteringSingleFile(path, wordMetaFilter), + (path) -> IndexJournalReader + .singleFile(path) + .filtering(wordMetaFilter), indexStaging.asPath(), this::addRank, tmpDir, outputFileDocs, outputFileWords); } @@ -145,7 +150,7 @@ public class IndexConstructorMain { | WordFlags.Site.asBit() | WordFlags.SiteAdjacent.asBit(); - return r -> (r & highPriorityFlags) != 0; + return r -> WordMetadata.hasAnyFlags(r, highPriorityFlags); } private void createForwardIndex() throws SQLException, IOException {