diff --git a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java index 061c9ed9..251b2c30 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java +++ b/marginalia_nu/src/main/java/nu/marginalia/util/multimap/MultimapFileLong.java @@ -124,14 +124,22 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice { if (endLongs >= mappedSize) grow(endLongs); - var buff = mappedByteBuffers.get((int)(startLongs / bufferSize)); - if ((int)(startLongs / bufferSize) != (int)((endLongs) / bufferSize)) { - logger.debug("Misaligned madvise, skipping"); - return; + int startIdx = (int)(startLongs / bufferSize); + int endIdx = (int)(endLongs / bufferSize); + + if (startIdx != endIdx) { + long offsetStart = (startLongs % bufferSize) * WORD_SIZE; + NativeIO.madviseRange(mappedByteBuffers.get(startIdx), advice, offsetStart, (int) (bufferSize * WORD_SIZE - offsetStart)); + for (int i = startIdx+1; i < endIdx; i++) { + NativeIO.madviseRange(mappedByteBuffers.get(i), advice, 0, (int)(bufferSize * WORD_SIZE)); + } + NativeIO.madviseRange(mappedByteBuffers.get(endIdx), advice, 0, (int)((endLongs % bufferSize) * WORD_SIZE)); + } + else { + var buff = mappedByteBuffers.get(startIdx); + NativeIO.madviseRange(buff, advice, (startLongs % bufferSize) * WORD_SIZE, (int) (lengthLongs * WORD_SIZE)); } - - NativeIO.madviseRange(buff, advice, (startLongs % bufferSize) * WORD_SIZE, (int)(lengthLongs*WORD_SIZE)); } public void pokeRange(long offset, long length) { diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/conversion/SearchIndexPreconverter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/conversion/SearchIndexPreconverter.java index 5357fc1f..d096ff0e 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/conversion/SearchIndexPreconverter.java +++ 
b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/conversion/SearchIndexPreconverter.java @@ -17,19 +17,7 @@ import java.nio.file.Files; import java.util.Objects; public class SearchIndexPreconverter { - private static final int CHUNK_HEADER_SIZE = 16; - private final Logger logger = LoggerFactory.getLogger(getClass()); - private final SearchIndexPartitioner partitioner; - private final TIntHashSet spamDomains; - - @SneakyThrows - public static long wordCount(File inputFile) { - try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r")) { - raf.readLong(); - return raf.readInt(); - } - } @SneakyThrows @Inject @@ -38,8 +26,7 @@ public class SearchIndexPreconverter { SearchIndexPartitioner partitioner, EdgeDomainBlacklist blacklist) { - this.partitioner = partitioner; - this.spamDomains = blacklist.getSpamDomains(); + TIntHashSet spamDomains = blacklist.getSpamDomains(); logger.info("Preconverting {}", inputFile); for (File f : outputFiles) { @@ -64,12 +51,10 @@ public class SearchIndexPreconverter { fileChannels[i] = randomAccessFiles[i].getChannel(); } - var lock = partitioner.getReadLock(); try { lock.lock(); ByteBuffer buffer = ByteBuffer.allocateDirect(8192); - for (var entry : indexJournalReader) { if (!partitioner.isGoodUrl(entry.urlId()) || spamDomains.contains(entry.domainId())) { @@ -93,6 +78,7 @@ public class SearchIndexPreconverter { finally { lock.unlock(); } + logger.info("Finalizing preconversion"); for (int i = 0; i < randomAccessFiles.length; i++) { long pos = randomAccessFiles[i].getFilePointer(); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/IndexBlock.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/IndexBlock.java index 14a654d9..73e733a5 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/IndexBlock.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/IndexBlock.java @@ -11,6 +11,7 @@ public enum IndexBlock { Meta(7, 7), 
PositionWords(8, 4.5), NamesWords(9, 5), + Unused(10, 10), Topic(11, 0.5); public final int id; @@ -19,7 +20,6 @@ public enum IndexBlock { IndexBlock(int id, double sortOrder) { this.sortOrder = sortOrder; this.id = id; - } public static IndexBlock byId(int id) { diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java index ac33b139..c2888f7a 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/reader/SearchIndex.java @@ -46,17 +46,14 @@ public class SearchIndex implements AutoCloseable { private void madvise(MultimapFileLong urls, BTreeReader reader) { - urls.advice(NativeIO.Advice.Random); words.forEachWordsOffset(offset -> { var h = reader.getHeader(offset); long length = h.dataOffsetLongs() - h.indexOffsetLongs(); - urls.adviceRange(NativeIO.Advice.Normal, offset, 512); + urls.adviceRange(NativeIO.Advice.WillNeed, offset, 512); if (length > 0) { urls.adviceRange(NativeIO.Advice.WillNeed, h.indexOffsetLongs(), length); - urls.adviceRange(NativeIO.Advice.Normal, h.dataOffsetLongs(), Math.min(2048, h.numEntries()*bTreeReader.ctx.entrySize())); - urls.pokeRange(h.indexOffsetLongs(), length); } }); }