mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Merge pull request 'Fix buggy madvise code, clean up preconverter' (#39) from master into release
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/39
This commit is contained in:
commit
22abcc921e
@ -124,14 +124,22 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
|
||||
if (endLongs >= mappedSize)
|
||||
grow(endLongs);
|
||||
|
||||
var buff = mappedByteBuffers.get((int)(startLongs / bufferSize));
|
||||
|
||||
if ((int)(startLongs / bufferSize) != (int)((endLongs) / bufferSize)) {
|
||||
logger.debug("Misaligned madvise, skipping");
|
||||
return;
|
||||
int startIdx = (int)(startLongs / bufferSize);
|
||||
int endIdx = (int)(endLongs / bufferSize);
|
||||
|
||||
if (startIdx != endIdx) {
|
||||
long offsetStart = (startLongs % bufferSize) * WORD_SIZE;
|
||||
NativeIO.madviseRange(mappedByteBuffers.get(startIdx), advice, offsetStart, (int) (bufferSize * WORD_SIZE - offsetStart));
|
||||
for (int i = startIdx+1; i < endIdx; i++) {
|
||||
NativeIO.madviseRange(mappedByteBuffers.get(i), advice, 0, (int)(bufferSize * WORD_SIZE));
|
||||
}
|
||||
NativeIO.madviseRange(mappedByteBuffers.get(endIdx), advice, 0, (int)((endIdx % bufferSize) * WORD_SIZE));
|
||||
}
|
||||
else {
|
||||
var buff = mappedByteBuffers.get(startIdx);
|
||||
NativeIO.madviseRange(buff, advice, (startLongs % bufferSize) * WORD_SIZE, (int) (lengthLongs * WORD_SIZE));
|
||||
}
|
||||
|
||||
NativeIO.madviseRange(buff, advice, (startLongs % bufferSize) * WORD_SIZE, (int)(lengthLongs*WORD_SIZE));
|
||||
}
|
||||
|
||||
public void pokeRange(long offset, long length) {
|
||||
|
@ -17,19 +17,7 @@ import java.nio.file.Files;
|
||||
import java.util.Objects;
|
||||
|
||||
public class SearchIndexPreconverter {
|
||||
private static final int CHUNK_HEADER_SIZE = 16;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final SearchIndexPartitioner partitioner;
|
||||
private final TIntHashSet spamDomains;
|
||||
|
||||
/**
 * Reads a count from the beginning of {@code inputFile}.
 *
 * <p>Opens the file read-only, reads and discards one 8-byte {@code long},
 * then returns the 4-byte {@code int} that follows it, widened to {@code long}.
 * The file is closed by the try-with-resources block before returning.
 *
 * <p>NOTE(review): the method name suggests the int is a word count stored in
 * the file header; the header layout is not visible here — confirm against
 * the code that writes this file.
 *
 * @param inputFile file to read the header from
 * @return the int stored at byte offset 8 of the file
 */
// Presumably Lombok's @SneakyThrows, letting the IOException from
// RandomAccessFile escape without a throws clause — confirm the import.
@SneakyThrows
|
||||
public static long wordCount(File inputFile) {
|
||||
try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r")) {
|
||||
// Skip the leading 8-byte value; its result is intentionally unused.
raf.readLong();
|
||||
return raf.readInt();
|
||||
}
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
@Inject
|
||||
@ -38,8 +26,7 @@ public class SearchIndexPreconverter {
|
||||
SearchIndexPartitioner partitioner,
|
||||
EdgeDomainBlacklist blacklist)
|
||||
{
|
||||
this.partitioner = partitioner;
|
||||
this.spamDomains = blacklist.getSpamDomains();
|
||||
TIntHashSet spamDomains = blacklist.getSpamDomains();
|
||||
logger.info("Preconverting {}", inputFile);
|
||||
|
||||
for (File f : outputFiles) {
|
||||
@ -64,12 +51,10 @@ public class SearchIndexPreconverter {
|
||||
fileChannels[i] = randomAccessFiles[i].getChannel();
|
||||
}
|
||||
|
||||
|
||||
var lock = partitioner.getReadLock();
|
||||
try {
|
||||
lock.lock();
|
||||
ByteBuffer buffer = ByteBuffer.allocateDirect(8192);
|
||||
|
||||
for (var entry : indexJournalReader) {
|
||||
if (!partitioner.isGoodUrl(entry.urlId())
|
||||
|| spamDomains.contains(entry.domainId())) {
|
||||
@ -93,6 +78,7 @@ public class SearchIndexPreconverter {
|
||||
finally {
|
||||
lock.unlock();
|
||||
}
|
||||
logger.info("Finalizing preconversion");
|
||||
|
||||
for (int i = 0; i < randomAccessFiles.length; i++) {
|
||||
long pos = randomAccessFiles[i].getFilePointer();
|
||||
|
@ -11,6 +11,7 @@ public enum IndexBlock {
|
||||
Meta(7, 7),
|
||||
PositionWords(8, 4.5),
|
||||
NamesWords(9, 5),
|
||||
Unused(10, 10),
|
||||
Topic(11, 0.5);
|
||||
|
||||
public final int id;
|
||||
@ -19,7 +20,6 @@ public enum IndexBlock {
|
||||
/**
 * Creates an enum constant, storing both arguments in the fields of the
 * same name.
 *
 * @param id        numeric identifier of the block
 * @param sortOrder sort-order value of the block (fractional values such as
 *                  4.5 and 0.5 are used by the constants declared above)
 */
IndexBlock(int id, double sortOrder) {
|
||||
this.sortOrder = sortOrder;
|
||||
this.id = id;
|
||||
|
||||
}
|
||||
|
||||
public static IndexBlock byId(int id) {
|
||||
|
@ -46,17 +46,14 @@ public class SearchIndex implements AutoCloseable {
|
||||
|
||||
/**
 * Issues memory-advice hints for {@code urls}.
 *
 * <p>First applies {@code Advice.Random} to {@code urls} (no range given),
 * then, for every offset supplied by {@code words.forEachWordsOffset}, reads
 * the header at that offset through {@code reader} and advises ranges of
 * {@code urls} derived from it.
 *
 * <p>NOTE(review): this text comes from a diff hunk (-46,17 +46,14) shown
 * without +/- markers, so removed and added lines appear side by side. The
 * two calls on {@code (offset, 512)} and the three calls inside the
 * {@code if} are probably not all present in either version of the method —
 * confirm against the repository before relying on this listing.
 *
 * @param urls   mapped file that receives the advice
 * @param reader reader used to fetch the header at each word offset
 */
private void madvise(MultimapFileLong urls, BTreeReader reader) {
|
||||
|
||||
urls.advice(NativeIO.Advice.Random);
|
||||
words.forEachWordsOffset(offset -> {
|
||||
var h = reader.getHeader(offset);
|
||||
// Distance from the start of the index to the start of the data, as reported by the header.
long length = h.dataOffsetLongs() - h.indexOffsetLongs();
|
||||
|
||||
// NOTE(review): two different advice values for the same (offset, 512) range —
// likely the old and the new side of the diff; verify which one is current.
urls.adviceRange(NativeIO.Advice.Normal, offset, 512);
|
||||
urls.adviceRange(NativeIO.Advice.WillNeed, offset, 512);
|
||||
|
||||
// Only touch the index range when the header reports a non-empty one.
if (length > 0) {
|
||||
urls.adviceRange(NativeIO.Advice.WillNeed, h.indexOffsetLongs(), length);
|
||||
// NOTE(review): refers to bTreeReader rather than the reader parameter —
// presumably a field of the enclosing class; verify.
urls.adviceRange(NativeIO.Advice.Normal, h.dataOffsetLongs(), Math.min(2048, h.numEntries()*bTreeReader.ctx.entrySize()));
|
||||
urls.pokeRange(h.indexOffsetLongs(), length);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user