mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00

Fix buggy madvise code, clean up preconverter

parent adb63c20fd
commit f4bd754e37
@@ -124,14 +124,22 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
         if (endLongs >= mappedSize)
             grow(endLongs);
 
-        var buff = mappedByteBuffers.get((int)(startLongs / bufferSize));
+        int startIdx = (int)(startLongs / bufferSize);
+        int endIdx = (int)(endLongs / bufferSize);
 
-        if ((int)(startLongs / bufferSize) != (int)((endLongs) / bufferSize)) {
-            logger.debug("Misaligned madvise, skipping");
-            return;
+        if (startIdx != endIdx) {
+            long offsetStart = (startLongs % bufferSize) * WORD_SIZE;
+            NativeIO.madviseRange(mappedByteBuffers.get(startIdx), advice, offsetStart, (int) (bufferSize * WORD_SIZE - offsetStart));
+            for (int i = startIdx+1; i < endIdx; i++) {
+                NativeIO.madviseRange(mappedByteBuffers.get(i), advice, 0, (int)(bufferSize * WORD_SIZE));
+            }
+            NativeIO.madviseRange(mappedByteBuffers.get(endIdx), advice, 0, (int)((endIdx % bufferSize) * WORD_SIZE));
         }
-
-        NativeIO.madviseRange(buff, advice, (startLongs % bufferSize) * WORD_SIZE, (int)(lengthLongs*WORD_SIZE));
+        else {
+            var buff = mappedByteBuffers.get(startIdx);
+            NativeIO.madviseRange(buff, advice, (startLongs % bufferSize) * WORD_SIZE, (int) (lengthLongs * WORD_SIZE));
+        }
     }
 
     public void pokeRange(long offset, long length) {
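The gist of the fix: the old code logged "Misaligned madvise, skipping" and returned whenever a range crossed a mapped-buffer boundary, so those ranges never received any advice. The new code splits the range into a head (the remainder of the first buffer), zero or more whole middle buffers, and a tail, and advises each piece. Below is a minimal sketch of that splitting logic, assuming fixed-size buffers of bufferSize longs; the AdviceSink callback and the adviseLongs name are hypothetical stand-ins for calling NativeIO.madviseRange on each mapped ByteBuffer.

// Sketch only: mirrors the head/middle/tail split used in the fix.
public class MadviseSplitSketch {
    static final int WORD_SIZE = 8; // bytes per long

    @FunctionalInterface
    interface AdviceSink {
        // one madvise-style call per mapped buffer: (buffer index, byte offset, byte length)
        void advise(int bufferIdx, long byteOffset, int byteLength);
    }

    // Advise the long range [startLongs, startLongs + lengthLongs) of a file that is
    // mapped as consecutive buffers of bufferSize longs each.
    static void adviseLongs(long startLongs, long lengthLongs, long bufferSize, AdviceSink sink) {
        long endLongs = startLongs + lengthLongs;
        int startIdx = (int) (startLongs / bufferSize);
        int endIdx = (int) (endLongs / bufferSize);

        if (startIdx != endIdx) {
            // head: from the offset within the first buffer to that buffer's end
            long offsetStart = (startLongs % bufferSize) * WORD_SIZE;
            sink.advise(startIdx, offsetStart, (int) (bufferSize * WORD_SIZE - offsetStart));

            // middle: whole buffers
            for (int i = startIdx + 1; i < endIdx; i++) {
                sink.advise(i, 0, (int) (bufferSize * WORD_SIZE));
            }

            // tail: from the start of the last buffer up to endLongs
            sink.advise(endIdx, 0, (int) ((endLongs % bufferSize) * WORD_SIZE));
        }
        else {
            // the whole range fits inside a single buffer
            sink.advise(startIdx, (startLongs % bufferSize) * WORD_SIZE, (int) (lengthLongs * WORD_SIZE));
        }
    }
}

For example, with bufferSize = 8 longs, adviseLongs(5, 10, 8, sink) issues two calls: 24 bytes at offset 40 in buffer 0, and 56 bytes at offset 0 in buffer 1.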
@@ -17,19 +17,7 @@ import java.nio.file.Files;
 import java.util.Objects;
 
 public class SearchIndexPreconverter {
-    private static final int CHUNK_HEADER_SIZE = 16;
-
     private final Logger logger = LoggerFactory.getLogger(getClass());
-    private final SearchIndexPartitioner partitioner;
-    private final TIntHashSet spamDomains;
-
-    @SneakyThrows
-    public static long wordCount(File inputFile) {
-        try (RandomAccessFile raf = new RandomAccessFile(inputFile, "r")) {
-            raf.readLong();
-            return raf.readInt();
-        }
-    }
 
     @SneakyThrows
     @Inject
@@ -38,8 +26,7 @@ public class SearchIndexPreconverter {
                                     SearchIndexPartitioner partitioner,
                                     EdgeDomainBlacklist blacklist)
     {
-        this.partitioner = partitioner;
-        this.spamDomains = blacklist.getSpamDomains();
+        TIntHashSet spamDomains = blacklist.getSpamDomains();
 
         logger.info("Preconverting {}", inputFile);
 
         for (File f : outputFiles) {
@@ -64,12 +51,10 @@ public class SearchIndexPreconverter {
             fileChannels[i] = randomAccessFiles[i].getChannel();
         }
 
-
         var lock = partitioner.getReadLock();
         try {
             lock.lock();
             ByteBuffer buffer = ByteBuffer.allocateDirect(8192);
-
             for (var entry : indexJournalReader) {
                 if (!partitioner.isGoodUrl(entry.urlId())
                         || spamDomains.contains(entry.domainId())) {
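This loop is where the cleanup lands: spamDomains is now a plain local set, and each journal entry is dropped if its URL fails the partitioner's check or its domain is on the spam blacklist. A minimal, self-contained sketch of that filter, using Trove's TIntHashSet as the real code does; the Entry record and the isGoodUrl predicate are hypothetical stand-ins.

import gnu.trove.set.hash.TIntHashSet;

import java.util.List;
import java.util.function.IntPredicate;

public class PreconvertFilterSketch {
    // stand-in for an index journal entry; only the ids the filter inspects
    record Entry(int urlId, int domainId) {}

    // keep entries whose url passes the check and whose domain is not blacklisted
    static List<Entry> filter(List<Entry> entries, IntPredicate isGoodUrl, TIntHashSet spamDomains) {
        return entries.stream()
                .filter(e -> isGoodUrl.test(e.urlId()) && !spamDomains.contains(e.domainId()))
                .toList();
    }

    public static void main(String[] args) {
        TIntHashSet spamDomains = new TIntHashSet(new int[] { 666 });
        var entries = List.of(new Entry(1, 10), new Entry(2, 666), new Entry(3, 20));
        // the second entry is dropped because its domain is blacklisted
        System.out.println(filter(entries, urlId -> urlId > 0, spamDomains));
    }
}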
@@ -93,6 +78,7 @@ public class SearchIndexPreconverter {
         finally {
             lock.unlock();
         }
+        logger.info("Finalizing preconversion");
 
         for (int i = 0; i < randomAccessFiles.length; i++) {
             long pos = randomAccessFiles[i].getFilePointer();
@@ -11,6 +11,7 @@ public enum IndexBlock {
     Meta(7, 7),
     PositionWords(8, 4.5),
     NamesWords(9, 5),
+    Unused(10, 10),
     Topic(11, 0.5);
 
     public final int id;
@@ -19,7 +20,6 @@ public enum IndexBlock {
     IndexBlock(int id, double sortOrder) {
         this.sortOrder = sortOrder;
         this.id = id;
-
     }
 
     public static IndexBlock byId(int id) {
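IndexBlock pairs a stable numeric id (stored in the index files) with a sort order used for ranking, and byId maps the id back to the constant; the new Unused(10, 10) constant presumably keeps id 10 occupied instead of renumbering Topic. A minimal sketch of how such an enum and its byId lookup can be written; the repository's actual byId implementation may differ.

// Hypothetical standalone version of the IndexBlock id/sortOrder pairing.
public enum BlockSketch {
    Meta(7, 7),
    PositionWords(8, 4.5),
    NamesWords(9, 5),
    Unused(10, 10),
    Topic(11, 0.5);

    public final int id;
    public final double sortOrder;

    BlockSketch(int id, double sortOrder) {
        this.id = id;
        this.sortOrder = sortOrder;
    }

    // linear scan over values(); fine for a dozen constants
    public static BlockSketch byId(int id) {
        for (var block : values()) {
            if (block.id == id) {
                return block;
            }
        }
        throw new IllegalArgumentException("Unknown block id " + id);
    }
}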
@@ -46,17 +46,14 @@ public class SearchIndex implements AutoCloseable {
 
     private void madvise(MultimapFileLong urls, BTreeReader reader) {
-        urls.advice(NativeIO.Advice.Random);
 
         words.forEachWordsOffset(offset -> {
             var h = reader.getHeader(offset);
             long length = h.dataOffsetLongs() - h.indexOffsetLongs();
 
-            urls.adviceRange(NativeIO.Advice.Normal, offset, 512);
+            urls.adviceRange(NativeIO.Advice.WillNeed, offset, 512);
 
             if (length > 0) {
                 urls.adviceRange(NativeIO.Advice.WillNeed, h.indexOffsetLongs(), length);
-                urls.adviceRange(NativeIO.Advice.Normal, h.dataOffsetLongs(), Math.min(2048, h.numEntries()*bTreeReader.ctx.entrySize()));
-                urls.pokeRange(h.indexOffsetLongs(), length);
             }
         });
     }
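This last hunk removes the blanket Random advice over the whole urls file and changes the advice on the 512-long range at each word offset from Normal to WillNeed, so the kernel is asked to prefetch the B-tree index pages rather than merely being told access is random; the extra Normal advice and the pokeRange call on the data range are dropped. For reference, a rough sketch of the advice values an enum like NativeIO.Advice typically maps to on Linux; this is an assumption about its internals, not a copy of the project's code.

// Sketch only: the numeric values are the standard POSIX_MADV_* constants from <sys/mman.h>.
public enum AdviceSketch {
    Normal(0),     // POSIX_MADV_NORMAL: no special treatment
    Random(1),     // POSIX_MADV_RANDOM: expect random access, read-ahead is wasted effort
    Sequential(2), // POSIX_MADV_SEQUENTIAL: expect sequential access, read ahead aggressively
    WillNeed(3),   // POSIX_MADV_WILLNEED: these pages will be needed soon, prefetch them
    DontNeed(4);   // POSIX_MADV_DONTNEED: these pages can be dropped from the page cache

    public final int value;

    AdviceSketch(int value) {
        this.value = value;
    }
}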