MarginaliaSearch/code/processes/loading-process/java/nu/marginalia/loading/LoaderIndexJournalWriter.java

69 lines
2.2 KiB
Java
Raw Normal View History

package nu.marginalia.loading;
2023-03-04 12:19:01 +00:00
import com.google.inject.Inject;
import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.journal.model.IndexJournalEntryData;
import nu.marginalia.storage.FileStorageService;
2023-03-04 12:19:01 +00:00
import nu.marginalia.index.journal.model.IndexJournalEntryHeader;
import nu.marginalia.index.journal.writer.IndexJournalWriterPagingImpl;
2023-03-04 12:19:01 +00:00
import nu.marginalia.index.journal.writer.IndexJournalWriter;
import nu.marginalia.keyword.model.DocumentKeywords;
import nu.marginalia.index.journal.IndexJournalFileNames;
2023-03-04 12:19:01 +00:00
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Files;
2023-03-04 12:19:01 +00:00
@Singleton
public class LoaderIndexJournalWriter {
private final IndexJournalWriter indexWriter;
private static final Logger logger = LoggerFactory.getLogger(LoaderIndexJournalWriter.class);
private final long[] buffer = new long[65536];
2023-03-04 12:19:01 +00:00
@Inject
public LoaderIndexJournalWriter(FileStorageService fileStorageService) throws IOException {
var indexArea = IndexLocations.getIndexConstructionArea(fileStorageService);
2023-03-04 12:19:01 +00:00
var existingIndexFiles = IndexJournalFileNames.findJournalFiles(indexArea);
for (var existingFile : existingIndexFiles) {
Files.delete(existingFile);
}
indexWriter = new IndexJournalWriterPagingImpl(indexArea);
2023-03-04 12:19:01 +00:00
}
@SneakyThrows
public void putWords(long combinedId,
int features,
long metadata,
int length,
DocumentKeywords wordSet) {
if (wordSet.isEmpty()) {
logger.info("Skipping zero-length word set for {}", combinedId);
2023-03-04 12:19:01 +00:00
return;
2023-08-07 10:57:38 +00:00
}
2023-03-04 12:19:01 +00:00
if (combinedId <= 0) {
logger.warn("Bad ID: {}", combinedId);
2023-03-04 12:19:01 +00:00
return;
}
var header = new IndexJournalEntryHeader(combinedId, features, length, metadata);
var data = new IndexJournalEntryData(wordSet.keywords, wordSet.metadata, wordSet.positions);
2023-03-04 12:19:01 +00:00
indexWriter.put(header, data);
2023-03-04 12:19:01 +00:00
}
public void close() throws Exception {
indexWriter.close();
}
}