MarginaliaSearch/code/processes/loading-process/java/nu/marginalia/loading/LoaderIndexJournalWriter.java
Viktor Lofgren aebb2652e8 (wip) Extract and encode spans data
Refactoring keyword extraction to extract spans information.

Modifying the intermediate storage of converted data to use the new slop library, which allows for easier storage of ad-hoc binary data like spans and positions.

This is a bit of a katamari damacy commit that ended up dragging along a bunch of other fairly tangentially related changes that are hard to break out into separate commits after the fact.  Will push as-is to get back to being able to do more isolated work.
2024-07-27 11:44:13 +02:00

63 lines
1.7 KiB
Java

package nu.marginalia.loading;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import lombok.SneakyThrows;
import nu.marginalia.IndexLocations;
import nu.marginalia.index.journal.IndexJournal;
import nu.marginalia.index.journal.IndexJournalSlopWriter;
import nu.marginalia.model.processed.SlopDocumentRecord;
import nu.marginalia.storage.FileStorageService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Writes keyword records to the index journal, spreading them over numbered
 * pages of at most {@link #MAX_RECORDS_PER_PAGE} records each.
 * <p>
 * One {@link IndexJournalSlopWriter} is open at a time. When the current page
 * is full, its writer is closed and a writer for the next page number is opened
 * on the same journal path.
 * <p>
 * The class is bound as a Guice singleton and performs no synchronization of
 * its own.
 */
@Singleton
public class LoaderIndexJournalWriter {

    private static final Logger logger = LoggerFactory.getLogger(LoaderIndexJournalWriter.class);

    /** Number of records written to a page before rolling over to the next one. */
    private static final long MAX_RECORDS_PER_PAGE = 200_000;

    private final Path journalPath;

    /** Writer for the page currently being filled. */
    private IndexJournalSlopWriter currentWriter = null;

    /** Number of records written to the current page (not a running total). */
    private long recordsWritten = 0;

    /** Page number that the next writer will be opened with. */
    private int page;

    /**
     * Resolves the index construction area, allocates a journal path inside it
     * and opens the writer for the first page.
     *
     * @param fileStorageService used to locate the index construction area
     * @throws IOException if the journal path cannot be inspected or the first
     *                     writer cannot be opened
     */
    @Inject
    public LoaderIndexJournalWriter(FileStorageService fileStorageService) throws IOException {
        var indexArea = IndexLocations.getIndexConstructionArea(fileStorageService);

        journalPath = IndexJournal.allocateName(indexArea);

        // NOTE(review): presumably numPages reports the pages already present
        // under journalPath, so numbering continues after them -- confirm.
        page = IndexJournal.numPages(journalPath);

        switchToNextVersion();

        logger.info("Creating Journal Writer {}", indexArea);
    }

    /**
     * Closes the current writer, if any, and opens a writer for the next page.
     * The page number is advanced as part of opening the new writer.
     *
     * @throws IOException if closing the old writer or opening the new one fails
     */
    private void switchToNextVersion() throws IOException {
        if (currentWriter != null) {
            currentWriter.close();
        }

        currentWriter = new IndexJournalSlopWriter(journalPath, page++);
    }

    /**
     * Appends one record to the journal, rolling over to a new page first if the
     * current page already holds {@link #MAX_RECORDS_PER_PAGE} records.
     * <p>
     * Checked exceptions raised by the underlying writer (or by the rollover)
     * are rethrown undeclared via Lombok's {@code @SneakyThrows}.
     *
     * @param header header value passed through to the writer
     * @param data   keyword data passed through to the writer
     */
    @SneakyThrows
    public void putWords(long header, SlopDocumentRecord.KeywordsProjection data)
    {
        // Check before writing so that every page, including the ones after the
        // first, is capped at exactly MAX_RECORDS_PER_PAGE records.
        if (recordsWritten >= MAX_RECORDS_PER_PAGE) {
            switchToNextVersion();
            // Reset only once the switch has succeeded, so a failed rollover is
            // attempted again on the next call instead of being forgotten.
            recordsWritten = 0;
        }

        currentWriter.put(header, data);
        recordsWritten++;
    }

    /**
     * Closes the writer for the current page. No new writer is opened.
     *
     * @throws IOException if the underlying writer fails to close
     */
    public void close() throws IOException {
        currentWriter.close();
    }
}