(converter) Fix data-loss bug where the converter writer would remove all but the last batch of processed data

This commit is contained in:
Viktor Lofgren 2024-12-13 01:19:30 +01:00
parent 0fb03e3d62
commit fb2beb1eac

View File

@ -12,13 +12,11 @@ import nu.marginalia.model.processed.SlopDocumentRecord;
import nu.marginalia.model.processed.SlopDomainLinkRecord; import nu.marginalia.model.processed.SlopDomainLinkRecord;
import nu.marginalia.model.processed.SlopDomainRecord; import nu.marginalia.model.processed.SlopDomainRecord;
import nu.marginalia.sequence.VarintCodedSequence; import nu.marginalia.sequence.VarintCodedSequence;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.*; import java.util.*;
@ -33,27 +31,15 @@ public class ConverterBatchWriter implements AutoCloseable, ConverterBatchWriter
private static final Logger logger = LoggerFactory.getLogger(ConverterBatchWriter.class); private static final Logger logger = LoggerFactory.getLogger(ConverterBatchWriter.class);
public ConverterBatchWriter(Path basePath, int batchNumber) throws IOException { public ConverterBatchWriter(Path basePath, int batchNumber) throws IOException {
Path domainPath = initSlopDir(ProcessedDataFileNames.domainFileName(basePath)); Path domainPath = ProcessedDataFileNames.domainFileName(basePath);
Path linksPath = initSlopDir(ProcessedDataFileNames.domainLinkFileName(basePath)); Path linksPath = ProcessedDataFileNames.domainLinkFileName(basePath);
Path docsPath = initSlopDir(ProcessedDataFileNames.documentFileName(basePath)); Path docsPath = ProcessedDataFileNames.documentFileName(basePath);
domainWriter = new SlopDomainRecord.Writer(domainPath, batchNumber); domainWriter = new SlopDomainRecord.Writer(domainPath, batchNumber);
domainLinkWriter = new SlopDomainLinkRecord.Writer(linksPath, batchNumber); domainLinkWriter = new SlopDomainLinkRecord.Writer(linksPath, batchNumber);
documentWriter = new SlopDocumentRecord.Writer(docsPath, batchNumber); documentWriter = new SlopDocumentRecord.Writer(docsPath, batchNumber);
} }
/**
 * Resets {@code p} to an empty directory and returns it.
 * <p>
 * If {@code p} is currently a directory, it is deleted together with everything inside it;
 * if it exists as anything else, that single entry is deleted. The directory (and any
 * missing parent directories) is then created.
 * <p>
 * NOTE(review): because existing contents are discarded on every call, invoking this each
 * time a writer is constructed would wipe data written earlier to the same path. This is
 * presumably the data loss named in the commit message; confirm against callers.
 *
 * @param p the path to reset
 * @return the same {@code p} that was passed in
 * @throws IOException if removing the existing entry or creating the directory fails
 */
private Path initSlopDir(Path p) throws IOException {
if (Files.isDirectory(p)) {
// Existing directory: remove it recursively, including anything already stored in it
FileUtils.deleteDirectory(p.toFile());
}
else if (Files.exists(p)) {
// Something exists at p that is not a directory: remove that single entry
Files.delete(p);
}
// At this point nothing exists at p; create it (and any missing parents) as a directory
Files.createDirectories(p);
return p;
}
/** Sets the lowest ordinal value for the documents in this batch */ /** Sets the lowest ordinal value for the documents in this batch */
public void setOrdinalOffset(int ordinalOffset) { public void setOrdinalOffset(int ordinalOffset) {