(converter) Fix close() ordering to prevent converter crash

This commit is contained in:
Viktor Lofgren 2025-01-26 14:47:36 +01:00
parent 74a1f100f4
commit f0d74aa3bb
2 changed files with 16 additions and 7 deletions

View File

@ -244,8 +244,14 @@ public class ConverterMain extends ProcessMainClass {
continue;
}
try (var dataStream = CrawledDomainReader.createDataStream(dataPath)) {
ConverterBatchWritableIf writable = processor.simpleProcessing(dataStream, sizeHint);
try {
// SerializableCrawlDataStream is AutoCloseable, but we can't use try-with-resources here because
// the stream would then be closed before it is consumed by the converterWriter. Instead, the
// converterWriter guarantees that it will close the stream after it has been consumed.
var stream = SerializableCrawlDataStream.openDataStream(dataPath);
ConverterBatchWritableIf writable = processor.simpleProcessing(stream, sizeHint);
converterWriter.accept(writable);
}
catch (Exception ex) {

View File

@ -39,6 +39,9 @@ public class ConverterWriter implements AutoCloseable {
workerThread.start();
}
/** Queue and eventually write the domain into the converter journal.
* The domain object will be closed after it's processed.
*/
public void accept(@Nullable ConverterBatchWritableIf domain) {
if (null == domain)
return;
@ -72,15 +75,15 @@ public class ConverterWriter implements AutoCloseable {
if (workLog.isItemCommitted(id) || workLog.isItemInCurrentBatch(id)) {
logger.warn("Skipping already logged item {}", id);
}
else {
currentWriter.write(data);
workLog.logItem(id);
data.close();
continue;
}
currentWriter.write(data);
workLog.logItem(id);
switcher.tick();
data.close();
}
}
catch (Exception ex) {