(converter) Fix close() ordering to prevent converter crash

This commit is contained in:
Viktor Lofgren 2025-01-26 14:47:36 +01:00
parent 74a1f100f4
commit f0d74aa3bb
2 changed files with 16 additions and 7 deletions

View File

@ -244,8 +244,14 @@ public class ConverterMain extends ProcessMainClass {
continue; continue;
} }
try (var dataStream = CrawledDomainReader.createDataStream(dataPath)) { try {
ConverterBatchWritableIf writable = processor.simpleProcessing(dataStream, sizeHint); // SerializableCrawlDataStream is AutoCloseable, but we can't use try-with-resources here because then it
// would be closed before it's consumed by the converterWriter. Instead, the converterWriter guarantees
// that it will close the stream after it's consumed.
var stream = SerializableCrawlDataStream.openDataStream(dataPath);
ConverterBatchWritableIf writable = processor.simpleProcessing(stream, sizeHint);
converterWriter.accept(writable); converterWriter.accept(writable);
} }
catch (Exception ex) { catch (Exception ex) {

View File

@ -39,6 +39,9 @@ public class ConverterWriter implements AutoCloseable {
workerThread.start(); workerThread.start();
} }
/** Queue and eventually write the domain into the converter journal.
* The domain object will be closed after it's processed.
*/
public void accept(@Nullable ConverterBatchWritableIf domain) { public void accept(@Nullable ConverterBatchWritableIf domain) {
if (null == domain) if (null == domain)
return; return;
@ -72,15 +75,15 @@ public class ConverterWriter implements AutoCloseable {
if (workLog.isItemCommitted(id) || workLog.isItemInCurrentBatch(id)) { if (workLog.isItemCommitted(id) || workLog.isItemInCurrentBatch(id)) {
logger.warn("Skipping already logged item {}", id); logger.warn("Skipping already logged item {}", id);
}
else {
currentWriter.write(data);
workLog.logItem(id);
data.close(); data.close();
continue;
} }
currentWriter.write(data);
workLog.logItem(id);
switcher.tick(); switcher.tick();
data.close();
} }
} }
catch (Exception ex) { catch (Exception ex) {