From bcecc93e399fbbe219f390bb9a2b9d13afdf1351 Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Thu, 28 Dec 2023 19:45:35 +0100
Subject: [PATCH] (converter) Swallow errors in parquet data stream

---
 .../io/format/ParquetSerializableCrawlDataStream.java | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/ParquetSerializableCrawlDataStream.java b/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/ParquetSerializableCrawlDataStream.java
index 71159526..01457bd3 100644
--- a/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/ParquetSerializableCrawlDataStream.java
+++ b/code/process-models/crawling-model/src/main/java/nu/marginalia/crawling/io/format/ParquetSerializableCrawlDataStream.java
@@ -46,7 +46,13 @@ public class ParquetSerializableCrawlDataStream implements AutoCloseable, Serial
                 createDomainRecord(nextRecord);
                 wroteDomainRecord = true;
             }
-            createDocumentRecord(nextRecord);
+
+            try {
+                createDocumentRecord(nextRecord);
+            }
+            catch (Exception ex) {
+                logger.error("Failed to create document record", ex);
+            }
         }
         return !nextQ.isEmpty();
     }