(converter) Stopgap fix for some cases of lost crawl data due to HTTP 304. The root cause needs further investigation.

This commit is contained in:
Viktor Lofgren 2024-04-19 20:36:01 +02:00
parent 64baa41e64
commit 934167323d

View File

@@ -193,6 +193,8 @@ public class DomainProcessor {
         continue;
     if (doc.url == null)
         continue;
+    if (doc.documentBody.isBlank())
+        continue;
     if (!processedUrls.add(doc.url))
         continue;