mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(converter) Stopgap fix for some cases of lost crawl data due to HTTP 304. The root cause needs further investigation.
This commit is contained in:
parent
64baa41e64
commit
934167323d
@@ -193,6 +193,8 @@ public class DomainProcessor {
|
|||||||
continue;
|
continue;
|
||||||
if (doc.url == null)
|
if (doc.url == null)
|
||||||
continue;
|
continue;
|
||||||
|
if (doc.documentBody.isBlank())
|
||||||
|
continue;
|
||||||
if (!processedUrls.add(doc.url))
|
if (!processedUrls.add(doc.url))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user