mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(converter) Fix NPE bugs in converter due to the reintroduction of CrawledDocument.headers
This commit is contained in:
parent
8b85a58fea
commit
e9e8580913
@@ -17,6 +17,9 @@ public class AcceptableAds {
|
||||
}
|
||||
|
||||
public static boolean hasAcceptableAdsHeader(CrawledDocument document) {
|
||||
return document.headers.contains("X-Adblock-Key");
|
||||
if (document.headers != null) {
|
||||
return document.headers.contains("X-Adblock-Key");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@@ -39,6 +39,7 @@ import org.slf4j.LoggerFactory;
|
||||
import java.net.URISyntaxException;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
import static nu.marginalia.converting.model.DisqualifiedException.DisqualificationReason;
|
||||
@@ -127,7 +128,9 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
||||
|
||||
final EdgeUrl url = new EdgeUrl(crawledDocument.url);
|
||||
|
||||
final var generatorParts = documentGeneratorExtractor.detectGenerator(doc, crawledDocument.headers);
|
||||
final var generatorParts = documentGeneratorExtractor.detectGenerator(doc,
|
||||
Objects.requireNonNullElse(crawledDocument.headers, "")
|
||||
);
|
||||
|
||||
final var specialization = htmlProcessorSpecializations.select(generatorParts, url);
|
||||
|
||||
@@ -162,7 +165,12 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
||||
throw new DisqualifiedException(DisqualificationReason.QUALITY);
|
||||
}
|
||||
|
||||
PubDate pubDate = pubDateSniffer.getPubDate(crawledDocument.headers, url, doc, standard, true);
|
||||
PubDate pubDate = pubDateSniffer.getPubDate(
|
||||
Objects.requireNonNullElse(crawledDocument.headers, ""),
|
||||
url,
|
||||
doc,
|
||||
standard,
|
||||
true);
|
||||
|
||||
EnumSet<DocumentFlags> documentFlags = documentFlags(features, generatorParts.type());
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user