From c4885998796e58237b2bb29172a46977378db785 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 28 Dec 2023 19:52:26 +0100 Subject: [PATCH] (converter) Fix NPE in converter --- .../converting/processor/DocumentDecorator.java | 6 ++---- .../converting/processor/DocumentProcessor.java | 10 ++++++---- .../converting/processor/DomainProcessor.java | 7 ++++--- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentDecorator.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentDecorator.java index d3002df2..02e22f4f 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentDecorator.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentDecorator.java @@ -10,18 +10,16 @@ import java.util.Set; public class DocumentDecorator { private final Set extraSearchTerms = new HashSet<>(); private final AnchorTextKeywords keywords; - private final DomainLinks externalDomainLinks; - public DocumentDecorator(AnchorTextKeywords keywords, DomainLinks externalDomainLinks) { + public DocumentDecorator(AnchorTextKeywords keywords) { this.keywords = keywords; - this.externalDomainLinks = externalDomainLinks; } public void addTerm(String term) { extraSearchTerms.add(term); } - public void apply(ProcessedDocument doc) { + public void apply(ProcessedDocument doc, DomainLinks externalDomainLinks) { if (doc == null) return; if (doc.words == null) diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentProcessor.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentProcessor.java index d10da715..a9043e33 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentProcessor.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DocumentProcessor.java @@ -39,7 +39,9 @@ public class DocumentProcessor { processorPlugins.add(plainTextDocumentProcessorPlugin); } - public ProcessedDocument process(CrawledDocument crawledDocument, DomainLinks externalDomainLinks, DocumentDecorator documentDecorator) { + public ProcessedDocument process(CrawledDocument crawledDocument, + DomainLinks externalDomainLinks, + DocumentDecorator documentDecorator) { ProcessedDocument ret = new ProcessedDocument(); try { @@ -52,7 +54,7 @@ public class DocumentProcessor { default -> DocumentClass.EXTERNALLY_LINKED_MULTI; }; - processDocument(crawledDocument, documentClass, documentDecorator, ret); + processDocument(crawledDocument, documentClass, documentDecorator, externalDomainLinks, ret); } catch (DisqualifiedException ex) { ret.state = UrlIndexingState.DISQUALIFIED; @@ -68,7 +70,7 @@ public class DocumentProcessor { return ret; } - private void processDocument(CrawledDocument crawledDocument, DocumentClass documentClass, DocumentDecorator documentDecorator, ProcessedDocument ret) throws URISyntaxException, DisqualifiedException { + private void processDocument(CrawledDocument crawledDocument, DocumentClass documentClass, DocumentDecorator documentDecorator, DomainLinks externalDomainLinks, ProcessedDocument ret) throws URISyntaxException, DisqualifiedException { var crawlerStatus = CrawlerDocumentStatus.valueOf(crawledDocument.crawlerStatus); if (crawlerStatus != CrawlerDocumentStatus.OK) { @@ -92,7 +94,7 @@ public class DocumentProcessor { ret.details = detailsWithWords.details(); ret.words = detailsWithWords.words(); - documentDecorator.apply(ret); + documentDecorator.apply(ret, externalDomainLinks); if (Boolean.TRUE.equals(crawledDocument.hasCookies) && ret.details != null diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java index 72797ee1..8aff30eb 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/DomainProcessor.java @@ -105,10 +105,11 @@ public class DomainProcessor { domain = new ProcessedDomain(); domain.sizeloadSizeAdvice = 10_000; - externalDomainLinks = anchorTagsSource.getAnchorTags(domain.domain); - documentDecorator = new DocumentDecorator(anchorTextKeywords, externalDomainLinks); + documentDecorator = new DocumentDecorator(anchorTextKeywords); processDomain(crawledDomain, domain, documentDecorator); + + externalDomainLinks = anchorTagsSource.getAnchorTags(domain.domain); } @Override @@ -215,7 +216,7 @@ public class DomainProcessor { } if (data instanceof CrawledDomain crawledDomain) { - documentDecorator = new DocumentDecorator(anchorTextKeywords, externalDomainLinks); + documentDecorator = new DocumentDecorator(anchorTextKeywords); processDomain(crawledDomain, ret, documentDecorator); ret.documents = docs;