mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(converter) Fix NPE in converter
This commit is contained in:
parent
bcecc93e39
commit
c488599879
@ -10,18 +10,16 @@ import java.util.Set;
|
||||
public class DocumentDecorator {
|
||||
private final Set<String> extraSearchTerms = new HashSet<>();
|
||||
private final AnchorTextKeywords keywords;
|
||||
private final DomainLinks externalDomainLinks;
|
||||
|
||||
public DocumentDecorator(AnchorTextKeywords keywords, DomainLinks externalDomainLinks) {
|
||||
public DocumentDecorator(AnchorTextKeywords keywords) {
|
||||
this.keywords = keywords;
|
||||
this.externalDomainLinks = externalDomainLinks;
|
||||
}
|
||||
|
||||
/**
 * Adds {@code term} to this decorator's set of extra search terms.
 * The backing collection is a {@code HashSet}, so adding a term that is
 * already present has no further effect.
 *
 * @param term the search term to record
 */
public void addTerm(String term) {
|
||||
extraSearchTerms.add(term);
|
||||
}
|
||||
|
||||
public void apply(ProcessedDocument doc) {
|
||||
public void apply(ProcessedDocument doc, DomainLinks externalDomainLinks) {
|
||||
if (doc == null)
|
||||
return;
|
||||
if (doc.words == null)
|
||||
|
@ -39,7 +39,9 @@ public class DocumentProcessor {
|
||||
processorPlugins.add(plainTextDocumentProcessorPlugin);
|
||||
}
|
||||
|
||||
public ProcessedDocument process(CrawledDocument crawledDocument, DomainLinks externalDomainLinks, DocumentDecorator documentDecorator) {
|
||||
public ProcessedDocument process(CrawledDocument crawledDocument,
|
||||
DomainLinks externalDomainLinks,
|
||||
DocumentDecorator documentDecorator) {
|
||||
ProcessedDocument ret = new ProcessedDocument();
|
||||
|
||||
try {
|
||||
@ -52,7 +54,7 @@ public class DocumentProcessor {
|
||||
default -> DocumentClass.EXTERNALLY_LINKED_MULTI;
|
||||
};
|
||||
|
||||
processDocument(crawledDocument, documentClass, documentDecorator, ret);
|
||||
processDocument(crawledDocument, documentClass, documentDecorator, externalDomainLinks, ret);
|
||||
}
|
||||
catch (DisqualifiedException ex) {
|
||||
ret.state = UrlIndexingState.DISQUALIFIED;
|
||||
@ -68,7 +70,7 @@ public class DocumentProcessor {
|
||||
return ret;
|
||||
}
|
||||
|
||||
private void processDocument(CrawledDocument crawledDocument, DocumentClass documentClass, DocumentDecorator documentDecorator, ProcessedDocument ret) throws URISyntaxException, DisqualifiedException {
|
||||
private void processDocument(CrawledDocument crawledDocument, DocumentClass documentClass, DocumentDecorator documentDecorator, DomainLinks externalDomainLinks, ProcessedDocument ret) throws URISyntaxException, DisqualifiedException {
|
||||
|
||||
var crawlerStatus = CrawlerDocumentStatus.valueOf(crawledDocument.crawlerStatus);
|
||||
if (crawlerStatus != CrawlerDocumentStatus.OK) {
|
||||
@ -92,7 +94,7 @@ public class DocumentProcessor {
|
||||
ret.details = detailsWithWords.details();
|
||||
ret.words = detailsWithWords.words();
|
||||
|
||||
documentDecorator.apply(ret);
|
||||
documentDecorator.apply(ret, externalDomainLinks);
|
||||
|
||||
if (Boolean.TRUE.equals(crawledDocument.hasCookies)
|
||||
&& ret.details != null
|
||||
|
@ -105,10 +105,11 @@ public class DomainProcessor {
|
||||
|
||||
domain = new ProcessedDomain();
|
||||
domain.sizeloadSizeAdvice = 10_000;
|
||||
externalDomainLinks = anchorTagsSource.getAnchorTags(domain.domain);
|
||||
documentDecorator = new DocumentDecorator(anchorTextKeywords, externalDomainLinks);
|
||||
|
||||
documentDecorator = new DocumentDecorator(anchorTextKeywords);
|
||||
processDomain(crawledDomain, domain, documentDecorator);
|
||||
|
||||
externalDomainLinks = anchorTagsSource.getAnchorTags(domain.domain);
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -215,7 +216,7 @@ public class DomainProcessor {
|
||||
}
|
||||
|
||||
if (data instanceof CrawledDomain crawledDomain) {
|
||||
documentDecorator = new DocumentDecorator(anchorTextKeywords, externalDomainLinks);
|
||||
documentDecorator = new DocumentDecorator(anchorTextKeywords);
|
||||
|
||||
processDomain(crawledDomain, ret, documentDecorator);
|
||||
ret.documents = docs;
|
||||
|
Loading…
Reference in New Issue
Block a user