mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(converter) Fix NPE in converter
This commit is contained in:
parent
bcecc93e39
commit
c488599879
@ -10,18 +10,16 @@ import java.util.Set;
|
|||||||
public class DocumentDecorator {
|
public class DocumentDecorator {
|
||||||
private final Set<String> extraSearchTerms = new HashSet<>();
|
private final Set<String> extraSearchTerms = new HashSet<>();
|
||||||
private final AnchorTextKeywords keywords;
|
private final AnchorTextKeywords keywords;
|
||||||
private final DomainLinks externalDomainLinks;
|
|
||||||
|
|
||||||
public DocumentDecorator(AnchorTextKeywords keywords, DomainLinks externalDomainLinks) {
|
public DocumentDecorator(AnchorTextKeywords keywords) {
|
||||||
this.keywords = keywords;
|
this.keywords = keywords;
|
||||||
this.externalDomainLinks = externalDomainLinks;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addTerm(String term) {
|
public void addTerm(String term) {
|
||||||
extraSearchTerms.add(term);
|
extraSearchTerms.add(term);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void apply(ProcessedDocument doc) {
|
public void apply(ProcessedDocument doc, DomainLinks externalDomainLinks) {
|
||||||
if (doc == null)
|
if (doc == null)
|
||||||
return;
|
return;
|
||||||
if (doc.words == null)
|
if (doc.words == null)
|
||||||
|
@ -39,7 +39,9 @@ public class DocumentProcessor {
|
|||||||
processorPlugins.add(plainTextDocumentProcessorPlugin);
|
processorPlugins.add(plainTextDocumentProcessorPlugin);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ProcessedDocument process(CrawledDocument crawledDocument, DomainLinks externalDomainLinks, DocumentDecorator documentDecorator) {
|
public ProcessedDocument process(CrawledDocument crawledDocument,
|
||||||
|
DomainLinks externalDomainLinks,
|
||||||
|
DocumentDecorator documentDecorator) {
|
||||||
ProcessedDocument ret = new ProcessedDocument();
|
ProcessedDocument ret = new ProcessedDocument();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -52,7 +54,7 @@ public class DocumentProcessor {
|
|||||||
default -> DocumentClass.EXTERNALLY_LINKED_MULTI;
|
default -> DocumentClass.EXTERNALLY_LINKED_MULTI;
|
||||||
};
|
};
|
||||||
|
|
||||||
processDocument(crawledDocument, documentClass, documentDecorator, ret);
|
processDocument(crawledDocument, documentClass, documentDecorator, externalDomainLinks, ret);
|
||||||
}
|
}
|
||||||
catch (DisqualifiedException ex) {
|
catch (DisqualifiedException ex) {
|
||||||
ret.state = UrlIndexingState.DISQUALIFIED;
|
ret.state = UrlIndexingState.DISQUALIFIED;
|
||||||
@ -68,7 +70,7 @@ public class DocumentProcessor {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void processDocument(CrawledDocument crawledDocument, DocumentClass documentClass, DocumentDecorator documentDecorator, ProcessedDocument ret) throws URISyntaxException, DisqualifiedException {
|
private void processDocument(CrawledDocument crawledDocument, DocumentClass documentClass, DocumentDecorator documentDecorator, DomainLinks externalDomainLinks, ProcessedDocument ret) throws URISyntaxException, DisqualifiedException {
|
||||||
|
|
||||||
var crawlerStatus = CrawlerDocumentStatus.valueOf(crawledDocument.crawlerStatus);
|
var crawlerStatus = CrawlerDocumentStatus.valueOf(crawledDocument.crawlerStatus);
|
||||||
if (crawlerStatus != CrawlerDocumentStatus.OK) {
|
if (crawlerStatus != CrawlerDocumentStatus.OK) {
|
||||||
@ -92,7 +94,7 @@ public class DocumentProcessor {
|
|||||||
ret.details = detailsWithWords.details();
|
ret.details = detailsWithWords.details();
|
||||||
ret.words = detailsWithWords.words();
|
ret.words = detailsWithWords.words();
|
||||||
|
|
||||||
documentDecorator.apply(ret);
|
documentDecorator.apply(ret, externalDomainLinks);
|
||||||
|
|
||||||
if (Boolean.TRUE.equals(crawledDocument.hasCookies)
|
if (Boolean.TRUE.equals(crawledDocument.hasCookies)
|
||||||
&& ret.details != null
|
&& ret.details != null
|
||||||
|
@ -105,10 +105,11 @@ public class DomainProcessor {
|
|||||||
|
|
||||||
domain = new ProcessedDomain();
|
domain = new ProcessedDomain();
|
||||||
domain.sizeloadSizeAdvice = 10_000;
|
domain.sizeloadSizeAdvice = 10_000;
|
||||||
externalDomainLinks = anchorTagsSource.getAnchorTags(domain.domain);
|
|
||||||
documentDecorator = new DocumentDecorator(anchorTextKeywords, externalDomainLinks);
|
|
||||||
|
|
||||||
|
documentDecorator = new DocumentDecorator(anchorTextKeywords);
|
||||||
processDomain(crawledDomain, domain, documentDecorator);
|
processDomain(crawledDomain, domain, documentDecorator);
|
||||||
|
|
||||||
|
externalDomainLinks = anchorTagsSource.getAnchorTags(domain.domain);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -215,7 +216,7 @@ public class DomainProcessor {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (data instanceof CrawledDomain crawledDomain) {
|
if (data instanceof CrawledDomain crawledDomain) {
|
||||||
documentDecorator = new DocumentDecorator(anchorTextKeywords, externalDomainLinks);
|
documentDecorator = new DocumentDecorator(anchorTextKeywords);
|
||||||
|
|
||||||
processDomain(crawledDomain, ret, documentDecorator);
|
processDomain(crawledDomain, ret, documentDecorator);
|
||||||
ret.documents = docs;
|
ret.documents = docs;
|
||||||
|
Loading…
Reference in New Issue
Block a user