mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(converter) Improve simple processing performance
The recent slop migration changes introduced a performance regression in the simple conversion track. This commit fixes that regression.
This commit is contained in:
parent
270cab874b
commit
088310e998
@ -193,13 +193,16 @@ public class DomainProcessor {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Iterator<ProcessedDocument> getDocumentsStream() {
|
public Iterator<ProcessedDocument> getDocumentsStream() {
|
||||||
return dataStream.map((next) -> {
|
return iteratorFactory.create((taskConsumer) -> {
|
||||||
if (!(next instanceof CrawledDocument doc))
|
while (dataStream.hasNext())
|
||||||
return Optional.empty();
|
{
|
||||||
|
if (!(dataStream.next() instanceof CrawledDocument doc))
|
||||||
|
continue;
|
||||||
if (doc.url == null || !processedUrls.add(doc.url))
|
if (doc.url == null || !processedUrls.add(doc.url))
|
||||||
return Optional.empty();
|
continue;
|
||||||
|
|
||||||
|
|
||||||
|
taskConsumer.accept(() -> {
|
||||||
var processedDoc = documentProcessor.process(doc, domain.domain, externalDomainLinks, documentDecorator);
|
var processedDoc = documentProcessor.process(doc, domain.domain, externalDomainLinks, documentDecorator);
|
||||||
|
|
||||||
synchronized (deduplicator) {
|
synchronized (deduplicator) {
|
||||||
@ -212,7 +215,9 @@ public class DomainProcessor {
|
|||||||
10_000, externalDomainLinks.countForUrl(processedDoc.url));
|
10_000, externalDomainLinks.countForUrl(processedDoc.url));
|
||||||
}
|
}
|
||||||
|
|
||||||
return Optional.of(processedDoc);
|
return processedDoc;
|
||||||
|
});
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user