(converter) Improve simple processing performance

The recent slop migration changes introduced a performance regression in the simple conversion track. This commit fixes that regression.
This commit is contained in:
Viktor Lofgren 2025-01-21 14:13:33 +01:00
parent 270cab874b
commit 088310e998

View File

@ -193,13 +193,16 @@ public class DomainProcessor {
@Override @Override
public Iterator<ProcessedDocument> getDocumentsStream() { public Iterator<ProcessedDocument> getDocumentsStream() {
return dataStream.map((next) -> { return iteratorFactory.create((taskConsumer) -> {
if (!(next instanceof CrawledDocument doc)) while (dataStream.hasNext())
return Optional.empty(); {
if (!(dataStream.next() instanceof CrawledDocument doc))
continue;
if (doc.url == null || !processedUrls.add(doc.url)) if (doc.url == null || !processedUrls.add(doc.url))
return Optional.empty(); continue;
taskConsumer.accept(() -> {
var processedDoc = documentProcessor.process(doc, domain.domain, externalDomainLinks, documentDecorator); var processedDoc = documentProcessor.process(doc, domain.domain, externalDomainLinks, documentDecorator);
synchronized (deduplicator) { synchronized (deduplicator) {
@ -212,7 +215,9 @@ public class DomainProcessor {
10_000, externalDomainLinks.countForUrl(processedDoc.url)); 10_000, externalDomainLinks.countForUrl(processedDoc.url));
} }
return Optional.of(processedDoc); return processedDoc;
});
}
}); });
} }