mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(term-freq-exporter) Reduce thread count and memory usage
This commit is contained in:
parent
491d6bec46
commit
6bfe04b609
@ -21,6 +21,7 @@ dependencies {
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:term-frequency-dict')
|
||||
implementation project(':code:libraries:blocking-thread-pool')
|
||||
implementation project(':code:features-crawl:link-parser')
|
||||
implementation project(':code:features-convert:anchor-keywords')
|
||||
implementation project(':code:process-models:crawling-model')
|
||||
|
@ -14,6 +14,7 @@ import nu.marginalia.process.log.WorkLog;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.storage.model.FileStorage;
|
||||
import nu.marginalia.storage.model.FileStorageId;
|
||||
import nu.marginalia.util.SimpleBlockingThreadPool;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.slf4j.Logger;
|
||||
@ -53,26 +54,22 @@ public class TermFrequencyExporter implements ExporterIf {
|
||||
TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);
|
||||
AtomicInteger docCount = new AtomicInteger();
|
||||
|
||||
try (ForkJoinPool fjp = new ForkJoinPool(Math.max(2, Runtime.getRuntime().availableProcessors() / 2))) {
|
||||
|
||||
// Math.clamp takes (value, min, max): use half the available cores, bounded to the range 2..16.
// With the value and bounds swapped the call throws IllegalArgumentException whenever min > max.
SimpleBlockingThreadPool sjp = new SimpleBlockingThreadPool("exporter", Math.clamp(Runtime.getRuntime().availableProcessors() / 2, 2, 16), 4);
|
||||
Path crawlerLogFile = inputDir.resolve("crawler.log");
|
||||
|
||||
for (var item : WorkLog.iterable(crawlerLogFile)) {
|
||||
if (Thread.interrupted()) {
|
||||
fjp.shutdownNow();
|
||||
sjp.shutDownNow();
|
||||
|
||||
throw new InterruptedException();
|
||||
}
|
||||
|
||||
Path crawlDataPath = inputDir.resolve(item.relPath());
|
||||
fjp.execute(() -> processFile(crawlDataPath, counts, docCount, se.get()));
|
||||
sjp.submitQuietly(() -> processFile(crawlDataPath, counts, docCount, se.get()));
|
||||
}
|
||||
|
||||
while (!fjp.isQuiescent()) {
|
||||
if (fjp.awaitQuiescence(10, TimeUnit.SECONDS))
|
||||
break;
|
||||
}
|
||||
}
|
||||
sjp.shutDown();
|
||||
sjp.awaitTermination(10, TimeUnit.DAYS);
|
||||
|
||||
var tmpFile = Files.createTempFile(destStorage.asPath(), "freqs", ".dat.tmp",
|
||||
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rw-r--r--")));
|
||||
|
Loading…
Reference in New Issue
Block a user