mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(term-frequency) Fix concurrency issues in SentenceExtractor and TermFrequencyExporter
How'd This Ever Work? (tm) TermFrequencyExporter was passing the arguments to Math.clamp() in the wrong order (it expects value, min, max), and SentenceExtractor was synchronizing on its own instance when initializing shared static members, causing rare issues when spinning up multiple SentenceExtractor instances at once.
This commit is contained in:
parent
6401a513d7
commit
ffd970036d
@ -54,7 +54,7 @@ public class TermFrequencyExporter implements ExporterIf {
|
|||||||
TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);
|
TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);
|
||||||
AtomicInteger docCount = new AtomicInteger();
|
AtomicInteger docCount = new AtomicInteger();
|
||||||
|
|
||||||
SimpleBlockingThreadPool sjp = new SimpleBlockingThreadPool("exporter", Math.clamp(2, 16, Runtime.getRuntime().availableProcessors() / 2), 4);
|
SimpleBlockingThreadPool sjp = new SimpleBlockingThreadPool("exporter", Math.clamp(Runtime.getRuntime().availableProcessors() / 2, 2, 16), 4);
|
||||||
Path crawlerLogFile = inputDir.resolve("crawler.log");
|
Path crawlerLogFile = inputDir.resolve("crawler.log");
|
||||||
|
|
||||||
for (var item : WorkLog.iterable(crawlerLogFile)) {
|
for (var item : WorkLog.iterable(crawlerLogFile)) {
|
||||||
|
@ -59,7 +59,7 @@ public class SentenceExtractor {
|
|||||||
logger.error("Could not initialize sentence detector", ex);
|
logger.error("Could not initialize sentence detector", ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
synchronized (this) {
|
synchronized (SentenceExtractor.class) {
|
||||||
if (ngramLexicon == null) {
|
if (ngramLexicon == null) {
|
||||||
ngramLexicon = new NgramLexicon(models);
|
ngramLexicon = new NgramLexicon(models);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user