From fa162698c2a358ce8e0b4f96d38f9947f435f4c7 Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Mon, 15 Jul 2024 05:15:30 +0200
Subject: [PATCH] (term-frequency) Fix concurrency issues in SentenceExtractor and TermFrequencyExporter

How'd This Ever Work? (tm)

TermFrequencyExporter was using Math.clamp() incorrectly, and
SentenceExtractor was synchronizing on its own instance when
initializing shared static members, causing rare issues when
spinning multiple SE:s up at once.
---
 .../java/nu/marginalia/extractor/TermFrequencyExporter.java | 2 +-
 .../java/nu/marginalia/language/sentence/SentenceExtractor.java | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java b/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java
index 18fb3261..3bcc9cf2 100644
--- a/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java
+++ b/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java
@@ -54,7 +54,7 @@ public class TermFrequencyExporter implements ExporterIf {
         TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);
         AtomicInteger docCount = new AtomicInteger();
 
-        SimpleBlockingThreadPool sjp = new SimpleBlockingThreadPool("exporter", Math.clamp(2, 16, Runtime.getRuntime().availableProcessors() / 2), 4);
+        SimpleBlockingThreadPool sjp = new SimpleBlockingThreadPool("exporter", Math.clamp(Runtime.getRuntime().availableProcessors() / 2, 2, 16), 4);
         Path crawlerLogFile = inputDir.resolve("crawler.log");
 
         for (var item : WorkLog.iterable(crawlerLogFile)) {
diff --git a/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractor.java b/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractor.java
index bb1e3771..8dd818a3 100644
--- a/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractor.java
+++ b/code/libraries/language-processing/java/nu/marginalia/language/sentence/SentenceExtractor.java
@@ -59,7 +59,7 @@ public class SentenceExtractor {
             logger.error("Could not initialize sentence detector", ex);
         }
 
-        synchronized (this) {
+        synchronized (SentenceExtractor.class) {
             if (ngramLexicon == null) {
                 ngramLexicon = new NgramLexicon(models);
             }