(term-frequency-dict) Fix memory leak in TermFrequencyDict

This commit is contained in:
Viktor Lofgren 2023-10-04 11:55:11 +02:00
parent 405300b4b2
commit 54c8e13a68

View File

@@ -5,6 +5,7 @@ import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import lombok.SneakyThrows;
import nu.marginalia.LanguageModels;
import nu.marginalia.array.LongArray;
import nu.marginalia.array.LongArrayFactory;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
@@ -40,7 +41,7 @@ public class TermFrequencyDict {
}
private static Long2IntOpenHashMap load(Path file) throws IOException {
- LongArray array = LongArray.mmapRead(file);
+ try (LongArray array = LongArrayFactory.mmapForReadingConfined(file)) {
int size = (int) Files.size(file) / 16;
var ret = new Long2IntOpenHashMap(size, 0.5f);
@@ -53,6 +54,7 @@ public class TermFrequencyDict {
return ret;
}
}
/** Total number of documents in the corpus */
public int docCount() {