mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
Clean up KeywordMetadata
This commit is contained in:
parent
d5df3268b3
commit
2e4532ca90
@ -154,7 +154,7 @@ public class DocumentKeywordExtractor {
|
||||
if (!word.isStopWord()) {
|
||||
String w = AsciiFlattener.flattenUnicode(word.wordLowerCase());
|
||||
if (WordPatterns.singleWordQualitiesPredicate.test(w)) {
|
||||
wordsBuilder.add(w, metadata.forWord(flagsTemplate, word.stemmed()));
|
||||
wordsBuilder.add(w, metadata.getMetadataForWord(flagsTemplate, word.stemmed()));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -163,7 +163,7 @@ public class DocumentKeywordExtractor {
|
||||
var rep = new WordRep(sent, names);
|
||||
String w = AsciiFlattener.flattenUnicode(rep.word);
|
||||
|
||||
wordsBuilder.add(w, metadata.forWord(flagsTemplate, rep.stemmed));
|
||||
wordsBuilder.add(w, metadata.getMetadataForWord(flagsTemplate, rep.stemmed));
|
||||
}
|
||||
}
|
||||
|
||||
@ -217,7 +217,7 @@ public class DocumentKeywordExtractor {
|
||||
continue;
|
||||
}
|
||||
|
||||
wordsBuilder.add(flatWord, metadata.forWord(metadata.wordFlagsTemplate(), word.stemmed) | additionalMeta);
|
||||
wordsBuilder.add(flatWord, metadata.getMetadataForWord(metadata.wordFlagsTemplate(), word.stemmed) | additionalMeta);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,8 +17,6 @@ public record KeywordMetadata(HashSet<String> titleKeywords,
|
||||
)
|
||||
{
|
||||
|
||||
private static final KeywordCounter.WordFrequencyData empty = new KeywordCounter.WordFrequencyData(0, 0);
|
||||
|
||||
public KeywordMetadata(EnumSet<EdgePageWordFlags> flags) {
|
||||
this(new HashSet<>(50), new HashSet<>(10), new HashSet<>(50),
|
||||
new HashMap<>(15_000),
|
||||
@ -30,7 +28,8 @@ public record KeywordMetadata(HashSet<String> titleKeywords,
|
||||
this(EnumSet.noneOf(EdgePageWordFlags.class));
|
||||
}
|
||||
|
||||
public long forWord(EnumSet<EdgePageWordFlags> flagsTemplate, String stemmed) {
|
||||
private static final KeywordCounter.WordFrequencyData empty = new KeywordCounter.WordFrequencyData(0, 0);
|
||||
public long getMetadataForWord(EnumSet<EdgePageWordFlags> flagsTemplate, String stemmed) {
|
||||
|
||||
KeywordCounter.WordFrequencyData tfidf = wordsTfIdf.getOrDefault(stemmed, empty);
|
||||
EnumSet<EdgePageWordFlags> flags = flagsTemplate.clone();
|
||||
|
Loading…
Reference in New Issue
Block a user