(index) Fix TCF (TermCoherenceFactor) bug where the ngram terms would be considered instead of the regular ones due to a logic error

This commit is contained in:
Viktor Lofgren 2024-04-19 12:19:26 +02:00
parent b80a83339b
commit 0dcca0cb83
2 changed files with 9 additions and 2 deletions

View File

@ -34,7 +34,11 @@ public class ResultRankingContext {
this.ngramsMask = ngramsMask; this.ngramsMask = ngramsMask;
this.regularMask = new BitSet(ngramsMask.length()); this.regularMask = new BitSet(ngramsMask.length());
this.regularMask.xor(ngramsMask); for (int i = 0; i < ngramsMask.length(); i++) {
if (!ngramsMask.get(i)) {
regularMask.set(i);
}
}
this.fullCounts = fullCounts; this.fullCounts = fullCounts;
this.priorityCounts = prioCounts; this.priorityCounts = prioCounts;

View File

@ -14,6 +14,9 @@ public class TermCoherenceFactor {
* found in the same sentences. * found in the same sentences.
*/ */
public double calculateOverlap(CompiledQueryLong wordMetadataQuery) { public double calculateOverlap(CompiledQueryLong wordMetadataQuery) {
if (wordMetadataQuery.size() <= 2)
return 0;
long mask = CompiledQueryAggregates.longBitmaskAggregate(wordMetadataQuery, long mask = CompiledQueryAggregates.longBitmaskAggregate(wordMetadataQuery,
score -> score >>> WordMetadata.POSITIONS_SHIFT); score -> score >>> WordMetadata.POSITIONS_SHIFT);
@ -62,7 +65,7 @@ public class TermCoherenceFactor {
} }
} }
if (cnt != 0) { if (cnt > 0) {
return sum / cnt; return sum / cnt;
} else { } else {
return 0; return 0;