(index) Adjust proximity score

This commit is contained in:
Viktor Lofgren 2024-08-25 11:01:35 +02:00
parent 6eb0f13411
commit 7f498e10b7
4 changed files with 39 additions and 29 deletions

View File

@ -27,9 +27,7 @@ import nu.marginalia.sequence.SequenceOperations;
import javax.annotation.Nullable;
import java.lang.foreign.Arena;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.booleanAggregate;
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.intMaxMinAggregate;
@ -225,19 +223,11 @@ public class IndexResultScoreCalculator {
float[] weightedCounts = new float[compiledQuery.size()];
float keywordMinDistFac = 0;
if (positions.length > 2) {
List<IntIterator> iterators = new ArrayList<>(positions.length);
for (int i = 0; i < positions.length; i++) {
if (positions[i] != null && ctx.regularMask.get(i)) {
iterators.add(positions[i].iterator());
}
}
int minDist = SequenceOperations.minDistance(iterators);
if (minDist > 0) {
int minDist = constraintGroups.getFullGroup().minDistance(positions);
if (minDist > 0 && minDist < Integer.MAX_VALUE) {
if (minDist < 32) {
// If min-dist is sufficiently small, we give a tapering reward to the document
keywordMinDistFac = 2.0f / (1.f + (float) Math.sqrt(minDist));
keywordMinDistFac = 2.0f / (0.1f + (float) Math.sqrt(minDist));
} else {
// if it is too large, we add a mounting penalty
keywordMinDistFac = -1.0f * (float) Math.sqrt(minDist);

View File

@ -193,5 +193,30 @@ public class PhraseConstraintGroupList {
return false;
}
/** Compute the minimum distance between the positions of the terms in this group.
 * <p>
 * One iterator is built for every term flagged in {@code present}, and the
 * resulting array is handed to {@link SequenceOperations#minDistance}.
 *
 * @param positions position data per term, indexed by the values stored in
 *                  {@code offsets}; individual entries may be null
 * @return the value reported by {@link SequenceOperations#minDistance}, or
 *         {@link Integer#MAX_VALUE} if a present term has a negative offset
 *         or has no position data
 */
public int minDistance(CodedSequence[] positions) {
    final IntIterator[] shifted = new IntIterator[present.cardinality()];
    int filled = 0;

    for (int termIdx = 0; termIdx < offsets.length; termIdx++) {
        if (present.get(termIdx)) {
            final int positionsIdx = offsets[termIdx];

            // A negative offset means there is nothing to look up for this term,
            // so no meaningful distance can be reported
            if (positionsIdx < 0) {
                return Integer.MAX_VALUE;
            }

            final CodedSequence termPositions = positions[positionsIdx];
            if (termPositions == null) {
                return Integer.MAX_VALUE;
            }

            // Shift each term's positions by its index within the group (note the
            // negative sign!), presumably so that terms appearing in the group's
            // order end up at the same shifted position and thus at distance zero
            shifted[filled] = termPositions.offsetIterator(-termIdx);
            filled++;
        }
    }

    return SequenceOperations.minDistance(shifted);
}
}
}

View File

@ -4,8 +4,6 @@ import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import java.util.List;
public class SequenceOperations {
/** Return true if the sequences intersect, false otherwise.
@ -145,21 +143,20 @@ public class SequenceOperations {
return minDistance;
}
public static int minDistance(List<IntIterator> iterators) {
if (iterators.size() <= 1)
public static int minDistance(IntIterator[] iterators) {
if (iterators.length <= 1)
return 0;
int[] values = new int[iterators.size()];
int[] values = new int[iterators.length];
for (int i = 0; i < iterators.size(); i++) {
if (iterators.get(i).hasNext())
values[i] = iterators.get(i).nextInt();
for (int i = 0; i < iterators.length; i++) {
if (iterators[i].hasNext())
values[i] = iterators[i].nextInt();
else
return 0;
}
int minDist = Integer.MAX_VALUE;
int successes = 0;
int minVal = Integer.MAX_VALUE;
int maxVal = Integer.MIN_VALUE;
@ -171,13 +168,13 @@ public class SequenceOperations {
minDist = Math.min(minDist, maxVal - minVal);
for (int i = 0; successes < iterators.size(); i = (i + 1) % iterators.size())
for (int i = 0;; i = (i + 1) % iterators.length)
{
if (values[i] == minVal) {
if (!iterators.get(i).hasNext()) {
if (!iterators[i].hasNext()) {
break;
}
values[i] = iterators.get(i).nextInt();
values[i] = iterators[i].nextInt();
if (values[i] > maxVal) {
maxVal = values[i];

View File

@ -1,10 +1,10 @@
package nu.marginalia.sequence;
import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import org.junit.jupiter.api.Test;
import java.nio.ByteBuffer;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
@ -91,8 +91,6 @@ class SequenceOperationsTest {
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 20, 50, 100);
GammaCodedSequence seq3 = GammaCodedSequence.generate(wa, 30, 60, 90);
assertEquals(19, SequenceOperations.minDistance(List.of(seq1.iterator(), seq2.iterator(), seq3.iterator())));
assertEquals(19, SequenceOperations.minDistance(new IntIterator[]{seq1.iterator(), seq2.iterator(), seq3.iterator()}));
}
}