mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index) Adjust proximity score
This commit is contained in:
parent
6eb0f13411
commit
7f498e10b7
@ -27,9 +27,7 @@ import nu.marginalia.sequence.SequenceOperations;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.lang.foreign.Arena;
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.List;
|
||||
|
||||
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.booleanAggregate;
|
||||
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.intMaxMinAggregate;
|
||||
@ -225,19 +223,11 @@ public class IndexResultScoreCalculator {
|
||||
float[] weightedCounts = new float[compiledQuery.size()];
|
||||
float keywordMinDistFac = 0;
|
||||
if (positions.length > 2) {
|
||||
List<IntIterator> iterators = new ArrayList<>(positions.length);
|
||||
|
||||
for (int i = 0; i < positions.length; i++) {
|
||||
if (positions[i] != null && ctx.regularMask.get(i)) {
|
||||
iterators.add(positions[i].iterator());
|
||||
}
|
||||
}
|
||||
|
||||
int minDist = SequenceOperations.minDistance(iterators);
|
||||
if (minDist > 0) {
|
||||
int minDist = constraintGroups.getFullGroup().minDistance(positions);
|
||||
if (minDist > 0 && minDist < Integer.MAX_VALUE) {
|
||||
if (minDist < 32) {
|
||||
// If min-dist is sufficiently small, we give a tapering reward to the document
|
||||
keywordMinDistFac = 2.0f / (1.f + (float) Math.sqrt(minDist));
|
||||
keywordMinDistFac = 2.0f / (0.1f + (float) Math.sqrt(minDist));
|
||||
} else {
|
||||
// if it is too large, we add a mounting penalty
|
||||
keywordMinDistFac = -1.0f * (float) Math.sqrt(minDist);
|
||||
|
@ -193,5 +193,30 @@ public class PhraseConstraintGroupList {
|
||||
return false;
|
||||
}
|
||||
|
||||
public int minDistance(CodedSequence[] positions) {
|
||||
IntIterator[] sequences = new IntIterator[present.cardinality()];
|
||||
|
||||
for (int oi = 0, si = 0; oi < offsets.length; oi++) {
|
||||
if (!present.get(oi)) {
|
||||
continue;
|
||||
}
|
||||
int offset = offsets[oi];
|
||||
if (offset < 0)
|
||||
return Integer.MAX_VALUE;
|
||||
|
||||
// Create iterators that are offset by their relative position in the
|
||||
// sequence. This is done by subtracting the index from the offset,
|
||||
// so that when we intersect them, an overlap means that the terms are
|
||||
// in the correct order. Note the offset is negative!
|
||||
|
||||
var posForTerm = positions[offset];
|
||||
if (posForTerm == null) {
|
||||
return Integer.MAX_VALUE;
|
||||
}
|
||||
sequences[si++] = posForTerm.offsetIterator(-oi);
|
||||
}
|
||||
|
||||
return SequenceOperations.minDistance(sequences);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,8 +4,6 @@ import it.unimi.dsi.fastutil.ints.IntArrayList;
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
public class SequenceOperations {
|
||||
|
||||
/** Return true if the sequences intersect, false otherwise.
|
||||
@ -145,21 +143,20 @@ public class SequenceOperations {
|
||||
return minDistance;
|
||||
}
|
||||
|
||||
public static int minDistance(List<IntIterator> iterators) {
|
||||
if (iterators.size() <= 1)
|
||||
public static int minDistance(IntIterator[] iterators) {
|
||||
if (iterators.length <= 1)
|
||||
return 0;
|
||||
|
||||
int[] values = new int[iterators.size()];
|
||||
int[] values = new int[iterators.length];
|
||||
|
||||
for (int i = 0; i < iterators.size(); i++) {
|
||||
if (iterators.get(i).hasNext())
|
||||
values[i] = iterators.get(i).nextInt();
|
||||
for (int i = 0; i < iterators.length; i++) {
|
||||
if (iterators[i].hasNext())
|
||||
values[i] = iterators[i].nextInt();
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
int minDist = Integer.MAX_VALUE;
|
||||
int successes = 0;
|
||||
|
||||
int minVal = Integer.MAX_VALUE;
|
||||
int maxVal = Integer.MIN_VALUE;
|
||||
@ -171,13 +168,13 @@ public class SequenceOperations {
|
||||
|
||||
minDist = Math.min(minDist, maxVal - minVal);
|
||||
|
||||
for (int i = 0; successes < iterators.size(); i = (i + 1) % iterators.size())
|
||||
for (int i = 0;; i = (i + 1) % iterators.length)
|
||||
{
|
||||
if (values[i] == minVal) {
|
||||
if (!iterators.get(i).hasNext()) {
|
||||
if (!iterators[i].hasNext()) {
|
||||
break;
|
||||
}
|
||||
values[i] = iterators.get(i).nextInt();
|
||||
values[i] = iterators[i].nextInt();
|
||||
|
||||
if (values[i] > maxVal) {
|
||||
maxVal = values[i];
|
||||
|
@ -1,10 +1,10 @@
|
||||
package nu.marginalia.sequence;
|
||||
|
||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
@ -91,8 +91,6 @@ class SequenceOperationsTest {
|
||||
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 20, 50, 100);
|
||||
GammaCodedSequence seq3 = GammaCodedSequence.generate(wa, 30, 60, 90);
|
||||
|
||||
assertEquals(19, SequenceOperations.minDistance(List.of(seq1.iterator(), seq2.iterator(), seq3.iterator())));
|
||||
|
||||
|
||||
assertEquals(19, SequenceOperations.minDistance(new IntIterator[]{seq1.iterator(), seq2.iterator(), seq3.iterator()}));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user