mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(index) Adjust proximity score
This commit is contained in:
parent
6eb0f13411
commit
7f498e10b7
@ -27,9 +27,7 @@ import nu.marginalia.sequence.SequenceOperations;
|
|||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.lang.foreign.Arena;
|
import java.lang.foreign.Arena;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.BitSet;
|
import java.util.BitSet;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.booleanAggregate;
|
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.booleanAggregate;
|
||||||
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.intMaxMinAggregate;
|
import static nu.marginalia.api.searchquery.model.compiled.aggregate.CompiledQueryAggregates.intMaxMinAggregate;
|
||||||
@ -225,19 +223,11 @@ public class IndexResultScoreCalculator {
|
|||||||
float[] weightedCounts = new float[compiledQuery.size()];
|
float[] weightedCounts = new float[compiledQuery.size()];
|
||||||
float keywordMinDistFac = 0;
|
float keywordMinDistFac = 0;
|
||||||
if (positions.length > 2) {
|
if (positions.length > 2) {
|
||||||
List<IntIterator> iterators = new ArrayList<>(positions.length);
|
int minDist = constraintGroups.getFullGroup().minDistance(positions);
|
||||||
|
if (minDist > 0 && minDist < Integer.MAX_VALUE) {
|
||||||
for (int i = 0; i < positions.length; i++) {
|
|
||||||
if (positions[i] != null && ctx.regularMask.get(i)) {
|
|
||||||
iterators.add(positions[i].iterator());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int minDist = SequenceOperations.minDistance(iterators);
|
|
||||||
if (minDist > 0) {
|
|
||||||
if (minDist < 32) {
|
if (minDist < 32) {
|
||||||
// If min-dist is sufficiently small, we give a tapering reward to the document
|
// If min-dist is sufficiently small, we give a tapering reward to the document
|
||||||
keywordMinDistFac = 2.0f / (1.f + (float) Math.sqrt(minDist));
|
keywordMinDistFac = 2.0f / (0.1f + (float) Math.sqrt(minDist));
|
||||||
} else {
|
} else {
|
||||||
// if it is too large, we add a mounting penalty
|
// if it is too large, we add a mounting penalty
|
||||||
keywordMinDistFac = -1.0f * (float) Math.sqrt(minDist);
|
keywordMinDistFac = -1.0f * (float) Math.sqrt(minDist);
|
||||||
|
@ -193,5 +193,30 @@ public class PhraseConstraintGroupList {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int minDistance(CodedSequence[] positions) {
|
||||||
|
IntIterator[] sequences = new IntIterator[present.cardinality()];
|
||||||
|
|
||||||
|
for (int oi = 0, si = 0; oi < offsets.length; oi++) {
|
||||||
|
if (!present.get(oi)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int offset = offsets[oi];
|
||||||
|
if (offset < 0)
|
||||||
|
return Integer.MAX_VALUE;
|
||||||
|
|
||||||
|
// Create iterators that are offset by their relative position in the
|
||||||
|
// sequence. This is done by subtracting the index from the offset,
|
||||||
|
// so that when we intersect them, an overlap means that the terms are
|
||||||
|
// in the correct order. Note the offset is negative!
|
||||||
|
|
||||||
|
var posForTerm = positions[offset];
|
||||||
|
if (posForTerm == null) {
|
||||||
|
return Integer.MAX_VALUE;
|
||||||
|
}
|
||||||
|
sequences[si++] = posForTerm.offsetIterator(-oi);
|
||||||
|
}
|
||||||
|
|
||||||
|
return SequenceOperations.minDistance(sequences);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,8 +4,6 @@ import it.unimi.dsi.fastutil.ints.IntArrayList;
|
|||||||
import it.unimi.dsi.fastutil.ints.IntIterator;
|
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||||
import it.unimi.dsi.fastutil.ints.IntList;
|
import it.unimi.dsi.fastutil.ints.IntList;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class SequenceOperations {
|
public class SequenceOperations {
|
||||||
|
|
||||||
/** Return true if the sequences intersect, false otherwise.
|
/** Return true if the sequences intersect, false otherwise.
|
||||||
@ -145,21 +143,20 @@ public class SequenceOperations {
|
|||||||
return minDistance;
|
return minDistance;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static int minDistance(List<IntIterator> iterators) {
|
public static int minDistance(IntIterator[] iterators) {
|
||||||
if (iterators.size() <= 1)
|
if (iterators.length <= 1)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
int[] values = new int[iterators.size()];
|
int[] values = new int[iterators.length];
|
||||||
|
|
||||||
for (int i = 0; i < iterators.size(); i++) {
|
for (int i = 0; i < iterators.length; i++) {
|
||||||
if (iterators.get(i).hasNext())
|
if (iterators[i].hasNext())
|
||||||
values[i] = iterators.get(i).nextInt();
|
values[i] = iterators[i].nextInt();
|
||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int minDist = Integer.MAX_VALUE;
|
int minDist = Integer.MAX_VALUE;
|
||||||
int successes = 0;
|
|
||||||
|
|
||||||
int minVal = Integer.MAX_VALUE;
|
int minVal = Integer.MAX_VALUE;
|
||||||
int maxVal = Integer.MIN_VALUE;
|
int maxVal = Integer.MIN_VALUE;
|
||||||
@ -171,13 +168,13 @@ public class SequenceOperations {
|
|||||||
|
|
||||||
minDist = Math.min(minDist, maxVal - minVal);
|
minDist = Math.min(minDist, maxVal - minVal);
|
||||||
|
|
||||||
for (int i = 0; successes < iterators.size(); i = (i + 1) % iterators.size())
|
for (int i = 0;; i = (i + 1) % iterators.length)
|
||||||
{
|
{
|
||||||
if (values[i] == minVal) {
|
if (values[i] == minVal) {
|
||||||
if (!iterators.get(i).hasNext()) {
|
if (!iterators[i].hasNext()) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
values[i] = iterators.get(i).nextInt();
|
values[i] = iterators[i].nextInt();
|
||||||
|
|
||||||
if (values[i] > maxVal) {
|
if (values[i] > maxVal) {
|
||||||
maxVal = values[i];
|
maxVal = values[i];
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
package nu.marginalia.sequence;
|
package nu.marginalia.sequence;
|
||||||
|
|
||||||
|
import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||||
import it.unimi.dsi.fastutil.ints.IntList;
|
import it.unimi.dsi.fastutil.ints.IntList;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.*;
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
@ -91,8 +91,6 @@ class SequenceOperationsTest {
|
|||||||
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 20, 50, 100);
|
GammaCodedSequence seq2 = GammaCodedSequence.generate(wa, 20, 50, 100);
|
||||||
GammaCodedSequence seq3 = GammaCodedSequence.generate(wa, 30, 60, 90);
|
GammaCodedSequence seq3 = GammaCodedSequence.generate(wa, 30, 60, 90);
|
||||||
|
|
||||||
assertEquals(19, SequenceOperations.minDistance(List.of(seq1.iterator(), seq2.iterator(), seq3.iterator())));
|
assertEquals(19, SequenceOperations.minDistance(new IntIterator[]{seq1.iterator(), seq2.iterator(), seq3.iterator()}));
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user