(index) Optimize DocumentSpan.countIntersections

This commit is contained in:
Viktor Lofgren 2024-08-25 13:59:11 +02:00
parent 893fae6d59
commit 9c5f463775
2 changed files with 40 additions and 16 deletions

View File

@ -4,6 +4,8 @@ import it.unimi.dsi.fastutil.ints.IntIterator;
import it.unimi.dsi.fastutil.ints.IntList;
import nu.marginalia.sequence.CodedSequence;
import java.util.Arrays;
public class DocumentSpan {
/** A list of the interlaced start and end positions of each span in the document of this type */
@ -17,22 +19,42 @@ public class DocumentSpan {
this.startsEnds = null;
}
public int countIntersections(IntList positions) {
if (null == startsEnds || startsEnds.isEmpty() || positions.isEmpty()) {
public int countIntersections(int[] positions) {
if (null == startsEnds || startsEnds.isEmpty() || positions.length == 0) {
return 0;
}
int cnt = 0;
int seis = 0;
for (int pi = 0; pi < positions.size(); pi++) {
int position = positions.getInt(pi);
if (positions.length < 8) {
int seis = 0;
for (int sei = seis; sei < startsEnds.size(); sei ++) {
if (startsEnds.getInt(sei) > position) {
cnt += sei % 2;
seis = Math.max(seis, sei - 1);
break;
for (int pi = 0; pi < positions.length; pi++) {
int position = positions[pi];
for (int sei = seis; sei < startsEnds.size(); sei ++) {
if (startsEnds.getInt(sei) > position) {
cnt += sei % 2;
seis = Math.max(seis, sei - 1);
break;
}
}
}
}
else {
for (int sei = 0; sei < startsEnds.size(); ) {
int start = startsEnds.getInt(sei++);
int end = startsEnds.getInt(sei++);
int i = Arrays.binarySearch(positions, start);
if (i < 0) {
i = -i - 1;
}
while (i < positions.length && positions[i] < end) {
cnt++;
i++;
}
}
}

View File

@ -250,8 +250,10 @@ public class IndexResultScoreCalculator {
int firstPosition = 1;
for (int i = 0; i < weightedCounts.length; i++) {
if (positions[i] != null && ctx.regularMask.get(i)) {
searchableKeywordsCount ++;
int[] posArray = positions[i].toIntArray();
for (int idx = 0; idx < positions[i].size(); idx++) {
int pos = positions[i].getInt(idx);
@ -259,24 +261,24 @@ public class IndexResultScoreCalculator {
}
int cnt;
if ((cnt = spans.title.countIntersections(positions[i])) != 0) {
if ((cnt = spans.title.countIntersections(posArray)) != 0) {
unorderedMatchInTitleCount++;
weightedCounts[i] += 2.5f * cnt;
}
if ((cnt = spans.heading.countIntersections(positions[i])) != 0) {
if ((cnt = spans.heading.countIntersections(posArray)) != 0) {
unorderedMatchInHeadingCount++;
weightedCounts[i] += 2.5f * cnt;
}
if ((cnt = spans.code.countIntersections(positions[i])) != 0) {
if ((cnt = spans.code.countIntersections(posArray)) != 0) {
weightedCounts[i] += 0.25f * cnt;
}
if ((cnt = spans.anchor.countIntersections(positions[i])) != 0) {
if ((cnt = spans.anchor.countIntersections(posArray)) != 0) {
weightedCounts[i] += 0.2f * cnt;
}
if ((cnt = spans.nav.countIntersections(positions[i])) != 0) {
if ((cnt = spans.nav.countIntersections(posArray)) != 0) {
weightedCounts[i] += 0.1f * cnt;
}
if ((cnt = spans.body.countIntersections(positions[i])) != 0) {
if ((cnt = spans.body.countIntersections(posArray)) != 0) {
weightedCounts[i] += 1.0f * cnt;
}
}