mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(index) Optimize DocumentSpan.countIntersections
This commit is contained in:
parent
893fae6d59
commit
9c5f463775
@ -4,6 +4,8 @@ import it.unimi.dsi.fastutil.ints.IntIterator;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import nu.marginalia.sequence.CodedSequence;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class DocumentSpan {
|
||||
|
||||
/** A list of the interlaced start and end positions of each span in the document of this type */
|
||||
@ -17,22 +19,42 @@ public class DocumentSpan {
|
||||
this.startsEnds = null;
|
||||
}
|
||||
|
||||
public int countIntersections(IntList positions) {
|
||||
if (null == startsEnds || startsEnds.isEmpty() || positions.isEmpty()) {
|
||||
public int countIntersections(int[] positions) {
|
||||
if (null == startsEnds || startsEnds.isEmpty() || positions.length == 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int cnt = 0;
|
||||
int seis = 0;
|
||||
|
||||
for (int pi = 0; pi < positions.size(); pi++) {
|
||||
int position = positions.getInt(pi);
|
||||
if (positions.length < 8) {
|
||||
int seis = 0;
|
||||
|
||||
for (int sei = seis; sei < startsEnds.size(); sei ++) {
|
||||
if (startsEnds.getInt(sei) > position) {
|
||||
cnt += sei % 2;
|
||||
seis = Math.max(seis, sei - 1);
|
||||
break;
|
||||
for (int pi = 0; pi < positions.length; pi++) {
|
||||
int position = positions[pi];
|
||||
|
||||
for (int sei = seis; sei < startsEnds.size(); sei ++) {
|
||||
if (startsEnds.getInt(sei) > position) {
|
||||
cnt += sei % 2;
|
||||
seis = Math.max(seis, sei - 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int sei = 0; sei < startsEnds.size(); ) {
|
||||
int start = startsEnds.getInt(sei++);
|
||||
int end = startsEnds.getInt(sei++);
|
||||
|
||||
int i = Arrays.binarySearch(positions, start);
|
||||
if (i < 0) {
|
||||
i = -i - 1;
|
||||
}
|
||||
while (i < positions.length && positions[i] < end) {
|
||||
cnt++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -250,8 +250,10 @@ public class IndexResultScoreCalculator {
|
||||
|
||||
int firstPosition = 1;
|
||||
for (int i = 0; i < weightedCounts.length; i++) {
|
||||
|
||||
if (positions[i] != null && ctx.regularMask.get(i)) {
|
||||
searchableKeywordsCount ++;
|
||||
int[] posArray = positions[i].toIntArray();
|
||||
|
||||
for (int idx = 0; idx < positions[i].size(); idx++) {
|
||||
int pos = positions[i].getInt(idx);
|
||||
@ -259,24 +261,24 @@ public class IndexResultScoreCalculator {
|
||||
}
|
||||
|
||||
int cnt;
|
||||
if ((cnt = spans.title.countIntersections(positions[i])) != 0) {
|
||||
if ((cnt = spans.title.countIntersections(posArray)) != 0) {
|
||||
unorderedMatchInTitleCount++;
|
||||
weightedCounts[i] += 2.5f * cnt;
|
||||
}
|
||||
if ((cnt = spans.heading.countIntersections(positions[i])) != 0) {
|
||||
if ((cnt = spans.heading.countIntersections(posArray)) != 0) {
|
||||
unorderedMatchInHeadingCount++;
|
||||
weightedCounts[i] += 2.5f * cnt;
|
||||
}
|
||||
if ((cnt = spans.code.countIntersections(positions[i])) != 0) {
|
||||
if ((cnt = spans.code.countIntersections(posArray)) != 0) {
|
||||
weightedCounts[i] += 0.25f * cnt;
|
||||
}
|
||||
if ((cnt = spans.anchor.countIntersections(positions[i])) != 0) {
|
||||
if ((cnt = spans.anchor.countIntersections(posArray)) != 0) {
|
||||
weightedCounts[i] += 0.2f * cnt;
|
||||
}
|
||||
if ((cnt = spans.nav.countIntersections(positions[i])) != 0) {
|
||||
if ((cnt = spans.nav.countIntersections(posArray)) != 0) {
|
||||
weightedCounts[i] += 0.1f * cnt;
|
||||
}
|
||||
if ((cnt = spans.body.countIntersections(positions[i])) != 0) {
|
||||
if ((cnt = spans.body.countIntersections(posArray)) != 0) {
|
||||
weightedCounts[i] += 1.0f * cnt;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user