mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(index) Optimize DocumentSpan.countIntersections
This commit is contained in:
parent
893fae6d59
commit
9c5f463775
@ -4,6 +4,8 @@ import it.unimi.dsi.fastutil.ints.IntIterator;
|
|||||||
import it.unimi.dsi.fastutil.ints.IntList;
|
import it.unimi.dsi.fastutil.ints.IntList;
|
||||||
import nu.marginalia.sequence.CodedSequence;
|
import nu.marginalia.sequence.CodedSequence;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class DocumentSpan {
|
public class DocumentSpan {
|
||||||
|
|
||||||
/** A list of the interlaced start and end positions of each span in the document of this type */
|
/** A list of the interlaced start and end positions of each span in the document of this type */
|
||||||
@ -17,22 +19,42 @@ public class DocumentSpan {
|
|||||||
this.startsEnds = null;
|
this.startsEnds = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int countIntersections(IntList positions) {
|
public int countIntersections(int[] positions) {
|
||||||
if (null == startsEnds || startsEnds.isEmpty() || positions.isEmpty()) {
|
if (null == startsEnds || startsEnds.isEmpty() || positions.length == 0) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
int cnt = 0;
|
int cnt = 0;
|
||||||
int seis = 0;
|
|
||||||
|
|
||||||
for (int pi = 0; pi < positions.size(); pi++) {
|
if (positions.length < 8) {
|
||||||
int position = positions.getInt(pi);
|
int seis = 0;
|
||||||
|
|
||||||
for (int sei = seis; sei < startsEnds.size(); sei ++) {
|
for (int pi = 0; pi < positions.length; pi++) {
|
||||||
if (startsEnds.getInt(sei) > position) {
|
int position = positions[pi];
|
||||||
cnt += sei % 2;
|
|
||||||
seis = Math.max(seis, sei - 1);
|
for (int sei = seis; sei < startsEnds.size(); sei ++) {
|
||||||
break;
|
if (startsEnds.getInt(sei) > position) {
|
||||||
|
cnt += sei % 2;
|
||||||
|
seis = Math.max(seis, sei - 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for (int sei = 0; sei < startsEnds.size(); ) {
|
||||||
|
int start = startsEnds.getInt(sei++);
|
||||||
|
int end = startsEnds.getInt(sei++);
|
||||||
|
|
||||||
|
int i = Arrays.binarySearch(positions, start);
|
||||||
|
if (i < 0) {
|
||||||
|
i = -i - 1;
|
||||||
|
}
|
||||||
|
while (i < positions.length && positions[i] < end) {
|
||||||
|
cnt++;
|
||||||
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -250,8 +250,10 @@ public class IndexResultScoreCalculator {
|
|||||||
|
|
||||||
int firstPosition = 1;
|
int firstPosition = 1;
|
||||||
for (int i = 0; i < weightedCounts.length; i++) {
|
for (int i = 0; i < weightedCounts.length; i++) {
|
||||||
|
|
||||||
if (positions[i] != null && ctx.regularMask.get(i)) {
|
if (positions[i] != null && ctx.regularMask.get(i)) {
|
||||||
searchableKeywordsCount ++;
|
searchableKeywordsCount ++;
|
||||||
|
int[] posArray = positions[i].toIntArray();
|
||||||
|
|
||||||
for (int idx = 0; idx < positions[i].size(); idx++) {
|
for (int idx = 0; idx < positions[i].size(); idx++) {
|
||||||
int pos = positions[i].getInt(idx);
|
int pos = positions[i].getInt(idx);
|
||||||
@ -259,24 +261,24 @@ public class IndexResultScoreCalculator {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int cnt;
|
int cnt;
|
||||||
if ((cnt = spans.title.countIntersections(positions[i])) != 0) {
|
if ((cnt = spans.title.countIntersections(posArray)) != 0) {
|
||||||
unorderedMatchInTitleCount++;
|
unorderedMatchInTitleCount++;
|
||||||
weightedCounts[i] += 2.5f * cnt;
|
weightedCounts[i] += 2.5f * cnt;
|
||||||
}
|
}
|
||||||
if ((cnt = spans.heading.countIntersections(positions[i])) != 0) {
|
if ((cnt = spans.heading.countIntersections(posArray)) != 0) {
|
||||||
unorderedMatchInHeadingCount++;
|
unorderedMatchInHeadingCount++;
|
||||||
weightedCounts[i] += 2.5f * cnt;
|
weightedCounts[i] += 2.5f * cnt;
|
||||||
}
|
}
|
||||||
if ((cnt = spans.code.countIntersections(positions[i])) != 0) {
|
if ((cnt = spans.code.countIntersections(posArray)) != 0) {
|
||||||
weightedCounts[i] += 0.25f * cnt;
|
weightedCounts[i] += 0.25f * cnt;
|
||||||
}
|
}
|
||||||
if ((cnt = spans.anchor.countIntersections(positions[i])) != 0) {
|
if ((cnt = spans.anchor.countIntersections(posArray)) != 0) {
|
||||||
weightedCounts[i] += 0.2f * cnt;
|
weightedCounts[i] += 0.2f * cnt;
|
||||||
}
|
}
|
||||||
if ((cnt = spans.nav.countIntersections(positions[i])) != 0) {
|
if ((cnt = spans.nav.countIntersections(posArray)) != 0) {
|
||||||
weightedCounts[i] += 0.1f * cnt;
|
weightedCounts[i] += 0.1f * cnt;
|
||||||
}
|
}
|
||||||
if ((cnt = spans.body.countIntersections(positions[i])) != 0) {
|
if ((cnt = spans.body.countIntersections(posArray)) != 0) {
|
||||||
weightedCounts[i] += 1.0f * cnt;
|
weightedCounts[i] += 1.0f * cnt;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user