diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java index f0113870..b705917e 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/QueryProtobufCodec.java @@ -121,6 +121,7 @@ public class QueryProtobufCodec { results.getPubYear(), // ??, results.getDataHash(), results.getWordsTotal(), + results.getBestPositions(), results.getRankingScore() ); } @@ -202,6 +203,7 @@ public class QueryProtobufCodec { rpcDecoratedResultItem.getPubYear(), rpcDecoratedResultItem.getDataHash(), rpcDecoratedResultItem.getWordsTotal(), + rpcDecoratedResultItem.getBestPositions(), rpcDecoratedResultItem.getRankingScore() ); } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CompiledQueryAggregates.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CompiledQueryAggregates.java index 9c4abe72..0ab0647d 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CompiledQueryAggregates.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CompiledQueryAggregates.java @@ -44,4 +44,9 @@ public class CompiledQueryAggregates { public static List queriesAggregate(CompiledQueryLong query) { return new ArrayList<>(query.root().visit(new CqQueryPathsOperator(query))); } + + /** Using the bitwise AND operator, aggregate all possible combined values of the long generated by the provided operator */ + public static LongSet positionsAggregate(CompiledQuery query, ToLongFunction operator) { + return query.root().visit(new CqPositionsOperator(query, operator)); + } } diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CqPositionsOperator.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CqPositionsOperator.java new file mode 100644 index 00000000..19db2d4b --- /dev/null +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/compiled/aggregate/CqPositionsOperator.java @@ -0,0 +1,79 @@ +package nu.marginalia.api.searchquery.model.compiled.aggregate; + +import it.unimi.dsi.fastutil.longs.LongArraySet; +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import it.unimi.dsi.fastutil.longs.LongSet; +import nu.marginalia.api.searchquery.model.compiled.CompiledQuery; +import nu.marginalia.api.searchquery.model.compiled.CqExpression; + +import java.util.List; +import java.util.function.IntToLongFunction; +import java.util.function.ToLongFunction; + +public class CqPositionsOperator implements CqExpression.ObjectVisitor { + private final IntToLongFunction operator; + + public CqPositionsOperator(CompiledQuery query, ToLongFunction operator) { + this.operator = idx -> operator.applyAsLong(query.at(idx)); + } + + @Override + public LongSet onAnd(List parts) { + LongSet ret = new LongArraySet(); + + for (var part : parts) { + ret = comineSets(ret, part.visit(this)); + } + + return ret; + } + + private LongSet comineSets(LongSet a, LongSet b) { + if (a.isEmpty()) + return b; + if (b.isEmpty()) + return a; + + LongSet ret = newSet(a.size() * b.size()); + + var ai = a.longIterator(); + + while (ai.hasNext()) { + long aval = ai.nextLong(); + + var bi = b.longIterator(); + while (bi.hasNext()) { + ret.add(aval & bi.nextLong()); + } + } + + return ret; + } + + @Override + public LongSet onOr(List parts) { + LongSet ret = newSet(parts.size()); + + for (var part : parts) { + ret.addAll(part.visit(this)); + } + + return ret; + } + + @Override + public LongSet onLeaf(int idx) { + var set = newSet(1); + set.add(operator.applyAsLong(idx)); + return set; + } + + /** Allocate a new set suitable for a collection with the provided cardinality */ + private LongSet newSet(int cardinality) { + if (cardinality < 8) + return new LongArraySet(cardinality); + else + return new LongOpenHashSet(cardinality); + } + +} diff --git a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java index b099dc01..df48ea64 100644 --- a/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java +++ b/code/functions/search-query/api/java/nu/marginalia/api/searchquery/model/results/DecoratedSearchResultItem.java @@ -30,6 +30,7 @@ public class DecoratedSearchResultItem implements Comparable next.termScore <= scoreLimit) - .toList(); - } + this.rest.removeIf(urlDetail -> { + if (urlDetail.termScore > scoreLimit) + return false; + + for (var keywordScore : urlDetail.resultItem.keywordScores) { + if (keywordScore.isKeywordSpecial()) + continue; + if (keywordScore.positionCount() == 0) + continue; + + if (keywordScore.hasTermFlag(WordFlags.Title)) + return false; + if (keywordScore.hasTermFlag(WordFlags.ExternalLink)) + return false; + if (keywordScore.hasTermFlag(WordFlags.UrlDomain)) + return false; + if (keywordScore.hasTermFlag(WordFlags.UrlPath)) + return false; + if (keywordScore.hasTermFlag(WordFlags.Subjects)) + return false; + } + + return true; + }); } - private boolean isEligbleForInclusion(UrlDetails urlDetails) { - return urlDetails.resultItem.keywordScores.stream() - .filter(score -> !score.keyword.contains(":")) - .collect(Collectors.toMap( - score -> -1, // FIXME - score -> score.hasTermFlag(WordFlags.Title) - | score.hasTermFlag(WordFlags.ExternalLink) - | score.hasTermFlag(WordFlags.UrlDomain) - | score.hasTermFlag(WordFlags.UrlPath) - | score.hasTermFlag(WordFlags.Subjects) - , - (a, b) -> a && b - )) - .containsValue(Boolean.TRUE); - } public ClusteredUrlDetails(@NotNull UrlDetails onlyFirst) { this.first = onlyFirst; diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryIndexService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryIndexService.java index 785c8952..6dc7b83b 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryIndexService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryIndexService.java @@ -88,7 +88,7 @@ public class SearchQueryIndexService { DomainIndexingState.ACTIVE, detail.rankingScore, // termScore detail.resultsFromDomain(), - getPositionsString(detail.rawIndexResult), + getPositionsString(detail), detail.rawIndexResult, detail.rawIndexResult.keywordScores )); @@ -97,27 +97,8 @@ public class SearchQueryIndexService { return ret; } - private String getPositionsString(SearchResultItem resultItem) { - Int2LongArrayMap positionsPerSet = new Int2LongArrayMap(8); - - for (var score : resultItem.keywordScores) { - if (!score.isKeywordRegular()) { - continue; - } - positionsPerSet.merge(score.subquery(), score.positions(), this::and); - } - - long bits = positionsPerSet.values().longStream().reduce(this::or).orElse(0); - - return BrailleBlockPunchCards.printBits(bits, 56); + private String getPositionsString(DecoratedSearchResultItem resultItem) { + return BrailleBlockPunchCards.printBits(resultItem.bestPositions, 56); } - - private long and(long a, long b) { - return a & b; - } - private long or(long a, long b) { - return a | b; - } - }