diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexClient.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexClient.java index f79496d7..f28747b1 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexClient.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/IndexClient.java @@ -7,9 +7,9 @@ import io.reactivex.rxjava3.core.Observable; import nu.marginalia.WmsaHome; import nu.marginalia.client.AbstractDynamicClient; import nu.marginalia.client.Context; -import nu.marginalia.index.client.model.query.EdgeSearchSpecification; -import nu.marginalia.index.client.model.results.EdgeSearchResultItem; -import nu.marginalia.index.client.model.results.EdgeSearchResultSet; +import nu.marginalia.index.client.model.query.SearchSpecification; +import nu.marginalia.index.client.model.results.SearchResultItem; +import nu.marginalia.index.client.model.results.SearchResultSet; import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.service.descriptor.ServiceDescriptors; import nu.marginalia.service.id.ServiceId; @@ -30,9 +30,9 @@ public class IndexClient extends AbstractDynamicClient { } @CheckReturnValue - public List query(Context ctx, EdgeSearchSpecification specs) { + public List query(Context ctx, SearchSpecification specs) { return wmsa_search_index_api_time.time( - () -> this.postGet(ctx, "/search/", specs, EdgeSearchResultSet.class).blockingFirst().getResults() + () -> this.postGet(ctx, "/search/", specs, SearchResultSet.class).blockingFirst().getResults() ); } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/domain/EdgeDomainSearchResults.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/domain/EdgeDomainSearchResults.java deleted file mode 100644 index 0b994f81..00000000 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/domain/EdgeDomainSearchResults.java +++ /dev/null @@ -1,13 +0,0 @@ -package nu.marginalia.index.client.model.domain; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.ToString; -import nu.marginalia.model.EdgeUrl; -import nu.marginalia.model.id.EdgeIdList; - -@AllArgsConstructor @Getter @ToString -public class EdgeDomainSearchResults { - public final String keyword; - public final EdgeIdList results; -} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/domain/EdgeDomainSearchSpecification.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/domain/EdgeDomainSearchSpecification.java deleted file mode 100644 index 29748632..00000000 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/domain/EdgeDomainSearchSpecification.java +++ /dev/null @@ -1,14 +0,0 @@ -package nu.marginalia.index.client.model.domain; - -import lombok.AllArgsConstructor; -import lombok.ToString; - -@ToString @AllArgsConstructor -public class EdgeDomainSearchSpecification { - - public final String keyword; - - public final int queryDepth; - public final int minHitCount; - public final int maxResults; -} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSpecification.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java similarity index 81% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSpecification.java rename to code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java index bfafb75b..3c0c8460 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSpecification.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSpecification.java @@ -8,9 +8,12 @@ import nu.marginalia.index.query.limit.SpecificationLimit; import java.util.List; @ToString @Getter @Builder @With @AllArgsConstructor -public class EdgeSearchSpecification { - public List subqueries; +public class SearchSpecification { + public List subqueries; + + /** If present and not empty, limit the search to these domain IDs */ public List domains; + public SearchSetIdentifier searchSetIdentifier; public final String humanQuery; @@ -21,6 +24,7 @@ public class EdgeSearchSpecification { public final SpecificationLimit rank; public final QueryLimits queryLimits; + public final QueryStrategy queryStrategy; } diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSubquery.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSubquery.java similarity index 72% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSubquery.java rename to code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSubquery.java index 00d0e826..83422fab 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/EdgeSearchSubquery.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/query/SearchSubquery.java @@ -2,26 +2,32 @@ package nu.marginalia.index.client.model.query; import lombok.AllArgsConstructor; import lombok.Getter; -import lombok.ToString; import java.util.List; import java.util.stream.Collectors; @Getter @AllArgsConstructor -public class EdgeSearchSubquery { +public class SearchSubquery { + /** These terms must be present in the document */ public final List searchTermsInclude; - public final List searchTermsExclude; - public final List searchTermsAdvice; - public final List searchTermsPriority; + /** These terms must be absent from the document */ + public final List searchTermsExclude; + + /** These terms must be present in the document, but are not used in ranking */ + public final List searchTermsAdvice; + + /** If these optional terms are present in the document, rank it highly */ + public final List searchTermsPriority; + private double value = 0; - public EdgeSearchSubquery(List searchTermsInclude, - List searchTermsExclude, - List searchTermsAdvice, - List searchTermsPriority + public SearchSubquery(List searchTermsInclude, + List searchTermsExclude, + List searchTermsAdvice, + List searchTermsPriority ) { this.searchTermsInclude = searchTermsInclude; this.searchTermsExclude = searchTermsExclude; @@ -29,7 +35,7 @@ public class EdgeSearchSubquery { this.searchTermsPriority = searchTermsPriority; } - public EdgeSearchSubquery setValue(double value) { + public SearchSubquery setValue(double value) { if (Double.isInfinite(value) || Double.isNaN(value)) { this.value = Double.MAX_VALUE; } else { diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultKeywordScore.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultKeywordScore.java deleted file mode 100644 index 073bd974..00000000 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultKeywordScore.java +++ /dev/null @@ -1,84 +0,0 @@ -package nu.marginalia.index.client.model.results; - -import nu.marginalia.model.crawl.EdgePageWordFlags; -import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.model.crawl.EdgePageDocumentFlags; -import nu.marginalia.model.idx.DocumentMetadata; - -import static java.lang.Integer.lowestOneBit; -import static java.lang.Integer.numberOfTrailingZeros; - -public record EdgeSearchResultKeywordScore(int set, - String keyword, - long encodedWordMetadata, - long encodedDocMetadata, - boolean hasPriorityTerms) { - public double documentValue() { - long sum = 0; - - sum += DocumentMetadata.decodeQuality(encodedDocMetadata) / 5.; - - sum += DocumentMetadata.decodeTopology(encodedDocMetadata); - - if (DocumentMetadata.hasFlags(encodedDocMetadata, EdgePageDocumentFlags.Simple.asBit())) { - sum += 20; - } - - int rank = DocumentMetadata.decodeRank(encodedDocMetadata) - 13; - if (rank < 0) - sum += rank / 2; - else - sum += rank / 4; - - return sum; - } - - private boolean hasTermFlag(EdgePageWordFlags flag) { - return WordMetadata.hasFlags(encodedWordMetadata, flag.asBit()); - } - - public double termValue() { - double sum = 0; - - if (hasTermFlag(EdgePageWordFlags.Title)) { - sum -= 15; - } - - if (hasTermFlag(EdgePageWordFlags.Site)) { - sum -= 10; - } - else if (hasTermFlag(EdgePageWordFlags.SiteAdjacent)) { - sum -= 5; - } - - if (hasTermFlag(EdgePageWordFlags.Subjects)) { - sum -= 10; - } - if (hasTermFlag(EdgePageWordFlags.NamesWords)) { - sum -= 1; - } - - if (hasTermFlag(EdgePageWordFlags.UrlDomain)) { - sum -= 5; - } - - if (hasTermFlag(EdgePageWordFlags.UrlPath)) { - sum -= 5; - } - - double tfIdf = WordMetadata.decodeTfidf(encodedWordMetadata); - int positionBits = WordMetadata.decodePositions(encodedWordMetadata); - - sum -= tfIdf / 10.; - sum -= Integer.bitCount(positionBits) / 3.; - - return sum; - } - - public int positions() { return WordMetadata.decodePositions(encodedWordMetadata); } - public boolean isSpecial() { return keyword.contains(":") || hasTermFlag(EdgePageWordFlags.Synthetic); } - public boolean isRegular() { - return !keyword.contains(":") - && !hasTermFlag(EdgePageWordFlags.Synthetic); - } -} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResults.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResults.java deleted file mode 100644 index 2e54a25c..00000000 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResults.java +++ /dev/null @@ -1,26 +0,0 @@ -package nu.marginalia.index.client.model.results; - -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.ToString; - -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Stream; - -@AllArgsConstructor @Getter @ToString -public class EdgeSearchResults { - public final List results; - - public EdgeSearchResults() { - results = new ArrayList<>(); - } - - public int size() { - return results.size(); - } - - public Stream stream() { - return results.stream(); - } -} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultItem.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java similarity index 80% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultItem.java rename to code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java index e9f1e1be..396b7a3a 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultItem.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultItem.java @@ -8,15 +8,19 @@ import nu.marginalia.model.id.EdgeId; import java.util.ArrayList; import java.util.List; +/** Represents a document matching a search query */ @AllArgsConstructor @Getter -public class EdgeSearchResultItem { +public class SearchResultItem { + /** Encoded ID that contains both the URL id and its ranking */ public final long combinedId; - public final List scores; + /** How did the subqueries match against the document ? */ + public final List scores; + /** How many other potential results existed in the same domain */ public int resultsFromDomain; - public EdgeSearchResultItem(long val) { + public SearchResultItem(long val) { this.combinedId = val; this.scores = new ArrayList<>(16); } @@ -62,7 +66,7 @@ public class EdgeSearchResultItem { return false; if (other == this) return true; - if (other instanceof EdgeSearchResultItem o) { + if (other instanceof SearchResultItem o) { return o.getUrlIdInt() == getUrlIdInt(); } return false; diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java new file mode 100644 index 00000000..d50953a2 --- /dev/null +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultKeywordScore.java @@ -0,0 +1,145 @@ +package nu.marginalia.index.client.model.results; + +import nu.marginalia.model.crawl.EdgePageWordFlags; +import nu.marginalia.model.idx.WordMetadata; +import nu.marginalia.model.crawl.EdgePageDocumentFlags; +import nu.marginalia.model.idx.DocumentMetadata; + +import java.util.Objects; + +public final class SearchResultKeywordScore { + public final int subquery; + public final String keyword; + private final long encodedWordMetadata; + private final long encodedDocMetadata; + private final boolean hasPriorityTerms; + + public SearchResultKeywordScore(int subquery, + String keyword, + long encodedWordMetadata, + long encodedDocMetadata, + boolean hasPriorityTerms) { + this.subquery = subquery; + this.keyword = keyword; + this.encodedWordMetadata = encodedWordMetadata; + this.encodedDocMetadata = encodedDocMetadata; + this.hasPriorityTerms = hasPriorityTerms; + } + + private boolean hasTermFlag(EdgePageWordFlags flag) { + return WordMetadata.hasFlags(encodedWordMetadata, flag.asBit()); + } + + public double documentValue() { + long sum = 0; + + sum += DocumentMetadata.decodeQuality(encodedDocMetadata) / 5.; + + sum += DocumentMetadata.decodeTopology(encodedDocMetadata); + + if (DocumentMetadata.hasFlags(encodedDocMetadata, EdgePageDocumentFlags.Simple.asBit())) { + sum += 20; + } + + int rank = DocumentMetadata.decodeRank(encodedDocMetadata) - 13; + if (rank < 0) + sum += rank / 2; + else + sum += rank / 4; + + return sum; + } + + public double termValue() { + double sum = 0; + + if (hasTermFlag(EdgePageWordFlags.Title)) { + sum -= 15; + } + + if (hasTermFlag(EdgePageWordFlags.Site)) { + sum -= 10; + } else if (hasTermFlag(EdgePageWordFlags.SiteAdjacent)) { + sum -= 5; + } + + if (hasTermFlag(EdgePageWordFlags.Subjects)) { + sum -= 10; + } + if (hasTermFlag(EdgePageWordFlags.NamesWords)) { + sum -= 1; + } + + if (hasTermFlag(EdgePageWordFlags.UrlDomain)) { + sum -= 5; + } + + if (hasTermFlag(EdgePageWordFlags.UrlPath)) { + sum -= 5; + } + + double tfIdf = WordMetadata.decodeTfidf(encodedWordMetadata); + int positionBits = WordMetadata.decodePositions(encodedWordMetadata); + + sum -= tfIdf / 10.; + sum -= Integer.bitCount(positionBits) / 3.; + + return sum; + } + + public int subquery() { + return subquery; + } + public int positions() { + return WordMetadata.decodePositions(encodedWordMetadata); + } + + public boolean isKeywordSpecial() { + return keyword.contains(":") || hasTermFlag(EdgePageWordFlags.Synthetic); + } + + public boolean isKeywordRegular() { + return !keyword.contains(":") + && !hasTermFlag(EdgePageWordFlags.Synthetic); + } + + public long encodedWordMetadata() { + return encodedWordMetadata; + } + + public long encodedDocMetadata() { + return encodedDocMetadata; + } + + public boolean hasPriorityTerms() { + return hasPriorityTerms; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) return true; + if (obj == null || obj.getClass() != this.getClass()) return false; + var that = (SearchResultKeywordScore) obj; + return this.subquery == that.subquery && + Objects.equals(this.keyword, that.keyword) && + this.encodedWordMetadata == that.encodedWordMetadata && + this.encodedDocMetadata == that.encodedDocMetadata && + this.hasPriorityTerms == that.hasPriorityTerms; + } + + @Override + public int hashCode() { + return Objects.hash(subquery, keyword, encodedWordMetadata, encodedDocMetadata, hasPriorityTerms); + } + + @Override + public String toString() { + return "SearchResultKeywordScore[" + + "set=" + subquery + ", " + + "keyword=" + keyword + ", " + + "encodedWordMetadata=" + encodedWordMetadata + ", " + + "encodedDocMetadata=" + encodedDocMetadata + ", " + + "hasPriorityTerms=" + hasPriorityTerms + ']'; + } + +} diff --git a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultSet.java b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java similarity index 75% rename from code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultSet.java rename to code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java index e69fd34d..ce25a632 100644 --- a/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/EdgeSearchResultSet.java +++ b/code/api/index-api/src/main/java/nu/marginalia/index/client/model/results/SearchResultSet.java @@ -7,8 +7,8 @@ import lombok.ToString; import java.util.List; @AllArgsConstructor @Getter @ToString -public class EdgeSearchResultSet { - public List results; +public class SearchResultSet { + public List results; public int size() { return results.size(); diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java index 7f2b69b6..265ba5dc 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultDomainDeduplicator.java @@ -2,7 +2,7 @@ package nu.marginalia.index.results; import gnu.trove.map.TLongIntMap; import gnu.trove.map.hash.TLongIntHashMap; -import nu.marginalia.index.client.model.results.EdgeSearchResultItem; +import nu.marginalia.index.client.model.results.SearchResultItem; public class IndexResultDomainDeduplicator { final TLongIntMap resultsByRankingId = new TLongIntHashMap(2048, 0.5f, -1, 0); @@ -21,7 +21,7 @@ public class IndexResultDomainDeduplicator { return resultsByRankingId.adjustOrPutValue(ranking, 1, 1) <= limitByDomain; } - public boolean test(EdgeSearchResultItem item) { + public boolean test(SearchResultItem item) { final long key = item.deduplicationKey(); if (key == 0) return true; @@ -29,7 +29,7 @@ public class IndexResultDomainDeduplicator { return resultsByRankingId.adjustOrPutValue(key, 1, 1) <= limitByDomain; } - public int getCount(EdgeSearchResultItem item) { + public int getCount(SearchResultItem item) { final long key = item.deduplicationKey(); if (key == 0) return 1; diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java index db76022b..9ea28aec 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/results/IndexResultValuator.java @@ -9,9 +9,9 @@ import nu.marginalia.index.svc.SearchTermsService; import nu.marginalia.model.crawl.EdgePageWordFlags; import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.index.query.limit.QueryStrategy; -import nu.marginalia.index.client.model.results.EdgeSearchResultItem; -import nu.marginalia.index.client.model.results.EdgeSearchResultKeywordScore; -import nu.marginalia.index.client.model.query.EdgeSearchSubquery; +import nu.marginalia.index.client.model.results.SearchResultItem; +import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.query.IndexQueryParams; import java.util.List; @@ -31,7 +31,7 @@ public class IndexResultValuator { public IndexResultValuator(SearchTermsService searchTermsSvc, IndexMetadataService metadataService, TLongList results, - List subqueries, + List subqueries, IndexQueryParams queryParams) { this.searchTermVariants = subqueries.stream().map(sq -> sq.searchTermsInclude).distinct().toList(); this.queryParams = queryParams; @@ -71,9 +71,9 @@ public class IndexResultValuator { } - public EdgeSearchResultItem evaluateResult(long id) { + public SearchResultItem evaluateResult(long id) { - EdgeSearchResultItem searchResult = new EdgeSearchResultItem(id); + SearchResultItem searchResult = new SearchResultItem(id); final long urlIdInt = searchResult.getUrlIdInt(); searchResult.setDomainId(metadataService.getDomainId(urlIdInt)); @@ -99,7 +99,7 @@ public class IndexResultValuator { return searchResult; } - private double evaluateSubquery(EdgeSearchResultItem searchResult, + private double evaluateSubquery(SearchResultItem searchResult, long docMetadata, int querySetId, List termList) @@ -114,7 +114,7 @@ public class IndexResultValuator { long metadata = termMetadata.getTermMetadata(termId, searchResult.getUrlIdInt()); - EdgeSearchResultKeywordScore score = new EdgeSearchResultKeywordScore( + SearchResultKeywordScore score = new SearchResultKeywordScore( querySetId, searchTerm, metadata, diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java index ae9a2361..eaebeee7 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/IndexQueryService.java @@ -9,10 +9,10 @@ import gnu.trove.set.hash.TLongHashSet; import io.prometheus.client.Counter; import io.prometheus.client.Gauge; import io.prometheus.client.Histogram; -import nu.marginalia.index.client.model.results.EdgeSearchResultItem; -import nu.marginalia.index.client.model.results.EdgeSearchResultSet; -import nu.marginalia.index.client.model.query.EdgeSearchSpecification; -import nu.marginalia.index.client.model.query.EdgeSearchSubquery; +import nu.marginalia.index.client.model.results.SearchResultItem; +import nu.marginalia.index.client.model.results.SearchResultSet; +import nu.marginalia.index.client.model.query.SearchSpecification; +import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.array.buffer.LongQueryBuffer; import nu.marginalia.index.index.SearchIndex; import nu.marginalia.index.index.SearchIndexSearchTerms; @@ -73,13 +73,13 @@ public class IndexQueryService { public Object search(Request request, Response response) { String json = request.body(); - EdgeSearchSpecification specsSet = gson.fromJson(json, EdgeSearchSpecification.class); + SearchSpecification specsSet = gson.fromJson(json, SearchSpecification.class); try { return wmsa_edge_index_query_time.time(() -> { var params = new SearchParameters(specsSet, getSearchSet(specsSet)); - List results = executeSearch(params); + List results = executeSearch(params); logger.info(queryMarker, "Index Result Count: {}", results.size()); wmsa_edge_index_query_cost.set(params.getDataCost()); @@ -87,7 +87,7 @@ public class IndexQueryService { wmsa_edge_index_query_timeouts.inc(); } - return new EdgeSearchResultSet(results); + return new SearchResultSet(results); }); } catch (HaltException ex) { @@ -103,11 +103,11 @@ public class IndexQueryService { } // exists for test access - EdgeSearchResultSet justQuery(EdgeSearchSpecification specsSet) { - return new EdgeSearchResultSet(executeSearch(new SearchParameters(specsSet, getSearchSet(specsSet)))); + SearchResultSet justQuery(SearchSpecification specsSet) { + return new SearchResultSet(executeSearch(new SearchParameters(specsSet, getSearchSet(specsSet)))); } - private SearchSet getSearchSet(EdgeSearchSpecification specsSet) { + private SearchSet getSearchSet(SearchSpecification specsSet) { if (specsSet.domains != null && !specsSet.domains.isEmpty()) { return new SmallSearchSet(specsSet.domains); } @@ -115,7 +115,7 @@ public class IndexQueryService { return searchSetsService.getSearchSetByName(specsSet.searchSetIdentifier); } - private List executeSearch(SearchParameters params) { + private List executeSearch(SearchParameters params) { var resultIds = evaluateSubqueries(params); var resultItems = calculateResultScores(params, resultIds); @@ -176,7 +176,7 @@ public class IndexQueryService { return results; } - private ArrayList calculateResultScores(SearchParameters params, TLongList results) { + private ArrayList calculateResultScores(SearchParameters params, TLongList results) { final var evaluator = new IndexResultValuator( searchTermsSvc, @@ -185,7 +185,7 @@ public class IndexQueryService { params.subqueries, params.queryParams); - ArrayList items = new ArrayList<>(results.size()); + ArrayList items = new ArrayList<>(results.size()); // Sorting the result ids results in better paging characteristics results.sort(); @@ -206,15 +206,15 @@ public class IndexQueryService { return items; } - private List selectBestResults(SearchParameters params, List results) { + private List selectBestResults(SearchParameters params, List results) { var domainCountFilter = new IndexResultDomainDeduplicator(params.limitByDomain); - results.sort(comparingDouble(EdgeSearchResultItem::getScore) - .thenComparingInt(EdgeSearchResultItem::getRanking) - .thenComparingInt(EdgeSearchResultItem::getUrlIdInt)); + results.sort(comparingDouble(SearchResultItem::getScore) + .thenComparingInt(SearchResultItem::getRanking) + .thenComparingInt(SearchResultItem::getUrlIdInt)); - List resultsList = new ArrayList<>(results.size()); + List resultsList = new ArrayList<>(results.size()); for (var item : results) { if (domainCountFilter.test(item)) { @@ -245,7 +245,7 @@ class SearchParameters { before evaluating them for the best result. */ final int fetchSize; final IndexSearchBudget budget; - final List subqueries; + final List subqueries; final IndexQueryParams queryParams; final int limitByDomain; @@ -261,7 +261,7 @@ class SearchParameters { */ final TLongHashSet consideredUrlIds; - public SearchParameters(EdgeSearchSpecification specsSet, SearchSet searchSet) { + public SearchParameters(SearchSpecification specsSet, SearchSet searchSet) { var limits = specsSet.queryLimits; this.fetchSize = limits.fetchSize(); diff --git a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchTermsService.java b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchTermsService.java index 0ac7cced..886bede5 100644 --- a/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchTermsService.java +++ b/code/services-core/index-service/src/main/java/nu/marginalia/index/svc/SearchTermsService.java @@ -5,7 +5,7 @@ import com.google.inject.Singleton; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; import nu.marginalia.dict.OffHeapDictionaryHashMap; -import nu.marginalia.index.client.model.query.EdgeSearchSubquery; +import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.index.SearchIndexSearchTerms; import nu.marginalia.lexicon.KeywordLexiconReadOnlyView; import org.slf4j.Logger; @@ -23,7 +23,7 @@ public class SearchTermsService { this.lexicon = lexicon; } - public SearchIndexSearchTerms getSearchTerms(EdgeSearchSubquery request) { + public SearchIndexSearchTerms getSearchTerms(SearchSubquery request) { final IntList excludes = new IntArrayList(); final IntList includes = new IntArrayList(); final IntList priority = new IntArrayList(); diff --git a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java index 0d33c294..8c75c4e0 100644 --- a/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java +++ b/code/services-core/index-service/src/test/java/nu/marginalia/index/svc/IndexQueryServiceIntegrationTest.java @@ -2,10 +2,10 @@ package nu.marginalia.index.svc; import com.google.inject.Guice; import com.google.inject.Inject; -import nu.marginalia.index.client.model.query.EdgeSearchSpecification; -import nu.marginalia.index.client.model.query.EdgeSearchSubquery; +import nu.marginalia.index.client.model.query.SearchSpecification; +import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.client.model.query.SearchSetIdentifier; -import nu.marginalia.index.client.model.results.EdgeSearchResultItem; +import nu.marginalia.index.client.model.results.SearchResultItem; import nu.marginalia.index.index.SearchIndex; import nu.marginalia.index.journal.model.IndexJournalEntryData; import nu.marginalia.index.journal.model.IndexJournalEntryHeader; @@ -79,7 +79,7 @@ public class IndexQueryServiceIntegrationTest { searchIndex.switchIndex(); var rsp = queryService.justQuery( - EdgeSearchSpecification.builder() + SearchSpecification.builder() .queryLimits(new QueryLimits(10, 10, Integer.MAX_VALUE, 4000)) .queryStrategy(QueryStrategy.SENTENCE) .year(SpecificationLimit.none()) @@ -88,7 +88,7 @@ public class IndexQueryServiceIntegrationTest { .rank(SpecificationLimit.none()) .domains(new ArrayList<>()) .searchSetIdentifier(SearchSetIdentifier.NONE) - .subqueries(List.of(new EdgeSearchSubquery( + .subqueries(List.of(new SearchSubquery( List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList() ))).build()); @@ -96,7 +96,7 @@ public class IndexQueryServiceIntegrationTest { new int[] { 30, 90, 150, 210, 270, 330, 390, 450, 510 }, rsp.results .stream() - .mapToInt(EdgeSearchResultItem::getUrlIdInt) + .mapToInt(SearchResultItem::getUrlIdInt) .toArray()); } @@ -111,7 +111,7 @@ public class IndexQueryServiceIntegrationTest { searchIndex.switchIndex(); var rsp = queryService.justQuery( - EdgeSearchSpecification.builder() + SearchSpecification.builder() .queryLimits(new QueryLimits(10, 10, Integer.MAX_VALUE, 4000)) .year(SpecificationLimit.none()) .quality(SpecificationLimit.none()) @@ -119,12 +119,12 @@ public class IndexQueryServiceIntegrationTest { .rank(SpecificationLimit.none()) .queryStrategy(QueryStrategy.SENTENCE) .domains(List.of(2)) - .subqueries(List.of(new EdgeSearchSubquery( + .subqueries(List.of(new SearchSubquery( List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList() ))).build()); Assertions.assertArrayEquals( new int[] { 210, 270 }, - rsp.results.stream().mapToInt(EdgeSearchResultItem::getUrlIdInt).toArray()); + rsp.results.stream().mapToInt(SearchResultItem::getUrlIdInt).toArray()); } @Test @@ -136,7 +136,7 @@ public class IndexQueryServiceIntegrationTest { searchIndex.switchIndex(); var rsp = queryService.justQuery( - EdgeSearchSpecification.builder() + SearchSpecification.builder() .queryLimits(new QueryLimits(10, 10, Integer.MAX_VALUE, 4000)) .quality(SpecificationLimit.none()) .year(SpecificationLimit.equals(1998)) @@ -144,14 +144,14 @@ public class IndexQueryServiceIntegrationTest { .rank(SpecificationLimit.none()) .queryStrategy(QueryStrategy.SENTENCE) .searchSetIdentifier(SearchSetIdentifier.NONE) - .subqueries(List.of(new EdgeSearchSubquery( + .subqueries(List.of(new SearchSubquery( List.of("4"), Collections.emptyList(), Collections.emptyList(), Collections.emptyList() )) ).build()); Assertions.assertArrayEquals( new int[] { 12, 72, 132, 192, 252, 312, 372, 432, 492, 32 }, - rsp.results.stream().mapToInt(EdgeSearchResultItem::getUrlIdInt).toArray()); + rsp.results.stream().mapToInt(SearchResultItem::getUrlIdInt).toArray()); } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/command/SearchJsParameter.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/command/SearchJsParameter.java index 0efa224d..ab67486c 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/command/SearchJsParameter.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/command/SearchJsParameter.java @@ -1,6 +1,6 @@ package nu.marginalia.search.command; -import nu.marginalia.index.client.model.query.EdgeSearchSubquery; +import nu.marginalia.index.client.model.query.SearchSubquery; import javax.annotation.Nullable; import java.util.Arrays; @@ -25,7 +25,7 @@ public enum SearchJsParameter { return DEFAULT; } - public void addTacitTerms(EdgeSearchSubquery subquery) { + public void addTacitTerms(SearchSubquery subquery) { subquery.searchTermsExclude.addAll(Arrays.asList(implictExcludeSearchTerms)); } } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/NearQueryProcessor.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/db/DbNearDomainsQuery.java similarity index 93% rename from code/services-core/search-service/src/main/java/nu/marginalia/search/query/NearQueryProcessor.java rename to code/services-core/search-service/src/main/java/nu/marginalia/search/db/DbNearDomainsQuery.java index 7a5f6025..5ac7d591 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/NearQueryProcessor.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/db/DbNearDomainsQuery.java @@ -1,4 +1,4 @@ -package nu.marginalia.search.query; +package nu.marginalia.search.db; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; @@ -9,12 +9,12 @@ import java.util.ArrayList; import java.util.List; import java.util.function.Consumer; -public class NearQueryProcessor { +public class DbNearDomainsQuery { private final HikariDataSource dataSource; @Inject - public NearQueryProcessor(HikariDataSource dataSource) { + public DbNearDomainsQuery(HikariDataSource dataSource) { this.dataSource = dataSource; } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/model/SearchProfile.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/model/SearchProfile.java index 6f0c7535..0ec971ef 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/model/SearchProfile.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/model/SearchProfile.java @@ -2,7 +2,7 @@ package nu.marginalia.search.model; import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.model.crawl.HtmlFeature; -import nu.marginalia.index.client.model.query.EdgeSearchSubquery; +import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.client.model.query.SearchSetIdentifier; import java.util.Objects; @@ -45,7 +45,7 @@ public enum SearchProfile { return YOLO; } - public void addTacitTerms(EdgeSearchSubquery subquery) { + public void addTacitTerms(SearchSubquery subquery) { if (this == ACADEMIA) { subquery.searchTermsPriority.add("tld:edu"); } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java index 06d077a4..5b9b85f0 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java @@ -1,7 +1,7 @@ package nu.marginalia.search.model; import lombok.*; -import nu.marginalia.index.client.model.results.EdgeSearchResultItem; +import nu.marginalia.index.client.model.results.SearchResultItem; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.crawl.EdgeDomainIndexingState; import nu.marginalia.model.crawl.HtmlFeature; @@ -36,7 +36,7 @@ public class UrlDetails { public int resultsFromSameDomain; public String positions; - public EdgeSearchResultItem resultItem; + public SearchResultItem resultItem; public boolean hasMoreResults() { return resultsFromSameDomain > 1; diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryFactory.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryFactory.java index fdd6d9df..ca38e25a 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryFactory.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/QueryFactory.java @@ -3,8 +3,8 @@ package nu.marginalia.search.query; import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.LanguageModels; -import nu.marginalia.index.client.model.query.EdgeSearchSpecification; -import nu.marginalia.index.client.model.query.EdgeSearchSubquery; +import nu.marginalia.index.client.model.query.SearchSpecification; +import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; @@ -16,6 +16,7 @@ import nu.marginalia.query_parser.QueryPermutation; import nu.marginalia.query_parser.QueryVariants; import nu.marginalia.query_parser.token.Token; import nu.marginalia.query_parser.token.TokenType; +import nu.marginalia.search.db.DbNearDomainsQuery; import nu.marginalia.search.model.SearchProfile; import nu.marginalia.search.query.model.SearchQuery; import nu.marginalia.search.query.model.UserSearchParameters; @@ -34,7 +35,7 @@ public class QueryFactory { private final EnglishDictionary englishDictionary; private final Logger logger = LoggerFactory.getLogger(getClass()); private final SearchResultValuator searchResultValuator; - private final NearQueryProcessor nearQueryProcessor; + private final DbNearDomainsQuery dbNearDomainsQuery; private static final int RETAIN_QUERY_VARIANT_COUNT = 5; private final ThreadLocal queryVariants; @@ -48,11 +49,11 @@ public class QueryFactory { EnglishDictionary englishDictionary, NGramBloomFilter nGramBloomFilter, SearchResultValuator searchResultValuator, - NearQueryProcessor nearQueryProcessor) { + DbNearDomainsQuery dbNearDomainsQuery) { this.englishDictionary = englishDictionary; this.searchResultValuator = searchResultValuator; - this.nearQueryProcessor = nearQueryProcessor; + this.dbNearDomainsQuery = dbNearDomainsQuery; this.queryVariants = ThreadLocal.withInitial(() -> new QueryVariants(lm ,dict, nGramBloomFilter, englishDictionary)); } @@ -67,13 +68,13 @@ public class QueryFactory { public SearchQuery createQuery(UserSearchParameters params) { final var processedQuery = createQuery(getQueryPermutation(), params); - final List subqueries = processedQuery.specs.subqueries; + final List subqueries = processedQuery.specs.subqueries; for (var sq : subqueries) { sq.setValue(searchResultValuator.preEvaluate(sq)); } - subqueries.sort(Comparator.comparing(EdgeSearchSubquery::getValue)); + subqueries.sort(Comparator.comparing(SearchSubquery::getValue)); trimArray(subqueries, RETAIN_QUERY_VARIANT_COUNT); return processedQuery; @@ -84,16 +85,16 @@ public class QueryFactory { int limitTotal, String... termsInclude) { - List sqs = new ArrayList<>(); + List sqs = new ArrayList<>(); - sqs.add(new EdgeSearchSubquery( + sqs.add(new SearchSubquery( Arrays.asList(termsInclude), Collections.emptyList(), Collections.emptyList(), Collections.emptyList() )); - var specs = EdgeSearchSpecification.builder() + var specs = SearchSpecification.builder() .subqueries(sqs) .domains(Collections.emptyList()) .searchSetIdentifier(profile.searchSetIdentifier) @@ -170,7 +171,7 @@ public class QueryFactory { } var queryPermutations = queryPermutation.permuteQueriesNew(basicQuery); - List subqueries = new ArrayList<>(); + List subqueries = new ArrayList<>(); String near = profile.getNearDomain(); @@ -219,7 +220,7 @@ public class QueryFactory { searchTermsAdvice.clear(); } - EdgeSearchSubquery subquery = new EdgeSearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority); + SearchSubquery subquery = new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority); params.profile().addTacitTerms(subquery); params.jsSetting().addTacitTerms(subquery); @@ -231,7 +232,7 @@ public class QueryFactory { if (near != null) { if (domain == null) { - domains = nearQueryProcessor.getRelatedDomains(near, problems::add); + domains = dbNearDomainsQuery.getRelatedDomains(near, problems::add); } } @@ -242,7 +243,7 @@ public class QueryFactory { domainLimit = 2; } - EdgeSearchSpecification.EdgeSearchSpecificationBuilder specsBuilder = EdgeSearchSpecification.builder() + var specsBuilder = SearchSpecification.builder() .subqueries(subqueries) .queryLimits(new QueryLimits(domainLimit, 100, 250, 4096)) .humanQuery(query) @@ -254,7 +255,7 @@ public class QueryFactory { .queryStrategy(queryStrategy) .searchSetIdentifier(profile.searchSetIdentifier); - EdgeSearchSpecification specs = specsBuilder.build(); + SearchSpecification specs = specsBuilder.build(); return new SearchQuery(specs, searchTermsHuman, domain); } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/model/SearchQuery.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/model/SearchQuery.java index 615c888e..345cf32c 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/query/model/SearchQuery.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/query/model/SearchQuery.java @@ -1,19 +1,19 @@ package nu.marginalia.search.query.model; import lombok.AllArgsConstructor; -import nu.marginalia.index.client.model.query.EdgeSearchSpecification; +import nu.marginalia.index.client.model.query.SearchSpecification; import java.util.*; @AllArgsConstructor public class SearchQuery { - public final EdgeSearchSpecification specs; + public final SearchSpecification specs; public final Set problems = new TreeSet<>(); public final List searchTermsHuman; public String domain; - public SearchQuery(EdgeSearchSpecification justSpecs) { + public SearchQuery(SearchSpecification justSpecs) { searchTermsHuman = new ArrayList<>(); specs = justSpecs; } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/results/SearchResultDecorator.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/results/SearchResultDecorator.java index 517c8e8a..b08054e5 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/results/SearchResultDecorator.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/results/SearchResultDecorator.java @@ -8,7 +8,7 @@ import nu.marginalia.search.db.DbUrlDetailsQuery; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.crawl.EdgeDomainIndexingState; import nu.marginalia.model.id.EdgeIdList; -import nu.marginalia.index.client.model.results.EdgeSearchResultItem; +import nu.marginalia.index.client.model.results.SearchResultItem; import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.valuation.SearchResultValuator; import nu.marginalia.util.BrailleBlockPunchCards; @@ -29,11 +29,11 @@ public class SearchResultDecorator { this.valuator = valuator; } - public List getAllUrlDetails(List resultItems) { + public List getAllUrlDetails(List resultItems) { TIntObjectHashMap detailsById = new TIntObjectHashMap<>(resultItems.size()); EdgeIdList idList = resultItems.stream() - .mapToInt(EdgeSearchResultItem::getUrlIdInt) + .mapToInt(SearchResultItem::getUrlIdInt) .collect(EdgeIdList::new, EdgeIdList::add, EdgeIdList::addAll); List ret = dbUrlDetailsQuery.getUrlDetailsMulti(idList); @@ -72,14 +72,14 @@ public class SearchResultDecorator { return retList; } - private String getPositionsString(EdgeSearchResultItem resultItem) { + private String getPositionsString(SearchResultItem resultItem) { Int2IntArrayMap positionsPerSet = new Int2IntArrayMap(8); for (var score : resultItem.scores) { - if (!score.isRegular()) { + if (!score.isKeywordRegular()) { continue; } - positionsPerSet.merge(score.set(), score.positions(), this::and); + positionsPerSet.merge(score.subquery(), score.positions(), this::and); } int bits = positionsPerSet.values().intStream().reduce(this::or).orElse(0); @@ -95,7 +95,7 @@ public class SearchResultDecorator { return a | b; } - private double calculateTermScore(EdgeSearchResultItem resultItem, UrlDetails details) { + private double calculateTermScore(SearchResultItem resultItem, UrlDetails details) { final double statePenalty = (details.domainState == EdgeDomainIndexingState.SPECIAL) ? 1.25 : 0; final double value = valuator.evaluateTerms(resultItem.scores, details.words, details.title.length()); diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java index 8c2e6038..28951265 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchApiQueryService.java @@ -3,7 +3,7 @@ package nu.marginalia.search.svc; import com.google.common.base.Strings; import com.google.inject.Inject; import lombok.SneakyThrows; -import nu.marginalia.index.client.model.results.EdgeSearchResultKeywordScore; +import nu.marginalia.index.client.model.results.SearchResultKeywordScore; import nu.marginalia.search.client.model.ApiSearchResultQueryDetails; import nu.marginalia.model.idx.WordMetadata; import nu.marginalia.search.SearchOperator; @@ -62,7 +62,7 @@ public class SearchApiQueryService { ApiSearchResult convert(UrlDetails url) { List> details = new ArrayList<>(); if (url.resultItem != null) { - var bySet = url.resultItem.scores.stream().collect(Collectors.groupingBy(EdgeSearchResultKeywordScore::set)); + var bySet = url.resultItem.scores.stream().collect(Collectors.groupingBy(SearchResultKeywordScore::subquery)); outer: for (var entries : bySet.values()) { @@ -73,7 +73,7 @@ public class SearchApiQueryService { continue outer; Set flags = metadata.flagSet().stream().map(Object::toString).collect(Collectors.toSet()); - lst.add(new ApiSearchResultQueryDetails(entry.keyword(), metadata.tfIdf(), Integer.bitCount(metadata.positions()), flags)); + lst.add(new ApiSearchResultQueryDetails(entry.keyword, metadata.tfIdf(), Integer.bitCount(metadata.positions()), flags)); } details.add(lst); } diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java index 3803bbda..47a6a4f1 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java @@ -3,8 +3,8 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.index.client.IndexClient; -import nu.marginalia.index.client.model.results.EdgeSearchResultItem; -import nu.marginalia.index.client.model.query.EdgeSearchSpecification; +import nu.marginalia.index.client.model.results.SearchResultItem; +import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.search.model.PageScoreAdjustment; import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.results.SearchResultDecorator; @@ -37,7 +37,7 @@ public class SearchQueryIndexService { } public List executeQuery(Context ctx, SearchQuery processedQuery) { - final List results = indexClient.query(ctx, processedQuery.specs); + final List results = indexClient.query(ctx, processedQuery.specs); List urlDetails = resultDecorator.getAllUrlDetails(results); @@ -70,7 +70,7 @@ public class SearchQueryIndexService { private final Pattern titleSplitPattern = Pattern.compile("[:!|./]|(\\s-|-\\s)|\\s{2,}"); - private PageScoreAdjustment adjustScoreBasedOnQuery(UrlDetails p, EdgeSearchSpecification specs) { + private PageScoreAdjustment adjustScoreBasedOnQuery(UrlDetails p, SearchSpecification specs) { String titleLC = p.title == null ? "" : p.title.toLowerCase(); String descLC = p.description == null ? "" : p.description.toLowerCase(); String urlLC = p.url == null ? "" : p.url.path.toLowerCase(); diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/valuation/SearchResultValuator.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/valuation/SearchResultValuator.java index be09b00e..8bdd791c 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/valuation/SearchResultValuator.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/valuation/SearchResultValuator.java @@ -5,8 +5,8 @@ import com.google.inject.Singleton; import nu.marginalia.language.statistics.TermFrequencyDict; import nu.marginalia.model.crawl.EdgePageWordFlags; import nu.marginalia.model.idx.WordMetadata; -import nu.marginalia.index.client.model.results.EdgeSearchResultKeywordScore; -import nu.marginalia.index.client.model.query.EdgeSearchSubquery; +import nu.marginalia.index.client.model.results.SearchResultKeywordScore; +import nu.marginalia.index.client.model.query.SearchSubquery; import nu.marginalia.language.WordPatterns; import org.jetbrains.annotations.NotNull; @@ -35,7 +35,7 @@ public class SearchResultValuator { } - public double preEvaluate(EdgeSearchSubquery sq) { + public double preEvaluate(SearchSubquery sq) { final String[] terms = sq.searchTermsInclude.stream().filter(f -> !f.contains(":")).toArray(String[]::new); double termSum = 0.; @@ -56,8 +56,8 @@ public class SearchResultValuator { return termSum / factorSum; } - public double evaluateTerms(List rawScores, int length, int titleLength) { - int sets = 1 + rawScores.stream().mapToInt(EdgeSearchResultKeywordScore::set).max().orElse(0); + public double evaluateTerms(List rawScores, int length, int titleLength) { + int sets = 1 + rawScores.stream().mapToInt(SearchResultKeywordScore::subquery).max().orElse(0); double bestScore = 10; double bestAllTermsFactor = 1.; @@ -88,10 +88,10 @@ public class SearchResultValuator { return bestScore * (0.1 + 0.9 * bestAllTermsFactor) * priorityTermBonus; } - private boolean hasPriorityTerm(List rawScores) { + private boolean hasPriorityTerm(List rawScores) { return rawScores.stream() .findAny() - .map(EdgeSearchResultKeywordScore::hasPriorityTerms) + .map(SearchResultKeywordScore::hasPriorityTerms) .orElse(false); } @@ -260,11 +260,11 @@ public class SearchResultValuator { return f; } - private double[] getTermWeights(EdgeSearchResultKeywordScore[] scores) { + private double[] getTermWeights(SearchResultKeywordScore[] scores) { double[] weights = new double[scores.length]; for (int i = 0; i < scores.length; i++) { - String[] parts = separator.split(scores[i].keyword()); + String[] parts = separator.split(scores[i].keyword); double sumScore = 0.; int count = 0; @@ -305,8 +305,8 @@ public class SearchResultValuator { return weights; } - private SearchResultsKeywordSet createKeywordSet(List rawScores, int thisSet) { - EdgeSearchResultKeywordScore[] scores = rawScores.stream().filter(w -> w.set() == thisSet && !w.keyword().contains(":")).toArray(EdgeSearchResultKeywordScore[]::new); + private SearchResultsKeywordSet createKeywordSet(List rawScores, int thisSet) { + SearchResultKeywordScore[] scores = rawScores.stream().filter(w -> w.subquery() == thisSet && !w.keyword.contains(":")).toArray(SearchResultKeywordScore[]::new); if (scores.length == 0) { return null; } @@ -322,8 +322,8 @@ public class SearchResultValuator { } - private record SearchResultsKeyword(EdgeSearchResultKeywordScore score, WordMetadata wordMetadata, double weight) { - public SearchResultsKeyword(EdgeSearchResultKeywordScore score, double weight) { + private record SearchResultsKeyword(SearchResultKeywordScore score, WordMetadata wordMetadata, double weight) { + public SearchResultsKeyword(SearchResultKeywordScore score, double weight) { this(score, new WordMetadata(score.encodedWordMetadata()), weight); } diff --git a/code/services-core/search-service/src/test/java/nu/marginalia/search/query/QueryFactoryTest.java b/code/services-core/search-service/src/test/java/nu/marginalia/search/query/QueryFactoryTest.java index 48842b9a..b3ec6dca 100644 --- a/code/services-core/search-service/src/test/java/nu/marginalia/search/query/QueryFactoryTest.java +++ b/code/services-core/search-service/src/test/java/nu/marginalia/search/query/QueryFactoryTest.java @@ -3,7 +3,7 @@ package nu.marginalia.search.query; import nu.marginalia.WmsaHome; import nu.marginalia.index.query.limit.SpecificationLimitType; import nu.marginalia.language.statistics.EnglishDictionary; -import nu.marginalia.index.client.model.query.EdgeSearchSpecification; +import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.language.statistics.NGramBloomFilter; import nu.marginalia.language.statistics.TermFrequencyDict; import nu.marginalia.search.command.SearchJsParameter; @@ -37,7 +37,7 @@ public class QueryFactoryTest { ); } - public EdgeSearchSpecification parseAndGetSpecs(String query) { + public SearchSpecification parseAndGetSpecs(String query) { return queryFactory.createQuery( new UserSearchParameters(query, SearchProfile.CORPO, SearchJsParameter.DEFAULT) ).specs; diff --git a/code/services-core/search-service/src/test/java/nu/marginalia/search/valuation/SearchResultValuatorTest.java b/code/services-core/search-service/src/test/java/nu/marginalia/search/valuation/SearchResultValuatorTest.java index 34637872..333eca48 100644 --- a/code/services-core/search-service/src/test/java/nu/marginalia/search/valuation/SearchResultValuatorTest.java +++ b/code/services-core/search-service/src/test/java/nu/marginalia/search/valuation/SearchResultValuatorTest.java @@ -1,6 +1,6 @@ package nu.marginalia.search.valuation; -import nu.marginalia.index.client.model.results.EdgeSearchResultKeywordScore; +import nu.marginalia.index.client.model.results.SearchResultKeywordScore; import nu.marginalia.language.statistics.TermFrequencyDict; import nu.marginalia.model.crawl.EdgePageDocumentFlags; import nu.marginalia.model.crawl.EdgePageWordFlags; @@ -31,29 +31,29 @@ class SearchResultValuatorTest { valuator = new SearchResultValuator(dict); } - List titleOnlyLowCountSet = List.of( - new EdgeSearchResultKeywordScore(0, "bob", + List titleOnlyLowCountSet = List.of( + new SearchResultKeywordScore(0, "bob", wordMetadata(32, Set.of(1), EnumSet.of(EdgePageWordFlags.Title)), docMetadata(0, 2010, 0, 5, EnumSet.noneOf(EdgePageDocumentFlags.class)), false) ); - List highCountNoTitleSet = List.of( - new EdgeSearchResultKeywordScore(0, "bob", + List highCountNoTitleSet = List.of( + new SearchResultKeywordScore(0, "bob", wordMetadata(129, Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(EdgePageWordFlags.TfIdfHigh)), docMetadata(0, 2010, 0, 5, EnumSet.noneOf(EdgePageDocumentFlags.class)), false) ); - List highCountSubjectSet = List.of( - new EdgeSearchResultKeywordScore(0, "bob", + List highCountSubjectSet = List.of( + new SearchResultKeywordScore(0, "bob", wordMetadata(129, Set.of(1,3,4,6,7,9,10,11,12,14,15,16), EnumSet.of(EdgePageWordFlags.TfIdfHigh, EdgePageWordFlags.Subjects)), docMetadata(0, 2010, 0, 5, EnumSet.noneOf(EdgePageDocumentFlags.class)), false) ); - List first = List.of( - new EdgeSearchResultKeywordScore(0, "bob", + List first = List.of( + new SearchResultKeywordScore(0, "bob", wordMetadata(202, Set.of(1,3,4,6,7,9,10,11), EnumSet.of(EdgePageWordFlags.TfIdfHigh)), docMetadata(0, 2010, 0, 5, EnumSet.noneOf(EdgePageDocumentFlags.class)), false)