mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
Better handling of quote terms, fix bug in handling of longer queries.
... where some terms may previously have been ignored. The latter bug was caused by QueryHeads with AnyOf-style predicates interacting poorly with alreadyConsideredTerms in SearchIndex.java.
This commit is contained in:
parent
810515c08d
commit
fe419b12b4
@ -10,7 +10,7 @@ import java.util.stream.Collectors;
|
|||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class SearchSubquery {
|
public class SearchSubquery {
|
||||||
|
|
||||||
/** These terms must be present in the document */
|
/** These terms must be present in the document and are used in ranking*/
|
||||||
public final List<String> searchTermsInclude;
|
public final List<String> searchTermsInclude;
|
||||||
|
|
||||||
/** These terms must be absent from the document */
|
/** These terms must be absent from the document */
|
||||||
@ -21,18 +21,22 @@ public class SearchSubquery {
|
|||||||
|
|
||||||
/** If these optional terms are present in the document, rank it highly */
|
/** If these optional terms are present in the document, rank it highly */
|
||||||
public final List<String> searchTermsPriority;
|
public final List<String> searchTermsPriority;
|
||||||
|
|
||||||
|
/** Terms that we require to be in the same sentence */
|
||||||
|
public final List<List<String>> searchTermCoherences;
|
||||||
|
|
||||||
private double value = 0;
|
private double value = 0;
|
||||||
|
|
||||||
public SearchSubquery(List<String> searchTermsInclude,
|
public SearchSubquery(List<String> searchTermsInclude,
|
||||||
List<String> searchTermsExclude,
|
List<String> searchTermsExclude,
|
||||||
List<String> searchTermsAdvice,
|
List<String> searchTermsAdvice,
|
||||||
List<String> searchTermsPriority
|
List<String> searchTermsPriority,
|
||||||
) {
|
List<List<String>> searchTermCoherences) {
|
||||||
this.searchTermsInclude = searchTermsInclude;
|
this.searchTermsInclude = searchTermsInclude;
|
||||||
this.searchTermsExclude = searchTermsExclude;
|
this.searchTermsExclude = searchTermsExclude;
|
||||||
this.searchTermsAdvice = searchTermsAdvice;
|
this.searchTermsAdvice = searchTermsAdvice;
|
||||||
this.searchTermsPriority = searchTermsPriority;
|
this.searchTermsPriority = searchTermsPriority;
|
||||||
|
this.searchTermCoherences = searchTermCoherences;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SearchSubquery setValue(double value) {
|
public SearchSubquery setValue(double value) {
|
||||||
@ -51,6 +55,7 @@ public class SearchSubquery {
|
|||||||
if (!searchTermsExclude.isEmpty()) sb.append("exclude=").append(searchTermsExclude.stream().collect(Collectors.joining(",", "[", "] ")));
|
if (!searchTermsExclude.isEmpty()) sb.append("exclude=").append(searchTermsExclude.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||||
if (!searchTermsAdvice.isEmpty()) sb.append("advice=").append(searchTermsAdvice.stream().collect(Collectors.joining(",", "[", "] ")));
|
if (!searchTermsAdvice.isEmpty()) sb.append("advice=").append(searchTermsAdvice.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||||
if (!searchTermsPriority.isEmpty()) sb.append("priority=").append(searchTermsPriority.stream().collect(Collectors.joining(",", "[", "] ")));
|
if (!searchTermsPriority.isEmpty()) sb.append("priority=").append(searchTermsPriority.stream().collect(Collectors.joining(",", "[", "] ")));
|
||||||
|
if (!searchTermCoherences.isEmpty()) sb.append("coherences=").append(searchTermCoherences.stream().map(coh->coh.stream().collect(Collectors.joining(",", "[", "] "))).collect(Collectors.joining(", ")));
|
||||||
|
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
@ -6,9 +6,7 @@ import static java.lang.Boolean.compare;
|
|||||||
import static java.lang.Double.compare;
|
import static java.lang.Double.compare;
|
||||||
|
|
||||||
public record SearchResultPreliminaryScore(
|
public record SearchResultPreliminaryScore(
|
||||||
boolean anyAllSynthetic,
|
boolean disqualified,
|
||||||
int minNumberOfFlagsSet,
|
|
||||||
int minPositionsSet,
|
|
||||||
boolean hasPriorityTerm,
|
boolean hasPriorityTerm,
|
||||||
double searchRankingScore)
|
double searchRankingScore)
|
||||||
implements Comparable<SearchResultPreliminaryScore>
|
implements Comparable<SearchResultPreliminaryScore>
|
||||||
@ -27,16 +25,7 @@ public record SearchResultPreliminaryScore(
|
|||||||
return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore);
|
return PREFER_LOW * compare(searchRankingScore, other.searchRankingScore);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isDisqualified() {
|
||||||
if (minNumberOfFlagsSet > 0)
|
return disqualified;
|
||||||
return false;
|
|
||||||
|
|
||||||
if (anyAllSynthetic)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (minPositionsSet > 0)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -110,11 +110,12 @@ public class SearchIndex {
|
|||||||
// that contain pairs of two search terms
|
// that contain pairs of two search terms
|
||||||
if (orderedIncludes.length > 1) {
|
if (orderedIncludes.length > 1) {
|
||||||
for (int i = 0; i + 1 < orderedIncludes.length; i++) {
|
for (int i = 0; i + 1 < orderedIncludes.length; i++) {
|
||||||
var remainingWords = Arrays.copyOfRange(orderedIncludes, i+1, orderedIncludes.length);
|
for (int j = i + 1; j < orderedIncludes.length; j++) {
|
||||||
var entrySource = indexReader
|
var entrySource = indexReader
|
||||||
.findPriorityWord(orderedIncludes[i])
|
.findPriorityWord(orderedIncludes[i])
|
||||||
.alsoPrioAnyOf(remainingWords);
|
.alsoPrio(orderedIncludes[j]);
|
||||||
queryHeads.add(entrySource);
|
queryHeads.add(entrySource);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,9 +5,18 @@ import it.unimi.dsi.fastutil.ints.IntComparator;
|
|||||||
import it.unimi.dsi.fastutil.ints.IntList;
|
import it.unimi.dsi.fastutil.ints.IntList;
|
||||||
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
|
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
|
||||||
|
|
||||||
public record SearchIndexSearchTerms(IntList includes, IntList excludes, IntList priority) {
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
public record SearchIndexSearchTerms(
|
||||||
|
IntList includes,
|
||||||
|
IntList excludes,
|
||||||
|
IntList priority,
|
||||||
|
List<IntList> coherences
|
||||||
|
)
|
||||||
|
{
|
||||||
public SearchIndexSearchTerms() {
|
public SearchIndexSearchTerms() {
|
||||||
this(IntList.of(), IntList.of(), IntList.of());
|
this(IntList.of(), IntList.of(), IntList.of(), Collections.emptyList());
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean isEmpty() {
|
public boolean isEmpty() {
|
||||||
|
@ -8,8 +8,10 @@ import it.unimi.dsi.fastutil.longs.Long2LongOpenHashMap;
|
|||||||
import nu.marginalia.index.client.model.query.SearchSubquery;
|
import nu.marginalia.index.client.model.query.SearchSubquery;
|
||||||
import nu.marginalia.index.index.SearchIndex;
|
import nu.marginalia.index.index.SearchIndex;
|
||||||
import nu.marginalia.index.svc.SearchTermsService;
|
import nu.marginalia.index.svc.SearchTermsService;
|
||||||
|
import nu.marginalia.model.idx.WordMetadata;
|
||||||
import nu.marginalia.ranking.ResultValuator;
|
import nu.marginalia.ranking.ResultValuator;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.OptionalInt;
|
import java.util.OptionalInt;
|
||||||
|
|
||||||
@ -74,7 +76,27 @@ public class IndexMetadataService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new QuerySearchTerms(termToId, termIdsList.toIntArray());
|
|
||||||
|
return new QuerySearchTerms(termToId,
|
||||||
|
termIdsList.toIntArray(),
|
||||||
|
getTermCoherences(searchTermVariants));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private TermCoherences getTermCoherences(List<SearchSubquery> searchTermVariants) {
|
||||||
|
List<int[]> coherences = new ArrayList<>();
|
||||||
|
|
||||||
|
for (var subquery : searchTermVariants) {
|
||||||
|
for (var coh : subquery.searchTermCoherences) {
|
||||||
|
int[] ids = coh.stream().map(searchTermsService::lookUpWord).filter(OptionalInt::isPresent).mapToInt(OptionalInt::getAsInt).toArray();
|
||||||
|
coherences.add(ids);
|
||||||
|
}
|
||||||
|
|
||||||
|
// It's assumed each subquery has identical coherences
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new TermCoherences(coherences);
|
||||||
}
|
}
|
||||||
|
|
||||||
public TLongHashSet getResultsWithPriorityTerms(List<SearchSubquery> subqueries, long[] resultsArray) {
|
public TLongHashSet getResultsWithPriorityTerms(List<SearchSubquery> subqueries, long[] resultsArray) {
|
||||||
@ -116,15 +138,32 @@ public class IndexMetadataService {
|
|||||||
return termdocToMeta.getOrDefault(termdocKey(termId, docId), 0);
|
return termdocToMeta.getOrDefault(termdocKey(termId, docId), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean testCoherence(long docId, TermCoherences coherences) {
|
||||||
|
|
||||||
|
for (var coherenceSet : coherences.words()) {
|
||||||
|
long overlap = 0xFF_FFFF_FFFF_FFFFL;
|
||||||
|
for (var word : coherenceSet) {
|
||||||
|
overlap &= WordMetadata.decodePositions(getTermMetadata(word, docId));
|
||||||
|
}
|
||||||
|
if (overlap == 0L) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class QuerySearchTerms {
|
public static class QuerySearchTerms {
|
||||||
private final TObjectIntHashMap<String> termToId;
|
private final TObjectIntHashMap<String> termToId;
|
||||||
public final int[] termIdsAll;
|
public final int[] termIdsAll;
|
||||||
|
|
||||||
public QuerySearchTerms(TObjectIntHashMap<String> termToId, int[] termIdsAll) {
|
public final TermCoherences coherences;
|
||||||
|
|
||||||
|
public QuerySearchTerms(TObjectIntHashMap<String> termToId, int[] termIdsAll, TermCoherences coherences) {
|
||||||
this.termToId = termToId;
|
this.termToId = termToId;
|
||||||
this.termIdsAll = termIdsAll;
|
this.termIdsAll = termIdsAll;
|
||||||
|
this.coherences = coherences;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int get(String searchTerm) {
|
public int get(String searchTerm) {
|
||||||
@ -132,6 +171,8 @@ public class IndexMetadataService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public record TermCoherences(List<int[]> words) {}
|
||||||
|
|
||||||
private static long termdocKey(int termId, long docId) {
|
private static long termdocKey(int termId, long docId) {
|
||||||
return (docId << 32) | termId;
|
return (docId << 32) | termId;
|
||||||
}
|
}
|
||||||
|
@ -117,10 +117,15 @@ public class IndexResultValuator {
|
|||||||
|
|
||||||
double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores, 5000, rankingContext);
|
double score = searchResultValuator.calculateSearchResultValue(searchResult.keywordScores, 5000, rankingContext);
|
||||||
|
|
||||||
|
boolean disqualified = false;
|
||||||
|
|
||||||
|
if (!termMetadata.testCoherence(urlIdInt, searchTerms.coherences))
|
||||||
|
disqualified = true;
|
||||||
|
else if (maxFlagsCount == 0 && !anyAllSynthetic && maxPositionsSet == 0)
|
||||||
|
disqualified = true;
|
||||||
|
|
||||||
searchResult.setScore(new SearchResultPreliminaryScore(
|
searchResult.setScore(new SearchResultPreliminaryScore(
|
||||||
anyAllSynthetic,
|
disqualified,
|
||||||
maxFlagsCount,
|
|
||||||
maxPositionsSet,
|
|
||||||
hasPriorityTerm,
|
hasPriorityTerm,
|
||||||
score
|
score
|
||||||
));
|
));
|
||||||
@ -140,6 +145,7 @@ public class IndexResultValuator {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,6 +155,7 @@ public class IndexQueryService {
|
|||||||
outer:
|
outer:
|
||||||
// These queries are various term combinations
|
// These queries are various term combinations
|
||||||
for (var subquery : params.subqueries) {
|
for (var subquery : params.subqueries) {
|
||||||
|
|
||||||
final SearchIndexSearchTerms searchTerms = searchTermsSvc.getSearchTerms(subquery);
|
final SearchIndexSearchTerms searchTerms = searchTermsSvc.getSearchTerms(subquery);
|
||||||
|
|
||||||
if (searchTerms.isEmpty()) {
|
if (searchTerms.isEmpty()) {
|
||||||
@ -195,16 +196,20 @@ public class IndexQueryService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var includes = subquery.searchTermsInclude;
|
var includes = subquery.searchTermsInclude;
|
||||||
|
var advice = subquery.searchTermsAdvice;
|
||||||
var excludes = subquery.searchTermsExclude;
|
var excludes = subquery.searchTermsExclude;
|
||||||
var priority = subquery.searchTermsPriority;
|
var priority = subquery.searchTermsPriority;
|
||||||
|
|
||||||
for (int i = 0; i < subquery.searchTermsInclude.size(); i++) {
|
for (int i = 0; i < includes.size(); i++) {
|
||||||
logger.info(queryMarker, "{} -> {} I", includes.get(i), searchTerms.includes().getInt(i));
|
logger.info(queryMarker, "{} -> {} I", includes.get(i), searchTerms.includes().getInt(i));
|
||||||
}
|
}
|
||||||
for (int i = 0; i < subquery.searchTermsExclude.size(); i++) {
|
for (int i = 0; i < advice.size(); i++) {
|
||||||
|
logger.info(queryMarker, "{} -> {} A", advice.get(i), searchTerms.includes().getInt(includes.size() + i));
|
||||||
|
}
|
||||||
|
for (int i = 0; i < excludes.size(); i++) {
|
||||||
logger.info(queryMarker, "{} -> {} E", excludes.get(i), searchTerms.excludes().getInt(i));
|
logger.info(queryMarker, "{} -> {} E", excludes.get(i), searchTerms.excludes().getInt(i));
|
||||||
}
|
}
|
||||||
for (int i = 0; i < subquery.searchTermsPriority.size(); i++) {
|
for (int i = 0; i < priority.size(); i++) {
|
||||||
logger.info(queryMarker, "{} -> {} P", priority.get(i), searchTerms.priority().getInt(i));
|
logger.info(queryMarker, "{} -> {} P", priority.get(i), searchTerms.priority().getInt(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -247,7 +252,7 @@ public class IndexQueryService {
|
|||||||
return Arrays.stream(resultIds.toArray())
|
return Arrays.stream(resultIds.toArray())
|
||||||
.parallel()
|
.parallel()
|
||||||
.mapToObj(evaluator::calculatePreliminaryScore)
|
.mapToObj(evaluator::calculatePreliminaryScore)
|
||||||
.filter(score -> !score.getScore().isEmpty())
|
.filter(score -> !score.getScore().isDisqualified())
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -11,10 +11,7 @@ import nu.marginalia.lexicon.KeywordLexiconReadOnlyView;
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.util.HashMap;
|
import java.util.*;
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.OptionalInt;
|
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class SearchTermsService {
|
public class SearchTermsService {
|
||||||
@ -30,34 +27,49 @@ public class SearchTermsService {
|
|||||||
final IntList excludes = new IntArrayList();
|
final IntList excludes = new IntArrayList();
|
||||||
final IntList includes = new IntArrayList();
|
final IntList includes = new IntArrayList();
|
||||||
final IntList priority = new IntArrayList();
|
final IntList priority = new IntArrayList();
|
||||||
|
final List<IntList> coherences = new ArrayList<>();
|
||||||
|
|
||||||
for (var include : request.searchTermsInclude) {
|
if (!addEachTerm(includes, request.searchTermsInclude)) {
|
||||||
var word = lookUpWord(include);
|
return new SearchIndexSearchTerms();
|
||||||
if (word.isEmpty()) {
|
}
|
||||||
logger.debug("Unknown search term: " + include);
|
|
||||||
|
// This looks like a bug, but it's not
|
||||||
|
// v--- ----v
|
||||||
|
if (!addEachTerm(includes, request.searchTermsAdvice)) {
|
||||||
|
return new SearchIndexSearchTerms();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (var coherence : request.searchTermCoherences) {
|
||||||
|
IntList parts = new IntArrayList(coherence.size());
|
||||||
|
|
||||||
|
if (!addEachTerm(parts, coherence)) {
|
||||||
return new SearchIndexSearchTerms();
|
return new SearchIndexSearchTerms();
|
||||||
}
|
}
|
||||||
includes.add(word.getAsInt());
|
|
||||||
|
coherences.add(parts);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we don't care if we can't find these:
|
||||||
|
addEachTerm(excludes, request.searchTermsExclude);
|
||||||
|
addEachTerm(priority, request.searchTermsPriority);
|
||||||
|
|
||||||
for (var advice : request.searchTermsAdvice) {
|
return new SearchIndexSearchTerms(includes, excludes, priority, coherences);
|
||||||
var word = lookUpWord(advice);
|
}
|
||||||
if (word.isEmpty()) {
|
|
||||||
logger.debug("Unknown search term: " + advice);
|
private boolean addEachTerm(IntList ret, List<String> words) {
|
||||||
return new SearchIndexSearchTerms();
|
boolean success = true;
|
||||||
|
|
||||||
|
for (var exclude : words) {
|
||||||
|
var word = lookUpWord(exclude);
|
||||||
|
|
||||||
|
if (word.isPresent()) {
|
||||||
|
lookUpWord(exclude).ifPresent(ret::add);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
success = false;
|
||||||
}
|
}
|
||||||
includes.add(word.getAsInt());
|
|
||||||
}
|
}
|
||||||
|
return success;
|
||||||
for (var exclude : request.searchTermsExclude) {
|
|
||||||
lookUpWord(exclude).ifPresent(excludes::add);
|
|
||||||
}
|
|
||||||
for (var exclude : request.searchTermsPriority) {
|
|
||||||
lookUpWord(exclude).ifPresent(priority::add);
|
|
||||||
}
|
|
||||||
|
|
||||||
return new SearchIndexSearchTerms(includes, excludes, priority);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -28,11 +28,9 @@ import spark.Spark;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.stream.Collectors;
|
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
|
||||||
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
|
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
|
||||||
|
|
||||||
@Execution(SAME_THREAD)
|
@Execution(SAME_THREAD)
|
||||||
@ -91,8 +89,8 @@ public class IndexQueryServiceIntegrationTest {
|
|||||||
.domains(new ArrayList<>())
|
.domains(new ArrayList<>())
|
||||||
.searchSetIdentifier(SearchSetIdentifier.NONE)
|
.searchSetIdentifier(SearchSetIdentifier.NONE)
|
||||||
.subqueries(List.of(new SearchSubquery(
|
.subqueries(List.of(new SearchSubquery(
|
||||||
List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList()
|
List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList(),
|
||||||
))).build());
|
Collections.emptyList()))).build());
|
||||||
|
|
||||||
Assertions.assertArrayEquals(
|
Assertions.assertArrayEquals(
|
||||||
new int[] { 30, 90, 150, 210, 270, 330, 390, 450, 510 },
|
new int[] { 30, 90, 150, 210, 270, 330, 390, 450, 510 },
|
||||||
@ -123,8 +121,8 @@ public class IndexQueryServiceIntegrationTest {
|
|||||||
.queryStrategy(QueryStrategy.SENTENCE)
|
.queryStrategy(QueryStrategy.SENTENCE)
|
||||||
.domains(List.of(2))
|
.domains(List.of(2))
|
||||||
.subqueries(List.of(new SearchSubquery(
|
.subqueries(List.of(new SearchSubquery(
|
||||||
List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList()
|
List.of("3", "5", "2"), List.of("4"), Collections.emptyList(), Collections.emptyList(),
|
||||||
))).build());
|
Collections.emptyList()))).build());
|
||||||
Assertions.assertArrayEquals(
|
Assertions.assertArrayEquals(
|
||||||
new int[] { 210, 270 },
|
new int[] { 210, 270 },
|
||||||
rsp.results.stream().mapToInt(SearchResultItem::getUrlIdInt).toArray());
|
rsp.results.stream().mapToInt(SearchResultItem::getUrlIdInt).toArray());
|
||||||
@ -149,8 +147,8 @@ public class IndexQueryServiceIntegrationTest {
|
|||||||
.searchSetIdentifier(SearchSetIdentifier.NONE)
|
.searchSetIdentifier(SearchSetIdentifier.NONE)
|
||||||
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
.rankingParams(ResultRankingParameters.sensibleDefaults())
|
||||||
.subqueries(List.of(new SearchSubquery(
|
.subqueries(List.of(new SearchSubquery(
|
||||||
List.of("4"), Collections.emptyList(), Collections.emptyList(), Collections.emptyList()
|
List.of("4"), Collections.emptyList(), Collections.emptyList(), Collections.emptyList(),
|
||||||
))
|
Collections.emptyList()))
|
||||||
).build());
|
).build());
|
||||||
|
|
||||||
|
|
||||||
@ -167,8 +165,6 @@ public class IndexQueryServiceIntegrationTest {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public void loadData(int id) {
|
public void loadData(int id) {
|
||||||
int[] factors = IntStream
|
int[] factors = IntStream
|
||||||
.rangeClosed(1, id)
|
.rangeClosed(1, id)
|
||||||
|
@ -83,8 +83,8 @@ public class QueryFactory {
|
|||||||
Arrays.asList(termsInclude),
|
Arrays.asList(termsInclude),
|
||||||
Collections.emptyList(),
|
Collections.emptyList(),
|
||||||
Collections.emptyList(),
|
Collections.emptyList(),
|
||||||
Collections.emptyList()
|
Collections.emptyList(),
|
||||||
));
|
Collections.emptyList()));
|
||||||
|
|
||||||
var specs = SearchSpecification.builder()
|
var specs = SearchSpecification.builder()
|
||||||
.subqueries(sqs)
|
.subqueries(sqs)
|
||||||
|
@ -9,17 +9,19 @@ import java.util.ArrayList;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
/** @see SearchSubquery */
|
||||||
public class QuerySearchTermsAccumulator implements TokenVisitor {
|
public class QuerySearchTermsAccumulator implements TokenVisitor {
|
||||||
public List<String> searchTermsExclude = new ArrayList<>();
|
public List<String> searchTermsExclude = new ArrayList<>();
|
||||||
public List<String> searchTermsInclude = new ArrayList<>();
|
public List<String> searchTermsInclude = new ArrayList<>();
|
||||||
public List<String> searchTermsAdvice = new ArrayList<>();
|
public List<String> searchTermsAdvice = new ArrayList<>();
|
||||||
public List<String> searchTermsPriority = new ArrayList<>();
|
public List<String> searchTermsPriority = new ArrayList<>();
|
||||||
|
public List<List<String>> searchTermCoherences = new ArrayList<>();
|
||||||
|
|
||||||
public String near;
|
public String near;
|
||||||
public String domain;
|
public String domain;
|
||||||
|
|
||||||
public SearchSubquery createSubquery() {
|
public SearchSubquery createSubquery() {
|
||||||
return new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority);
|
return new SearchSubquery(searchTermsInclude, searchTermsExclude, searchTermsAdvice, searchTermsPriority, searchTermCoherences);
|
||||||
}
|
}
|
||||||
|
|
||||||
public QuerySearchTermsAccumulator(SearchProfile profile, List<Token> parts) {
|
public QuerySearchTermsAccumulator(SearchProfile profile, List<Token> parts) {
|
||||||
@ -45,11 +47,19 @@ public class QuerySearchTermsAccumulator implements TokenVisitor {
|
|||||||
public void onQuotTerm(Token token) {
|
public void onQuotTerm(Token token) {
|
||||||
String[] parts = token.str.split("_");
|
String[] parts = token.str.split("_");
|
||||||
if (parts.length > 1) {
|
if (parts.length > 1) {
|
||||||
|
// Prefer that the actual n-gram is present
|
||||||
searchTermsAdvice.add(token.str);
|
searchTermsAdvice.add(token.str);
|
||||||
|
|
||||||
|
// Require that the terms appear in the same sentence
|
||||||
|
searchTermCoherences.add(Arrays.asList(parts));
|
||||||
|
|
||||||
|
// Require that each term exists in the document
|
||||||
|
// (needed for ranking)
|
||||||
searchTermsInclude.addAll(Arrays.asList(parts));
|
searchTermsInclude.addAll(Arrays.asList(parts));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
searchTermsInclude.add(token.str);
|
searchTermsInclude.add(token.str);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,7 +29,7 @@ public class LoadTestMain {
|
|||||||
|
|
||||||
for (int i = 0; i < 10000; i++) {
|
for (int i = 0; i < 10000; i++) {
|
||||||
String uri = "http://127.0.0.1:8080/search?query=%s&profile=corpo".formatted(
|
String uri = "http://127.0.0.1:8080/search?query=%s&profile=corpo".formatted(
|
||||||
Strings.join(pickNCommonWords(2), '+')
|
Strings.join(pickNCommonWords(4), '+')
|
||||||
);
|
);
|
||||||
|
|
||||||
HttpRequest req = HttpRequest.newBuilder(new URI(uri))
|
HttpRequest req = HttpRequest.newBuilder(new URI(uri))
|
||||||
|
Loading…
Reference in New Issue
Block a user