diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java index 2f97e49f..15a0ef9a 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/model/UrlDetails.java @@ -42,21 +42,6 @@ public class UrlDetails { return resultsFromSameDomain > 1; } - public long rankingIdAdjustment() { - int penalty = 0; - - if (words < 500) { - penalty -= 1; - } - if (urlQuality < -10) { - penalty -= 1; - } - if (isSpecialDomain()) { - penalty -= 1; - } - return penalty; //(int)(Math.log(1+rankingId) / Math.log(100))-1-penalty; - } - public String getFormat() { if (null == format) { return "?"; diff --git a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java index 5a7d2e7e..6ce90f9d 100644 --- a/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java +++ b/code/services-core/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryIndexService.java @@ -30,10 +30,10 @@ public class SearchQueryIndexService { this.indexClient = indexClient; this.searchVisitorCount = searchVisitorCount; - Comparator c = Comparator.comparing(ud -> Math.round(10*(ud.getTermScore() - ud.rankingIdAdjustment()))); - resultListComparator = c + resultListComparator = Comparator.comparing(UrlDetails::getTermScore) .thenComparing(UrlDetails::getRanking) .thenComparing(UrlDetails::getId); + } public List executeQuery(Context ctx, SearchQuery processedQuery) { @@ -43,10 +43,6 @@ public class SearchQueryIndexService { List urlDetails = resultDecorator.getAllUrlDetails(results); - urlDetails.replaceAll(details -> - details.withUrlQualityAdjustment(adjustScoreBasedOnQuery(details, processedQuery.specs)) - ); - urlDetails.sort(resultListComparator); return limitAndDeduplicateResults(processedQuery, urlDetails); @@ -70,57 +66,6 @@ public class SearchQueryIndexService { return retList; } - private final Pattern titleSplitPattern = Pattern.compile("[:!|./]|(\\s-|-\\s)|\\s{2,}"); - private PageScoreAdjustment adjustScoreBasedOnQuery(UrlDetails p, SearchSpecification specs) { - String titleLC = p.title == null ? "" : p.title.toLowerCase(); - String descLC = p.description == null ? "" : p.description.toLowerCase(); - String urlLC = p.url == null ? "" : p.url.path.toLowerCase(); - String domainLC = p.url == null ? "" : p.url.domain.toString().toLowerCase(); - - String[] searchTermsLC = specs.subqueries.get(0).searchTermsInclude.stream() - .map(String::toLowerCase) - .flatMap(s -> Arrays.stream(s.split("_"))) - .toArray(String[]::new); - int termCount = searchTermsLC.length; - - double titleHitsAdj = 0.; - final String[] titleParts = titleSplitPattern.split(titleLC); - for (String titlePart : titleParts) { - double hits = 0; - for (String term : searchTermsLC) { - if (titlePart.contains(term)) { - hits += term.length(); - } - } - titleHitsAdj += hits / Math.max(1, titlePart.length()); - } - - double titleFullHit = 0.; - if (termCount > 1 && titleLC.contains(specs.humanQuery.replaceAll("\"", "").toLowerCase())) { - titleFullHit = termCount; - } - long descHits = Arrays.stream(searchTermsLC).filter(descLC::contains).count(); - long urlHits = Arrays.stream(searchTermsLC).filter(urlLC::contains).count(); - long domainHits = Arrays.stream(searchTermsLC).filter(domainLC::contains).count(); - - double descHitsAdj = 0.; - for (String word : descLC.split("\\W+")) { - descHitsAdj += Arrays.stream(searchTermsLC) - .filter(term -> term.length() > word.length()) - .filter(term -> term.contains(word)) - .mapToDouble(term -> word.length() / (double) term.length()) - .sum(); - } - - return PageScoreAdjustment.builder() - .descAdj(Math.min(termCount, descHits) / (10. * termCount)) - .descHitsAdj(descHitsAdj / 10.) - .domainAdj(2 * Math.min(termCount, domainHits) / (double) termCount) - .urlAdj(Math.min(termCount, urlHits) / (10. * termCount)) - .titleAdj(5 * titleHitsAdj / (Math.max(1, titleParts.length) * Math.log(titleLC.length() + 2))) - .titleFullHit(titleFullHit) - .build(); - } }