Remove antique result scoring adjustment that makes no sense anymore.

This commit is contained in:
Viktor Lofgren 2023-04-02 11:58:04 +02:00
parent affcf8cf41
commit 5fb75adaae
2 changed files with 2 additions and 72 deletions

View File

@ -42,21 +42,6 @@ public class UrlDetails {
return resultsFromSameDomain > 1;
}
/**
 * Computes a fixed ranking adjustment for this result.
 *
 * <p>Each of the following conditions that holds contributes -1 to the returned value,
 * so the result is always between -3 and 0:
 * <ul>
 *   <li>{@code words} is below 500,</li>
 *   <li>{@code urlQuality} is below -10,</li>
 *   <li>{@link #isSpecialDomain()} returns true.</li>
 * </ul>
 *
 * @return the summed adjustment, zero or negative
 */
public long rankingIdAdjustment() {
    final int shortDocument = (words < 500) ? -1 : 0;
    final int lowQuality = (urlQuality < -10) ? -1 : 0;
    final int specialDomain = isSpecialDomain() ? -1 : 0;

    return shortDocument + lowQuality + specialDomain;
}
public String getFormat() {
if (null == format) {
return "?";

View File

@ -30,10 +30,10 @@ public class SearchQueryIndexService {
this.indexClient = indexClient;
this.searchVisitorCount = searchVisitorCount;
Comparator<UrlDetails> c = Comparator.comparing(ud -> Math.round(10*(ud.getTermScore() - ud.rankingIdAdjustment())));
resultListComparator = c
resultListComparator = Comparator.comparing(UrlDetails::getTermScore)
.thenComparing(UrlDetails::getRanking)
.thenComparing(UrlDetails::getId);
}
public List<UrlDetails> executeQuery(Context ctx, SearchQuery processedQuery) {
@ -43,10 +43,6 @@ public class SearchQueryIndexService {
List<UrlDetails> urlDetails = resultDecorator.getAllUrlDetails(results);
urlDetails.replaceAll(details ->
details.withUrlQualityAdjustment(adjustScoreBasedOnQuery(details, processedQuery.specs))
);
urlDetails.sort(resultListComparator);
return limitAndDeduplicateResults(processedQuery, urlDetails);
@ -70,57 +66,6 @@ public class SearchQueryIndexService {
return retList;
}
// Separator used to break a title into parts: any one of ':', '!', '|', '.', '/',
// a hyphen with a whitespace character on either side of it, or a run of two or
// more whitespace characters.
private final Pattern titleSplitPattern = Pattern.compile("[:!|./]|(\\s-|-\\s)|\\s{2,}");
/**
 * Builds the query-dependent score adjustment for a single result by checking how the
 * lower-cased include-terms of the first subquery occur in the result's title,
 * description, URL path and domain.
 *
 * <p>Missing (null) title, description or URL are treated as empty strings. Terms that
 * contain {@code '_'} are split into their fragments before matching; empty fragments
 * are discarded. When no usable term remains, every term-count based adjustment is 0
 * rather than NaN.
 *
 * @param p     the result to score
 * @param specs the search specification; its first subquery supplies the include-terms
 *              and its {@code humanQuery} is used for the full-title-hit check
 * @return the adjustment components for title, description, URL and domain
 */
private PageScoreAdjustment adjustScoreBasedOnQuery(UrlDetails p, SearchSpecification specs) {
    String titleLC = p.title == null ? "" : p.title.toLowerCase();
    String descLC = p.description == null ? "" : p.description.toLowerCase();
    String urlLC = p.url == null ? "" : p.url.path.toLowerCase();
    String domainLC = p.url == null ? "" : p.url.domain.toString().toLowerCase();

    // split("_") may produce empty fragments (e.g. "_a" or "a__b"). Since every string
    // contains "", an empty fragment would be counted as a hit everywhere, so drop them.
    String[] searchTermsLC = specs.subqueries.get(0).searchTermsInclude.stream()
            .map(String::toLowerCase)
            .flatMap(s -> Arrays.stream(s.split("_")))
            .filter(term -> !term.isEmpty())
            .toArray(String[]::new);
    int termCount = searchTermsLC.length;

    // Denominator for the per-term ratios; clamped to 1 so that a query without any
    // include-terms yields 0 instead of 0/0 = NaN.
    double termDivisor = Math.max(1, termCount);

    // For each title part, sum the lengths of the terms it contains relative to the
    // length of that part.
    double titleHitsAdj = 0.;
    final String[] titleParts = titleSplitPattern.split(titleLC);
    for (String titlePart : titleParts) {
        double hits = 0;
        for (String term : searchTermsLC) {
            if (titlePart.contains(term)) {
                hits += term.length();
            }
        }
        titleHitsAdj += hits / Math.max(1, titlePart.length());
    }

    // A multi-term query whose text (with double quotes removed) appears verbatim in
    // the title counts as a full hit, weighted by the number of terms.
    double titleFullHit = 0.;
    if (termCount > 1 && titleLC.contains(specs.humanQuery.replaceAll("\"", "").toLowerCase())) {
        titleFullHit = termCount;
    }

    long descHits = Arrays.stream(searchTermsLC).filter(descLC::contains).count();
    long urlHits = Arrays.stream(searchTermsLC).filter(urlLC::contains).count();
    long domainHits = Arrays.stream(searchTermsLC).filter(domainLC::contains).count();

    // Partial credit for description words that are a proper substring of a longer
    // search term, proportional to how much of the term the word covers.
    double descHitsAdj = 0.;
    for (String word : descLC.split("\\W+")) {
        descHitsAdj += Arrays.stream(searchTermsLC)
                .filter(term -> term.length() > word.length())
                .filter(term -> term.contains(word))
                .mapToDouble(term -> word.length() / (double) term.length())
                .sum();
    }

    return PageScoreAdjustment.builder()
            .descAdj(Math.min(termCount, descHits) / (10. * termDivisor))
            .descHitsAdj(descHitsAdj / 10.)
            .domainAdj(2 * Math.min(termCount, domainHits) / termDivisor)
            .urlAdj(Math.min(termCount, urlHits) / (10. * termDivisor))
            .titleAdj(5 * titleHitsAdj / (Math.max(1, titleParts.length) * Math.log(titleLC.length() + 2)))
            .titleFullHit(titleFullHit)
            .build();
}
}