From fc6e3b6da0d0f0b4f8ea810c7c455c502e009d8c Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Mon, 1 Jan 2024 18:51:03 +0100 Subject: [PATCH] (index) Further ranking adjustments --- .../src/main/java/nu/marginalia/ranking/ResultValuator.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java b/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java index 99e59f4e..a6b2d925 100644 --- a/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java +++ b/code/features-index/result-ranking/src/main/java/nu/marginalia/ranking/ResultValuator.java @@ -137,11 +137,13 @@ public class ResultValuator { double penalty = 0; boolean isForum = DocumentFlags.GeneratorForum.isPresent(docFlags); + boolean isWiki = DocumentFlags.GeneratorWiki.isPresent(docFlags); + boolean isDocs = DocumentFlags.GeneratorDocs.isPresent(docFlags); // Penalize large sites harder for any bullshit as it's a strong signal of a low quality site double largeSiteFactor = 1.; - if (!isForum && size > 400) { + if (!isForum && !isWiki && !isDocs && size > 400) { // Long urls-that-look-like-this tend to be poor search results if (DocumentMetadata.hasFlags(featureFlags, HtmlFeature.KEBAB_CASE_URL.getFeatureBit())) penalty += 30.0; @@ -161,7 +163,7 @@ public class ResultValuator { if (DocumentMetadata.hasFlags(featureFlags, HtmlFeature.TRACKING.getFeatureBit())) penalty += 2.5 * largeSiteFactor; - if (isForum) { + if (isForum || isWiki || isDocs) { penalty = Math.min(0, penalty - 2); }