diff --git a/code/index/java/nu/marginalia/index/results/IndexResultRankingService.java b/code/index/java/nu/marginalia/index/results/IndexResultRankingService.java index 810a1880..16d8a937 100644 --- a/code/index/java/nu/marginalia/index/results/IndexResultRankingService.java +++ b/code/index/java/nu/marginalia/index/results/IndexResultRankingService.java @@ -6,6 +6,7 @@ import gnu.trove.list.TLongList; import gnu.trove.list.array.TLongArrayList; import gnu.trove.map.hash.TObjectLongHashMap; import it.unimi.dsi.fastutil.longs.LongArrayList; +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import nu.marginalia.api.searchquery.*; import nu.marginalia.api.searchquery.model.compiled.CompiledQuery; import nu.marginalia.api.searchquery.model.compiled.CqDataLong; @@ -174,6 +175,7 @@ public class IndexResultRankingService { } List resultItems = new ArrayList<>(resultsList.size()); + LongOpenHashSet seenDocumentHashes = new LongOpenHashSet(resultsList.size()); // Decorate the results with the document details for (var result : resultsList) { @@ -185,6 +187,11 @@ public class IndexResultRankingService { continue; } + // Filter out duplicates by content + if (!seenDocumentHashes.add(docData.dataHash())) { + continue; + } + var rawItem = RpcRawResultItem.newBuilder(); rawItem.setCombinedId(result.combinedId);