mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index) Add index-side deduplication in selectBestResults
This commit is contained in:
parent
4ece5f847b
commit
e6c8a6febe
@@ -6,6 +6,7 @@ import gnu.trove.list.TLongList;
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import gnu.trove.map.hash.TObjectLongHashMap;
|
||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
|
||||
import nu.marginalia.api.searchquery.*;
|
||||
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
|
||||
import nu.marginalia.api.searchquery.model.compiled.CqDataLong;
|
||||
@@ -174,6 +175,7 @@ public class IndexResultRankingService {
|
||||
}
|
||||
|
||||
List<RpcDecoratedResultItem> resultItems = new ArrayList<>(resultsList.size());
|
||||
LongOpenHashSet seenDocumentHashes = new LongOpenHashSet(resultsList.size());
|
||||
|
||||
// Decorate the results with the document details
|
||||
for (var result : resultsList) {
|
||||
@@ -185,6 +187,11 @@ public class IndexResultRankingService {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Filter out duplicates by content
|
||||
if (!seenDocumentHashes.add(docData.dataHash())) {
|
||||
continue;
|
||||
}
|
||||
|
||||
var rawItem = RpcRawResultItem.newBuilder();
|
||||
|
||||
rawItem.setCombinedId(result.combinedId);
|
||||
|
Loading…
Reference in New Issue
Block a user