mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(index) Add index-side deduplication in selectBestResults
This commit is contained in:
parent
4ece5f847b
commit
e6c8a6febe
@ -6,6 +6,7 @@ import gnu.trove.list.TLongList;
|
|||||||
import gnu.trove.list.array.TLongArrayList;
|
import gnu.trove.list.array.TLongArrayList;
|
||||||
import gnu.trove.map.hash.TObjectLongHashMap;
|
import gnu.trove.map.hash.TObjectLongHashMap;
|
||||||
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
import it.unimi.dsi.fastutil.longs.LongArrayList;
|
||||||
|
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
|
||||||
import nu.marginalia.api.searchquery.*;
|
import nu.marginalia.api.searchquery.*;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
|
import nu.marginalia.api.searchquery.model.compiled.CompiledQuery;
|
||||||
import nu.marginalia.api.searchquery.model.compiled.CqDataLong;
|
import nu.marginalia.api.searchquery.model.compiled.CqDataLong;
|
||||||
@ -174,6 +175,7 @@ public class IndexResultRankingService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
List<RpcDecoratedResultItem> resultItems = new ArrayList<>(resultsList.size());
|
List<RpcDecoratedResultItem> resultItems = new ArrayList<>(resultsList.size());
|
||||||
|
LongOpenHashSet seenDocumentHashes = new LongOpenHashSet(resultsList.size());
|
||||||
|
|
||||||
// Decorate the results with the document details
|
// Decorate the results with the document details
|
||||||
for (var result : resultsList) {
|
for (var result : resultsList) {
|
||||||
@ -185,6 +187,11 @@ public class IndexResultRankingService {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Filter out duplicates by content
|
||||||
|
if (!seenDocumentHashes.add(docData.dataHash())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
var rawItem = RpcRawResultItem.newBuilder();
|
var rawItem = RpcRawResultItem.newBuilder();
|
||||||
|
|
||||||
rawItem.setCombinedId(result.combinedId);
|
rawItem.setCombinedId(result.combinedId);
|
||||||
|
Loading…
Reference in New Issue
Block a user