diff --git a/code/processes/export-task-process/java/nu/marginalia/extractor/AtagExporter.java b/code/processes/export-task-process/java/nu/marginalia/extractor/AtagExporter.java index 6b602a61..9c66b882 100644 --- a/code/processes/export-task-process/java/nu/marginalia/extractor/AtagExporter.java +++ b/code/processes/export-task-process/java/nu/marginalia/extractor/AtagExporter.java @@ -155,9 +155,12 @@ public class AtagExporter implements ExporterIf { } // Deduplicate by hash; we've already checked that the strings are ASCII printable so we don't - // need to be concerned about using the fast ASCII hash. Note we don't consider the destination URL - // here, but the source domain instead. - return !hashes.add(hash.hashLowerBytes(linkText) ^ hash.hashLowerBytes(baseUrl.domain.toString())); + // need to be concerned about using the fast ASCII hash + if (hashes.add(hash.hashLowerBytes(linkText) ^ hash.hashLowerBytes(urlString))) { + return false; + } + + return true; } }