diff --git a/code/tools/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesData.java b/code/tools/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesData.java index 75152a7f..4ddc087b 100644 --- a/code/tools/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesData.java +++ b/code/tools/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/AdjacenciesData.java @@ -82,10 +82,10 @@ public class AdjacenciesData { } private boolean isEligible(RoaringBitmap value) { - return true; -// int cardinality = value.getCardinality(); +// return true; + int cardinality = value.getCardinality(); -// return cardinality < 10000; + return cardinality < 10000; } diff --git a/code/tools/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java b/code/tools/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java index e915abff..f6a4022f 100644 --- a/code/tools/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java +++ b/code/tools/website-adjacencies-calculator/src/main/java/nu/marginalia/adjacencies/WebsiteAdjacenciesCalculator.java @@ -148,7 +148,7 @@ public class WebsiteAdjacenciesCalculator { private void findAdjacentDtoS(int domainId, Consumer andThen) { var vector = adjacenciesData.getVector(domainId); - if (vector == null || !vector.cardinalityExceeds(5)) { + if (vector == null || !vector.cardinalityExceeds(10)) { return; } @@ -168,18 +168,12 @@ public class WebsiteAdjacenciesCalculator { if (otherVec.getCardinality() < cardMin) return true; -// if (vector.getCardinality() > 100) { -// if (otherVec.getCardinality() < cardMin) -// return true; -// -// // cheap non-weighted check -// if (cosineSimilarity(vector, otherVec) < 0.1) -// return true; -// } - - var similarity = cosineSimilarity(vector, otherVec); + double similarity = cosineSimilarity(vector, otherVec); if (similarity > 0.1) { - similarities.add(new DomainSimilarity(id, similarity)); + var recalculated = expensiveCosineSimilarity(vector, otherVec); + if (recalculated > 0.1) { + similarities.add(new DomainSimilarity(id, recalculated)); + } } return true;