Make the adjacency calculator behave as it used to, when it gave better results.

This commit is contained in:
Viktor Lofgren 2023-06-07 22:03:06 +02:00
parent eb2ca942d5
commit 7ed3306be3
2 changed files with 9 additions and 15 deletions

View File

@ -82,10 +82,10 @@ public class AdjacenciesData {
} }
private boolean isEligible(RoaringBitmap value) { private boolean isEligible(RoaringBitmap value) {
return true; // return true;
// int cardinality = value.getCardinality(); int cardinality = value.getCardinality();
// return cardinality < 10000; return cardinality < 10000;
} }

View File

@ -148,7 +148,7 @@ public class WebsiteAdjacenciesCalculator {
private void findAdjacentDtoS(int domainId, Consumer<DomainSimilarities> andThen) { private void findAdjacentDtoS(int domainId, Consumer<DomainSimilarities> andThen) {
var vector = adjacenciesData.getVector(domainId); var vector = adjacenciesData.getVector(domainId);
if (vector == null || !vector.cardinalityExceeds(5)) { if (vector == null || !vector.cardinalityExceeds(10)) {
return; return;
} }
@ -168,18 +168,12 @@ public class WebsiteAdjacenciesCalculator {
if (otherVec.getCardinality() < cardMin) if (otherVec.getCardinality() < cardMin)
return true; return true;
// if (vector.getCardinality() > 100) { double similarity = cosineSimilarity(vector, otherVec);
// if (otherVec.getCardinality() < cardMin)
// return true;
//
// // cheap non-weighted check
// if (cosineSimilarity(vector, otherVec) < 0.1)
// return true;
// }
var similarity = cosineSimilarity(vector, otherVec);
if (similarity > 0.1) { if (similarity > 0.1) {
similarities.add(new DomainSimilarity(id, similarity)); var recalculated = expensiveCosineSimilarity(vector, otherVec);
if (recalculated > 0.1) {
similarities.add(new DomainSimilarity(id, recalculated));
}
} }
return true; return true;