Make the adjacency calculator behave the way it used to, when it gave better results.

This commit is contained in:
Viktor Lofgren 2023-06-07 22:03:06 +02:00
parent eb2ca942d5
commit 7ed3306be3
2 changed files with 9 additions and 15 deletions

View File

@ -82,10 +82,10 @@ public class AdjacenciesData {
}
private boolean isEligible(RoaringBitmap value) {
return true;
// int cardinality = value.getCardinality();
// return true;
int cardinality = value.getCardinality();
// return cardinality < 10000;
return cardinality < 10000;
}

View File

@ -148,7 +148,7 @@ public class WebsiteAdjacenciesCalculator {
private void findAdjacentDtoS(int domainId, Consumer<DomainSimilarities> andThen) {
var vector = adjacenciesData.getVector(domainId);
if (vector == null || !vector.cardinalityExceeds(5)) {
if (vector == null || !vector.cardinalityExceeds(10)) {
return;
}
@ -168,18 +168,12 @@ public class WebsiteAdjacenciesCalculator {
if (otherVec.getCardinality() < cardMin)
return true;
// if (vector.getCardinality() > 100) {
// if (otherVec.getCardinality() < cardMin)
// return true;
//
// // cheap non-weighted check
// if (cosineSimilarity(vector, otherVec) < 0.1)
// return true;
// }
var similarity = cosineSimilarity(vector, otherVec);
double similarity = cosineSimilarity(vector, otherVec);
if (similarity > 0.1) {
similarities.add(new DomainSimilarity(id, similarity));
var recalculated = expensiveCosineSimilarity(vector, otherVec);
if (recalculated > 0.1) {
similarities.add(new DomainSimilarity(id, recalculated));
}
}
return true;