mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(domain-info) Reduce memory usage
This commit is contained in:
parent
eaf836dc66
commit
c943954bb4
@ -34,6 +34,7 @@ dependencies {
|
|||||||
implementation libs.spark
|
implementation libs.spark
|
||||||
implementation libs.opencsv
|
implementation libs.opencsv
|
||||||
implementation libs.trove
|
implementation libs.trove
|
||||||
|
implementation libs.roaringbitmap
|
||||||
implementation libs.fastutil
|
implementation libs.fastutil
|
||||||
implementation libs.bundles.gson
|
implementation libs.bundles.gson
|
||||||
implementation libs.bundles.mariadb
|
implementation libs.bundles.mariadb
|
||||||
|
@ -8,10 +8,12 @@ import gnu.trove.map.hash.TIntDoubleHashMap;
|
|||||||
import gnu.trove.map.hash.TIntIntHashMap;
|
import gnu.trove.map.hash.TIntIntHashMap;
|
||||||
import gnu.trove.set.TIntSet;
|
import gnu.trove.set.TIntSet;
|
||||||
import gnu.trove.set.hash.TIntHashSet;
|
import gnu.trove.set.hash.TIntHashSet;
|
||||||
|
import it.unimi.dsi.fastutil.ints.Int2DoubleArrayMap;
|
||||||
import nu.marginalia.api.domains.*;
|
import nu.marginalia.api.domains.*;
|
||||||
import nu.marginalia.api.domains.model.SimilarDomain;
|
import nu.marginalia.api.domains.model.SimilarDomain;
|
||||||
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
import nu.marginalia.api.indexdomainlinks.AggregateDomainLinksClient;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import org.roaringbitmap.RoaringBitmap;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -32,12 +34,12 @@ public class SimilarDomainsService {
|
|||||||
private volatile TIntIntHashMap domainIdToIdx = new TIntIntHashMap(100_000);
|
private volatile TIntIntHashMap domainIdToIdx = new TIntIntHashMap(100_000);
|
||||||
private volatile int[] domainIdxToId;
|
private volatile int[] domainIdxToId;
|
||||||
|
|
||||||
public volatile TIntDoubleHashMap[] relatedDomains;
|
public volatile Int2DoubleArrayMap[] relatedDomains;
|
||||||
public volatile TIntList[] domainNeighbors = null;
|
public volatile TIntList[] domainNeighbors = null;
|
||||||
public volatile BitSet screenshotDomains = null;
|
public volatile RoaringBitmap screenshotDomains = null;
|
||||||
public volatile BitSet activeDomains = null;
|
public volatile RoaringBitmap activeDomains = null;
|
||||||
public volatile BitSet indexedDomains = null;
|
public volatile RoaringBitmap indexedDomains = null;
|
||||||
public volatile double[] domainRanks = null;
|
public volatile TIntDoubleHashMap domainRanks = null;
|
||||||
public volatile String[] domainNames = null;
|
public volatile String[] domainNames = null;
|
||||||
|
|
||||||
volatile boolean isReady = false;
|
volatile boolean isReady = false;
|
||||||
@ -69,13 +71,13 @@ public class SimilarDomainsService {
|
|||||||
domainIdxToId[idx] = id;
|
domainIdxToId[idx] = id;
|
||||||
return true;
|
return true;
|
||||||
});
|
});
|
||||||
domainRanks = new double[domainIdToIdx.size()];
|
domainRanks = new TIntDoubleHashMap(100_000, 0.5f, -1, 0.);
|
||||||
domainNames = new String[domainIdToIdx.size()];
|
domainNames = new String[domainIdToIdx.size()];
|
||||||
domainNeighbors = new TIntList[domainIdToIdx.size()];
|
domainNeighbors = new TIntList[domainIdToIdx.size()];
|
||||||
screenshotDomains = new BitSet(domainIdToIdx.size());
|
screenshotDomains = new RoaringBitmap();
|
||||||
activeDomains = new BitSet(domainIdToIdx.size());
|
activeDomains = new RoaringBitmap();
|
||||||
indexedDomains = new BitSet(domainIdToIdx.size());
|
indexedDomains = new RoaringBitmap();
|
||||||
relatedDomains = new TIntDoubleHashMap[domainIdToIdx.size()];
|
relatedDomains = new Int2DoubleArrayMap[domainIdToIdx.size()];
|
||||||
|
|
||||||
logger.info("Loaded {} domain IDs", domainIdToIdx.size());
|
logger.info("Loaded {} domain IDs", domainIdToIdx.size());
|
||||||
|
|
||||||
@ -94,13 +96,17 @@ public class SimilarDomainsService {
|
|||||||
int higherIndex = Math.max(didx, nidx);
|
int higherIndex = Math.max(didx, nidx);
|
||||||
|
|
||||||
if (relatedDomains[lowerIndex] == null)
|
if (relatedDomains[lowerIndex] == null)
|
||||||
relatedDomains[lowerIndex] = new TIntDoubleHashMap(32);
|
relatedDomains[lowerIndex] = new Int2DoubleArrayMap(4);
|
||||||
relatedDomains[lowerIndex].put(higherIndex, Math.round(100 * rs.getDouble(3)));
|
|
||||||
|
double rank = Math.round(100 * rs.getDouble(3));
|
||||||
|
if (rank > 0.1) {
|
||||||
|
relatedDomains[lowerIndex].put(higherIndex, rank);
|
||||||
|
}
|
||||||
|
|
||||||
if (domainNeighbors[didx] == null)
|
if (domainNeighbors[didx] == null)
|
||||||
domainNeighbors[didx] = new TIntArrayList(32);
|
domainNeighbors[didx] = new TIntArrayList(4);
|
||||||
if (domainNeighbors[nidx] == null)
|
if (domainNeighbors[nidx] == null)
|
||||||
domainNeighbors[nidx] = new TIntArrayList(32);
|
domainNeighbors[nidx] = new TIntArrayList(4);
|
||||||
|
|
||||||
domainNeighbors[didx].add(nidx);
|
domainNeighbors[didx].add(nidx);
|
||||||
domainNeighbors[nidx].add(didx);
|
domainNeighbors[nidx].add(didx);
|
||||||
@ -122,14 +128,14 @@ public class SimilarDomainsService {
|
|||||||
final int id = rs.getInt("ID");
|
final int id = rs.getInt("ID");
|
||||||
final int idx = domainIdToIdx.get(id);
|
final int idx = domainIdToIdx.get(id);
|
||||||
|
|
||||||
domainRanks[idx] = Math.round(100 * (1. - rs.getDouble("RANK")));
|
domainRanks.put(idx, Math.round(100 * (1. - rs.getDouble("RANK"))));
|
||||||
domainNames[idx] = rs.getString("DOMAIN_NAME");
|
domainNames[idx] = rs.getString("DOMAIN_NAME");
|
||||||
|
|
||||||
if (rs.getBoolean("INDEXED"))
|
if (rs.getBoolean("INDEXED"))
|
||||||
indexedDomains.set(idx);
|
indexedDomains.add(idx);
|
||||||
|
|
||||||
if (rs.getBoolean("ACTIVE"))
|
if (rs.getBoolean("ACTIVE"))
|
||||||
activeDomains.set(idx);
|
activeDomains.add(idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -142,10 +148,10 @@ public class SimilarDomainsService {
|
|||||||
final int id = rs.getInt(1);
|
final int id = rs.getInt(1);
|
||||||
final int idx = domainIdToIdx.get(id);
|
final int idx = domainIdToIdx.get(id);
|
||||||
|
|
||||||
screenshotDomains.set(idx);
|
screenshotDomains.add(idx);
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info("Loaded {} domains", domainRanks.length);
|
logger.info("Loaded {} domains", domainRanks.size());
|
||||||
isReady = true;
|
isReady = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -222,10 +228,10 @@ public class SimilarDomainsService {
|
|||||||
.setDomainId(id)
|
.setDomainId(id)
|
||||||
.setUrl(new EdgeDomain(domainNames[idx]).toRootUrl().toString())
|
.setUrl(new EdgeDomain(domainNames[idx]).toRootUrl().toString())
|
||||||
.setRelatedness(getRelatedness(domainId, id))
|
.setRelatedness(getRelatedness(domainId, id))
|
||||||
.setRank(domainRanks[idx])
|
.setRank(domainRanks.get(idx))
|
||||||
.setIndexed(indexedDomains.get(idx))
|
.setIndexed(indexedDomains.contains(idx))
|
||||||
.setActive(activeDomains.get(idx))
|
.setActive(activeDomains.contains(idx))
|
||||||
.setScreenshot(screenshotDomains.get(idx))
|
.setScreenshot(screenshotDomains.contains(idx))
|
||||||
.setLinkType(RpcSimilarDomain.LINK_TYPE.valueOf(linkType.name()))
|
.setLinkType(RpcSimilarDomain.LINK_TYPE.valueOf(linkType.name()))
|
||||||
.build());
|
.build());
|
||||||
|
|
||||||
@ -291,7 +297,7 @@ public class SimilarDomainsService {
|
|||||||
|
|
||||||
double[] ranksArray = new double[idsArray.length];
|
double[] ranksArray = new double[idsArray.length];
|
||||||
for (int i = 0; i < idxArray.length; i++) {
|
for (int i = 0; i < idxArray.length; i++) {
|
||||||
ranksArray[i] = this.domainRanks[idxArray[i]];
|
ranksArray[i] = this.domainRanks.get(idxArray[i]);
|
||||||
}
|
}
|
||||||
double[] relatednessArray = new double[idsArray.length];
|
double[] relatednessArray = new double[idsArray.length];
|
||||||
for (int i = 0; i < idsArray.length; i++) {
|
for (int i = 0; i < idsArray.length; i++) {
|
||||||
@ -337,10 +343,10 @@ public class SimilarDomainsService {
|
|||||||
.setDomainId(id)
|
.setDomainId(id)
|
||||||
.setUrl(new EdgeDomain(domainNames[idx]).toRootUrl().toString())
|
.setUrl(new EdgeDomain(domainNames[idx]).toRootUrl().toString())
|
||||||
.setRelatedness(getRelatedness(domainId, id))
|
.setRelatedness(getRelatedness(domainId, id))
|
||||||
.setRank(domainRanks[idx])
|
.setRank(ranksArray[id])
|
||||||
.setIndexed(indexedDomains.get(idx))
|
.setIndexed(indexedDomains.contains(idx))
|
||||||
.setActive(activeDomains.get(idx))
|
.setActive(activeDomains.contains(idx))
|
||||||
.setScreenshot(screenshotDomains.get(idx))
|
.setScreenshot(screenshotDomains.contains(idx))
|
||||||
.setLinkType(RpcSimilarDomain.LINK_TYPE.valueOf(linkType.name()))
|
.setLinkType(RpcSimilarDomain.LINK_TYPE.valueOf(linkType.name()))
|
||||||
.build());
|
.build());
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user