mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
Add parameters to the ranking and search set configurations.
This commit is contained in:
parent
b92d18521d
commit
d6b02f6669
@ -2,9 +2,8 @@ package nu.marginalia.wmsa.edge.index;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import com.google.inject.Provides;
|
||||
import com.google.inject.name.Names;
|
||||
import nu.marginalia.wmsa.configuration.WmsaHome;
|
||||
import nu.marginalia.wmsa.edge.index.model.RankingSettings;
|
||||
import nu.marginalia.wmsa.edge.index.config.RankingSettings;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.index.model;
|
||||
package nu.marginalia.wmsa.edge.index.config;
|
||||
|
||||
import lombok.ToString;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
@ -10,10 +10,11 @@ import java.util.List;
|
||||
|
||||
@ToString
|
||||
public class RankingSettings {
|
||||
public List<String> small;
|
||||
public List<String> retro;
|
||||
public List<String> standard;
|
||||
public List<String> academia;
|
||||
public RankingSettingsEntry small;
|
||||
public RankingSettingsEntry retro;
|
||||
public RankingSettingsEntry standard;
|
||||
public RankingSettingsEntry academia;
|
||||
public RankingSettingsEntry ranking;
|
||||
|
||||
public static RankingSettings from(Path dir) {
|
||||
try {
|
@ -0,0 +1,8 @@
|
||||
package nu.marginalia.wmsa.edge.index.config;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Parameters for a single ranking / search-set configuration.
 *
 * <p>{@code RankingSettings} holds one instance per configured set
 * ({@code small}, {@code retro}, {@code standard}, {@code academia},
 * {@code ranking}). The fields are public and non-final; the settings
 * test populates them from the {@code domains} and {@code max} keys of
 * a YAML entry.
 */
public class RankingSettingsEntry {
|
||||
/**
 * Domain names for this set. Callers convert the list to a {@code String[]}
 * and pass it to the {@code StandardPageRank} / {@code ReversePageRank}
 * constructors. NOTE(review): presumably these are the origin/seed domains
 * of the rank computation, and entries such as {@code "%edu"} look like
 * wildcard patterns - confirm against the rank classes.
 */
public List<String> domains;
|
||||
/**
 * Value passed as the first argument to {@code pageRankWithPeripheralNodes};
 * it replaces the previously hard-coded caps (e.g.
 * {@code Math.min(50_000, spr.size())}). NOTE(review): presumably the maximum
 * number of domains to include in the result - confirm. Defaults to 0 when
 * the key is absent, since this is a primitive {@code int} field.
 */
public int max;
|
||||
}
|
@ -9,7 +9,7 @@ import nu.marginalia.wmsa.edge.index.ranking.accumulator.RankingResultHashMapAcc
|
||||
import nu.marginalia.wmsa.edge.index.ranking.data.RankingDomainFetcher;
|
||||
import nu.marginalia.wmsa.edge.index.ranking.accumulator.RankingResultBitSetAccumulator;
|
||||
import nu.marginalia.wmsa.edge.index.IndexServicesFactory;
|
||||
import nu.marginalia.wmsa.edge.index.model.RankingSettings;
|
||||
import nu.marginalia.wmsa.edge.index.config.RankingSettings;
|
||||
import nu.marginalia.wmsa.edge.index.postings.DomainRankings;
|
||||
import nu.marginalia.wmsa.edge.index.ranking.data.RankingDomainFetcherForSimilarityData;
|
||||
import nu.marginalia.wmsa.edge.index.svc.searchset.RankingSearchSet;
|
||||
@ -86,9 +86,11 @@ public class EdgeIndexSearchSetsService {
|
||||
}
|
||||
|
||||
private void updateDomainRankings() {
|
||||
var spr = new StandardPageRank(similarityDomains, rankingSettings.retro.toArray(String[]::new));
|
||||
// The global domain ranking has its own "ranking" settings entry; reading the
// academia entry here would just reuse the academia search set's domains and
// limit. NOTE(review): this requires a `ranking:` section in the settings YAML,
// otherwise `entry` is null - confirm the deployed configuration provides it.
var entry = rankingSettings.ranking;
|
||||
|
||||
var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new));
|
||||
var ranks = spr.pageRankWithPeripheralNodes(entry.max, () -> new RankingResultHashMapAccumulator(100_000));
|
||||
|
||||
var ranks = spr.pageRankWithPeripheralNodes(Math.min(100_000, spr.size() / 2), () -> new RankingResultHashMapAccumulator(100_000));
|
||||
synchronized (this) {
|
||||
domainRankings = new DomainRankings(ranks);
|
||||
}
|
||||
@ -96,8 +98,10 @@ public class EdgeIndexSearchSetsService {
|
||||
|
||||
@SneakyThrows
|
||||
public void updateRetroDomainsSet() {
|
||||
var spr = new StandardPageRank(similarityDomains, rankingSettings.retro.toArray(String[]::new));
|
||||
var data = spr.pageRankWithPeripheralNodes(Math.min(50_000, spr.size()), RankingResultBitSetAccumulator::new);
|
||||
var entry = rankingSettings.retro;
|
||||
|
||||
var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new));
|
||||
var data = spr.pageRankWithPeripheralNodes(entry.max, RankingResultBitSetAccumulator::new);
|
||||
|
||||
synchronized (this) {
|
||||
retroSet = new RankingSearchSet(SearchSetIdentifier.RETRO, retroSet.source, data);
|
||||
@ -107,9 +111,11 @@ public class EdgeIndexSearchSetsService {
|
||||
|
||||
@SneakyThrows
|
||||
public void updateSmallWebDomainsSet() {
|
||||
var rpr = new ReversePageRank(similarityDomains, rankingSettings.small.toArray(String[]::new));
|
||||
var entry = rankingSettings.small;
|
||||
|
||||
var rpr = new ReversePageRank(similarityDomains, entry.domains.toArray(String[]::new));
|
||||
rpr.setMaxKnownUrls(750);
|
||||
var data = rpr.pageRankWithPeripheralNodes(Math.min(10_000, rpr.size()), RankingResultBitSetAccumulator::new);
|
||||
var data = rpr.pageRankWithPeripheralNodes(entry.max, RankingResultBitSetAccumulator::new);
|
||||
|
||||
synchronized (this) {
|
||||
smallWebSet = new RankingSearchSet(SearchSetIdentifier.SMALLWEB, smallWebSet.source, data);
|
||||
@ -119,8 +125,10 @@ public class EdgeIndexSearchSetsService {
|
||||
|
||||
@SneakyThrows
|
||||
public void updateAcademiaDomainsSet() {
|
||||
var spr = new StandardPageRank(similarityDomains, rankingSettings.academia.toArray(String[]::new));
|
||||
var data = spr.pageRankWithPeripheralNodes(Math.min(15_000, spr.size()/2), RankingResultBitSetAccumulator::new);
|
||||
var entry = rankingSettings.academia;
|
||||
|
||||
var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new));
|
||||
var data = spr.pageRankWithPeripheralNodes(entry.max, RankingResultBitSetAccumulator::new);
|
||||
|
||||
synchronized (this) {
|
||||
academiaSet = new RankingSearchSet(SearchSetIdentifier.ACADEMIA, academiaSet.source, data);
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.wmsa.edge.index.model;
|
||||
|
||||
import nu.marginalia.wmsa.edge.index.config.RankingSettings;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
@ -28,22 +29,33 @@ class RankingSettingsTest {
|
||||
void testParseRankingSettings() throws IOException {
|
||||
Files.writeString(tempFile, """
|
||||
retro:
|
||||
max: 50
|
||||
domains:
|
||||
- "www.rep.routledge.com"
|
||||
- "www.personal.kent.edu"
|
||||
small:
|
||||
max: 10
|
||||
domains:
|
||||
- "bikobatanari.art"
|
||||
- "wiki.xxiivv.com"
|
||||
academia:
|
||||
max: 101
|
||||
domains:
|
||||
- "%edu"
|
||||
standard:
|
||||
max: 23
|
||||
domains:
|
||||
- "memex.marginalia.nu"
|
||||
""");
|
||||
|
||||
var settings = RankingSettings.from(tempFile);
|
||||
assertEquals(List.of("www.rep.routledge.com","www.personal.kent.edu"), settings.retro);
|
||||
assertEquals(List.of("bikobatanari.art","wiki.xxiivv.com"), settings.small);
|
||||
assertEquals(List.of("%edu"), settings.academia);
|
||||
assertEquals(List.of("memex.marginalia.nu"), settings.standard);
|
||||
assertEquals(List.of("www.rep.routledge.com","www.personal.kent.edu"), settings.retro.domains);
|
||||
assertEquals(50, settings.retro.max);
|
||||
assertEquals(List.of("bikobatanari.art","wiki.xxiivv.com"), settings.small.domains);
|
||||
assertEquals(10, settings.small.max);
|
||||
// Verify the academia limit from the YAML above; this line previously
// repeated the small.domains assertion and checked nothing new.
assertEquals(101, settings.academia.max);
|
||||
assertEquals(List.of("%edu"), settings.academia.domains);
|
||||
assertEquals(List.of("memex.marginalia.nu"), settings.standard.domains);
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user