diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexModule.java
index 361a7d47..99a1e3f4 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexModule.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexModule.java
@@ -2,9 +2,8 @@ package nu.marginalia.wmsa.edge.index;
 
 import com.google.inject.AbstractModule;
 import com.google.inject.Provides;
-import com.google.inject.name.Names;
 import nu.marginalia.wmsa.configuration.WmsaHome;
-import nu.marginalia.wmsa.edge.index.model.RankingSettings;
+import nu.marginalia.wmsa.edge.index.config.RankingSettings;
 
 import java.nio.file.Path;
 
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/RankingSettings.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/config/RankingSettings.java
similarity index 66%
rename from marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/RankingSettings.java
rename to marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/config/RankingSettings.java
index 0dd64cbd..1c71e544 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/model/RankingSettings.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/config/RankingSettings.java
@@ -1,4 +1,4 @@
-package nu.marginalia.wmsa.edge.index.model;
+package nu.marginalia.wmsa.edge.index.config;
 
 import lombok.ToString;
 import org.yaml.snakeyaml.Yaml;
@@ -10,10 +10,12 @@ import java.util.List;
 
 @ToString
 public class RankingSettings {
-    public List<String> small;
-    public List<String> retro;
-    public List<String> standard;
-    public List<String> academia;
+    public RankingSettingsEntry small;
+    public RankingSettingsEntry retro;
+    public RankingSettingsEntry standard;
+    public RankingSettingsEntry academia;
+    /** Entry read by EdgeIndexSearchSetsService#updateDomainRankings for the domain rankings. */
+    public RankingSettingsEntry ranking;
 
     public static RankingSettings from(Path dir) {
         try {
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/config/RankingSettingsEntry.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/config/RankingSettingsEntry.java
new file mode 100644
index 00000000..f6aa501f
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/config/RankingSettingsEntry.java
@@ -0,0 +1,14 @@
+package nu.marginalia.wmsa.edge.index.config;
+
+import java.util.List;
+
+/**
+ * Settings for one ranking set as read from the ranking settings file.
+ */
+public class RankingSettingsEntry {
+    /** Domain names passed to the page rank implementation for this set. */
+    public List<String> domains;
+
+    /** Value passed as the first argument to pageRankWithPeripheralNodes. */
+    public int max;
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/svc/EdgeIndexSearchSetsService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/svc/EdgeIndexSearchSetsService.java
index fae6b171..4c42c80c 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/svc/EdgeIndexSearchSetsService.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/svc/EdgeIndexSearchSetsService.java
@@ -9,7 +9,7 @@ import nu.marginalia.wmsa.edge.index.ranking.accumulator.RankingResultHashMapAcc
 import nu.marginalia.wmsa.edge.index.ranking.data.RankingDomainFetcher;
 import nu.marginalia.wmsa.edge.index.ranking.accumulator.RankingResultBitSetAccumulator;
 import nu.marginalia.wmsa.edge.index.IndexServicesFactory;
-import nu.marginalia.wmsa.edge.index.model.RankingSettings;
+import nu.marginalia.wmsa.edge.index.config.RankingSettings;
 import nu.marginalia.wmsa.edge.index.postings.DomainRankings;
 import nu.marginalia.wmsa.edge.index.ranking.data.RankingDomainFetcherForSimilarityData;
 import nu.marginalia.wmsa.edge.index.svc.searchset.RankingSearchSet;
@@ -86,9 +86,11 @@ public class EdgeIndexSearchSetsService {
     }
 
     private void updateDomainRankings() {
-        var spr = new StandardPageRank(similarityDomains, rankingSettings.retro.toArray(String[]::new));
+        // Domain rankings read their own 'ranking' entry rather than reusing the academia set's.
+        var entry = rankingSettings.ranking;
+        var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new));
+        var ranks = spr.pageRankWithPeripheralNodes(entry.max, () -> new RankingResultHashMapAccumulator(100_000));
 
-        var ranks = spr.pageRankWithPeripheralNodes(Math.min(100_000, spr.size() / 2), () -> new RankingResultHashMapAccumulator(100_000));
         synchronized (this) {
             domainRankings = new DomainRankings(ranks);
         }
@@ -96,8 +98,10 @@ public class EdgeIndexSearchSetsService {
 
     @SneakyThrows
     public void updateRetroDomainsSet() {
-        var spr = new StandardPageRank(similarityDomains, rankingSettings.retro.toArray(String[]::new));
-        var data = spr.pageRankWithPeripheralNodes(Math.min(50_000, spr.size()), RankingResultBitSetAccumulator::new);
+        var entry = rankingSettings.retro;
+
+        var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new));
+        var data = spr.pageRankWithPeripheralNodes(entry.max, RankingResultBitSetAccumulator::new);
 
         synchronized (this) {
             retroSet = new RankingSearchSet(SearchSetIdentifier.RETRO, retroSet.source, data);
@@ -107,9 +111,11 @@ public class EdgeIndexSearchSetsService {
 
     @SneakyThrows
     public void updateSmallWebDomainsSet() {
-        var rpr = new ReversePageRank(similarityDomains, rankingSettings.small.toArray(String[]::new));
+        var entry = rankingSettings.small;
+
+        var rpr = new ReversePageRank(similarityDomains, entry.domains.toArray(String[]::new));
         rpr.setMaxKnownUrls(750);
-        var data = rpr.pageRankWithPeripheralNodes(Math.min(10_000, rpr.size()), RankingResultBitSetAccumulator::new);
+        var data = rpr.pageRankWithPeripheralNodes(entry.max, RankingResultBitSetAccumulator::new);
 
         synchronized (this) {
             smallWebSet = new RankingSearchSet(SearchSetIdentifier.SMALLWEB, smallWebSet.source, data);
@@ -119,8 +125,10 @@ public class EdgeIndexSearchSetsService {
 
     @SneakyThrows
     public void updateAcademiaDomainsSet() {
-        var spr = new StandardPageRank(similarityDomains, rankingSettings.academia.toArray(String[]::new));
-        var data = spr.pageRankWithPeripheralNodes(Math.min(15_000, spr.size()/2), RankingResultBitSetAccumulator::new);
+        var entry = rankingSettings.academia;
+
+        var spr = new StandardPageRank(similarityDomains, entry.domains.toArray(String[]::new));
+        var data = spr.pageRankWithPeripheralNodes(entry.max, RankingResultBitSetAccumulator::new);
 
         synchronized (this) {
             academiaSet = new RankingSearchSet(SearchSetIdentifier.ACADEMIA, academiaSet.source, data);
diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/model/RankingSettingsTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/model/RankingSettingsTest.java
index eac47334..a8a0c17f 100644
--- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/model/RankingSettingsTest.java
+++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/index/model/RankingSettingsTest.java
@@ -1,5 +1,6 @@
 package nu.marginalia.wmsa.edge.index.model;
 
+import nu.marginalia.wmsa.edge.index.config.RankingSettings;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
@@ -28,22 +29,34 @@ class RankingSettingsTest {
     void testParseRankingSettings() throws IOException {
         Files.writeString(tempFile, """
                 retro:
-                - "www.rep.routledge.com"
-                - "www.personal.kent.edu"
+                  max: 50
+                  domains:
+                  - "www.rep.routledge.com"
+                  - "www.personal.kent.edu"
                 small:
-                - "bikobatanari.art"
-                - "wiki.xxiivv.com"
+                  max: 10
+                  domains:
+                  - "bikobatanari.art"
+                  - "wiki.xxiivv.com"
                 academia:
-                - "%edu"
+                  max: 101
+                  domains:
+                  - "%edu"
                 standard:
-                - "memex.marginalia.nu"
+                  max: 23
+                  domains:
+                  - "memex.marginalia.nu"
                 """);
 
         var settings = RankingSettings.from(tempFile);
-        assertEquals(List.of("www.rep.routledge.com","www.personal.kent.edu"), settings.retro);
-        assertEquals(List.of("bikobatanari.art","wiki.xxiivv.com"), settings.small);
-        assertEquals(List.of("%edu"), settings.academia);
-        assertEquals(List.of("memex.marginalia.nu"), settings.standard);
+        assertEquals(List.of("www.rep.routledge.com","www.personal.kent.edu"), settings.retro.domains);
+        assertEquals(50, settings.retro.max);
+        assertEquals(List.of("bikobatanari.art","wiki.xxiivv.com"), settings.small.domains);
+        assertEquals(10, settings.small.max);
+        assertEquals(List.of("%edu"), settings.academia.domains);
+        assertEquals(101, settings.academia.max);
+        assertEquals(List.of("memex.marginalia.nu"), settings.standard.domains);
+        assertEquals(23, settings.standard.max);
     }
 
 }
\ No newline at end of file