mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(search) Integrate 'similar' tab in site info.
This commit is contained in:
parent
97d43a6fa2
commit
902f235b5b
@ -1,57 +1,34 @@
|
||||
package nu.marginalia.search.command.commands;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
||||
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
||||
import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.browse.model.BrowseResultSet;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.db.DomainBlacklist;
|
||||
import nu.marginalia.search.command.SearchCommandInterface;
|
||||
import nu.marginalia.search.command.SearchParameters;
|
||||
import nu.marginalia.search.results.BrowseResultCleaner;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.renderer.MustacheRenderer;
|
||||
import nu.marginalia.renderer.RendererFactory;
|
||||
import nu.marginalia.search.command.SearchCommandInterface;
|
||||
import nu.marginalia.search.command.SearchParameters;
|
||||
import nu.marginalia.search.svc.SearchBrowseService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import spark.Response;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.Map;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static java.util.Collections.shuffle;
|
||||
|
||||
public class BrowseCommand implements SearchCommandInterface {
|
||||
private final DbBrowseDomainsRandom randomDomains;
|
||||
private final DbBrowseDomainsSimilarCosine similarDomains;
|
||||
private final DbBrowseDomainsSimilarOldAlgo similarDomainsOld;
|
||||
private final DbDomainQueries domainQueries;
|
||||
private final DomainBlacklist blacklist;
|
||||
private final SearchBrowseService browseService;
|
||||
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
|
||||
private final BrowseResultCleaner browseResultCleaner;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private final Predicate<String> queryPatternPredicate = Pattern.compile("^browse:[.A-Za-z\\-0-9:]+$").asPredicate();
|
||||
|
||||
@Inject
|
||||
public BrowseCommand(DbBrowseDomainsRandom randomDomains,
|
||||
DbBrowseDomainsSimilarCosine similarDomains,
|
||||
DbBrowseDomainsSimilarOldAlgo similarDomainsOld, DbDomainQueries domainQueries,
|
||||
DomainBlacklist blacklist,
|
||||
RendererFactory rendererFactory,
|
||||
BrowseResultCleaner browseResultCleaner)
|
||||
public BrowseCommand(SearchBrowseService browseService,
|
||||
RendererFactory rendererFactory)
|
||||
throws IOException
|
||||
{
|
||||
this.randomDomains = randomDomains;
|
||||
this.similarDomains = similarDomains;
|
||||
this.similarDomainsOld = similarDomainsOld;
|
||||
this.domainQueries = domainQueries;
|
||||
this.blacklist = blacklist;
|
||||
this.browseResultCleaner = browseResultCleaner;
|
||||
this.browseService = browseService;
|
||||
|
||||
browseResultsRenderer = rendererFactory.renderer("search/browse-results");
|
||||
}
|
||||
@ -82,14 +59,14 @@ public class BrowseCommand implements SearchCommandInterface {
|
||||
|
||||
try {
|
||||
if ("random".equals(word)) {
|
||||
return getRandomEntries(0);
|
||||
return browseService.getRandomEntries(0);
|
||||
}
|
||||
if (word.startsWith("random:")) {
|
||||
int set = Integer.parseInt(word.split(":")[1]);
|
||||
return getRandomEntries(set);
|
||||
return browseService.getRandomEntries(set);
|
||||
}
|
||||
else {
|
||||
return getRelatedEntries(word);
|
||||
return browseService.getRelatedEntries(word);
|
||||
}
|
||||
}
|
||||
catch (Exception ex) {
|
||||
@ -98,34 +75,5 @@ public class BrowseCommand implements SearchCommandInterface {
|
||||
}
|
||||
}
|
||||
|
||||
private BrowseResultSet getRandomEntries(int set) {
|
||||
List<BrowseResult> results = randomDomains.getRandomDomains(25, blacklist, set);
|
||||
|
||||
results.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||
|
||||
return new BrowseResultSet(results);
|
||||
}
|
||||
|
||||
private BrowseResultSet getRelatedEntries(String word) {
|
||||
var domain = domainQueries.getDomainId(new EdgeDomain(word));
|
||||
|
||||
var neighbors = similarDomains.getDomainNeighborsAdjacentCosine(domain, blacklist, 256);
|
||||
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||
|
||||
// If the results are very few, supplement with the alternative shitty algorithm
|
||||
if (neighbors.size() < 25) {
|
||||
Set<BrowseResult> allNeighbors = new HashSet<>(neighbors);
|
||||
allNeighbors.addAll(similarDomainsOld.getDomainNeighborsAdjacent(domain, blacklist, 50));
|
||||
|
||||
neighbors.clear();
|
||||
neighbors.addAll(allNeighbors);
|
||||
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||
}
|
||||
|
||||
// shuffle the items for a less repetitive experience
|
||||
shuffle(neighbors);
|
||||
|
||||
return new BrowseResultSet(neighbors, word);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ public class SiteRedirectCommand implements SearchCommandInterface {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final Predicate<String> queryPatternPredicate = Pattern.compile("^site:[.A-Za-z\\-0-9]+$").asPredicate();
|
||||
private final Predicate<String> queryPatternPredicate = Pattern.compile("^(site|links|similar):[.A-Za-z\\-0-9]+$").asPredicate();
|
||||
|
||||
@Inject
|
||||
public SiteRedirectCommand() {
|
||||
@ -30,18 +30,24 @@ public class SiteRedirectCommand implements SearchCommandInterface {
|
||||
return false;
|
||||
}
|
||||
|
||||
String definePrefix = "site:";
|
||||
String domain = parameters.query().substring(definePrefix.length()).toLowerCase();
|
||||
int idx = parameters.query().indexOf(':');
|
||||
String prefix = parameters.query().substring(0, idx);
|
||||
String domain = parameters.query().substring(idx + 1).toLowerCase();
|
||||
|
||||
// Use an HTML redirect here, so we can use relative URLs
|
||||
String view = switch (prefix) {
|
||||
case "links" -> "links";
|
||||
case "similar" -> "similar";
|
||||
default -> "info";
|
||||
};
|
||||
|
||||
response.raw().getOutputStream().println("""
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<meta charset="UTF-8">
|
||||
<title>Redirecting...</title>
|
||||
<meta http-equiv="refresh" content="0; url=/site/%s">
|
||||
""".formatted(domain));
|
||||
<meta http-equiv="refresh" content="0; url=/site/%s?view=%s">
|
||||
""".formatted(domain, view));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -0,0 +1,73 @@
|
||||
package nu.marginalia.search.svc;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
||||
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
||||
import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.browse.model.BrowseResultSet;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.db.DomainBlacklist;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.search.results.BrowseResultCleaner;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import static java.util.Collections.shuffle;
|
||||
|
||||
public class SearchBrowseService {
|
||||
private final DbBrowseDomainsRandom randomDomains;
|
||||
private final DbBrowseDomainsSimilarCosine similarDomains;
|
||||
private final DbBrowseDomainsSimilarOldAlgo similarDomainsOld;
|
||||
private final DbDomainQueries domainQueries;
|
||||
private final DomainBlacklist blacklist;
|
||||
private final BrowseResultCleaner browseResultCleaner;
|
||||
|
||||
@Inject
|
||||
public SearchBrowseService(DbBrowseDomainsRandom randomDomains,
|
||||
DbBrowseDomainsSimilarCosine similarDomains,
|
||||
DbBrowseDomainsSimilarOldAlgo similarDomainsOld,
|
||||
DbDomainQueries domainQueries,
|
||||
DomainBlacklist blacklist,
|
||||
BrowseResultCleaner browseResultCleaner)
|
||||
{
|
||||
this.randomDomains = randomDomains;
|
||||
this.similarDomains = similarDomains;
|
||||
this.similarDomainsOld = similarDomainsOld;
|
||||
this.domainQueries = domainQueries;
|
||||
this.blacklist = blacklist;
|
||||
this.browseResultCleaner = browseResultCleaner;
|
||||
}
|
||||
|
||||
public BrowseResultSet getRandomEntries(int set) {
|
||||
List<BrowseResult> results = randomDomains.getRandomDomains(25, blacklist, set);
|
||||
|
||||
results.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||
|
||||
return new BrowseResultSet(results);
|
||||
}
|
||||
|
||||
public BrowseResultSet getRelatedEntries(String word) {
|
||||
var domain = domainQueries.getDomainId(new EdgeDomain(word));
|
||||
|
||||
var neighbors = similarDomains.getDomainNeighborsAdjacentCosine(domain, blacklist, 256);
|
||||
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||
|
||||
// If the results are very few, supplement with the alternative shitty algorithm
|
||||
if (neighbors.size() < 25) {
|
||||
Set<BrowseResult> allNeighbors = new HashSet<>(neighbors);
|
||||
allNeighbors.addAll(similarDomainsOld.getDomainNeighborsAdjacent(domain, blacklist, 50));
|
||||
|
||||
neighbors.clear();
|
||||
neighbors.addAll(allNeighbors);
|
||||
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||
}
|
||||
|
||||
// shuffle the items for a less repetitive experience
|
||||
shuffle(neighbors);
|
||||
|
||||
return new BrowseResultSet(neighbors, word);
|
||||
}
|
||||
}
|
@ -1,5 +1,7 @@
|
||||
package nu.marginalia.search.svc;
|
||||
import com.google.inject.Inject;
|
||||
import nu.marginalia.browse.model.BrowseResult;
|
||||
import nu.marginalia.browse.model.BrowseResultSet;
|
||||
import nu.marginalia.client.Context;
|
||||
import nu.marginalia.db.DbDomainQueries;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
@ -15,6 +17,7 @@ import spark.*;
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.IOException;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.OptionalInt;
|
||||
@ -25,6 +28,7 @@ public class SearchSiteInfoService {
|
||||
private final DomainInformationService domainInformationService;
|
||||
private final SearchFlagSiteService flagSiteService;
|
||||
private final DbDomainQueries domainQueries;
|
||||
private final SearchBrowseService browseService;
|
||||
private final MustacheRenderer<Object> renderer;
|
||||
|
||||
@Inject
|
||||
@ -32,13 +36,15 @@ public class SearchSiteInfoService {
|
||||
DomainInformationService domainInformationService,
|
||||
RendererFactory rendererFactory,
|
||||
SearchFlagSiteService flagSiteService,
|
||||
DbDomainQueries domainQueries) throws IOException {
|
||||
DbDomainQueries domainQueries, SearchBrowseService browseService) throws IOException {
|
||||
this.searchOperator = searchOperator;
|
||||
this.domainInformationService = domainInformationService;
|
||||
this.flagSiteService = flagSiteService;
|
||||
this.domainQueries = domainQueries;
|
||||
|
||||
this.renderer = rendererFactory.renderer("search/site-info/site-info");
|
||||
this.browseService = browseService;
|
||||
|
||||
}
|
||||
|
||||
public Object handle(Request request, Response response) throws SQLException {
|
||||
@ -55,6 +61,7 @@ public class SearchSiteInfoService {
|
||||
case "links" -> listLinks(ctx, domainName);
|
||||
case "docs" -> listDocs(ctx, domainName);
|
||||
case "info" -> siteInfo(ctx, domainName);
|
||||
case "similar" -> listSimilar(ctx, domainName);
|
||||
case "report" -> reportSite(ctx, domainName);
|
||||
default -> siteInfo(ctx, domainName);
|
||||
};
|
||||
@ -129,7 +136,12 @@ public class SearchSiteInfoService {
|
||||
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||
searchOperator.doBacklinkSearch(ctx, domainName));
|
||||
}
|
||||
private SimilarSites listSimilar(Context ctx, String domainName) {
|
||||
|
||||
return new SimilarSites(domainName,
|
||||
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||
browseService.getRelatedEntries(domainName));
|
||||
}
|
||||
private Docs listDocs(Context ctx, String domainName) {
|
||||
return new Docs(domainName,
|
||||
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||
@ -210,6 +222,18 @@ public class SearchSiteInfoService {
|
||||
}
|
||||
}
|
||||
|
||||
public record SimilarSites(Map<String, Boolean> view, String domain, long domainId, List<BrowseResult> results) {
|
||||
public SimilarSites(String domain, long domainId, BrowseResultSet results) {
|
||||
this(Map.of("similar", true), domain, domainId, new ArrayList<>(results.results()));
|
||||
}
|
||||
|
||||
public String query() { return "similar:" + domain; }
|
||||
|
||||
public boolean isKnown() {
|
||||
return domainId > 0;
|
||||
}
|
||||
}
|
||||
|
||||
public record ReportDomain(
|
||||
Map<String, Boolean> view,
|
||||
String domain,
|
||||
|
@ -42,6 +42,7 @@
|
||||
</div>
|
||||
{{#each results}}{{>search/parts/search-result}}{{/each}}
|
||||
{{/if}}
|
||||
|
||||
{{#if view.docs}}
|
||||
<div class="infobox">
|
||||
Showing documents found in {{domain}}.
|
||||
@ -49,9 +50,18 @@
|
||||
|
||||
{{#each results}}{{>search/parts/search-result}}{{/each}}
|
||||
{{/if}}
|
||||
|
||||
{{#if view.report}}
|
||||
{{>search/site-info/site-info-report}}
|
||||
{{/if}}
|
||||
|
||||
{{#if view.similar}}
|
||||
<div class="infobox">Showing domains similar to {{domain}}</div>
|
||||
<section class="cards">
|
||||
{{#each results}}{{>search/browse-result}}{{/each}}
|
||||
</section>
|
||||
{{/if}}
|
||||
|
||||
{{>search/parts/search-footer}}
|
||||
</body>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user