mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
(search) Integrate 'similar' tab in site info.
This commit is contained in:
parent
97d43a6fa2
commit
902f235b5b
@ -1,57 +1,34 @@
|
|||||||
package nu.marginalia.search.command.commands;
|
package nu.marginalia.search.command.commands;
|
||||||
|
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
|
||||||
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
|
||||||
import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo;
|
|
||||||
import nu.marginalia.browse.model.BrowseResult;
|
|
||||||
import nu.marginalia.browse.model.BrowseResultSet;
|
import nu.marginalia.browse.model.BrowseResultSet;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
|
||||||
import nu.marginalia.db.DomainBlacklist;
|
|
||||||
import nu.marginalia.search.command.SearchCommandInterface;
|
|
||||||
import nu.marginalia.search.command.SearchParameters;
|
|
||||||
import nu.marginalia.search.results.BrowseResultCleaner;
|
|
||||||
import nu.marginalia.client.Context;
|
import nu.marginalia.client.Context;
|
||||||
import nu.marginalia.renderer.MustacheRenderer;
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
|
import nu.marginalia.search.command.SearchCommandInterface;
|
||||||
|
import nu.marginalia.search.command.SearchParameters;
|
||||||
|
import nu.marginalia.search.svc.SearchBrowseService;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
import spark.Response;
|
import spark.Response;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.Map;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
import static java.util.Collections.shuffle;
|
|
||||||
|
|
||||||
public class BrowseCommand implements SearchCommandInterface {
|
public class BrowseCommand implements SearchCommandInterface {
|
||||||
private final DbBrowseDomainsRandom randomDomains;
|
private final SearchBrowseService browseService;
|
||||||
private final DbBrowseDomainsSimilarCosine similarDomains;
|
|
||||||
private final DbBrowseDomainsSimilarOldAlgo similarDomainsOld;
|
|
||||||
private final DbDomainQueries domainQueries;
|
|
||||||
private final DomainBlacklist blacklist;
|
|
||||||
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
|
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
|
||||||
private final BrowseResultCleaner browseResultCleaner;
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
private final Predicate<String> queryPatternPredicate = Pattern.compile("^browse:[.A-Za-z\\-0-9:]+$").asPredicate();
|
private final Predicate<String> queryPatternPredicate = Pattern.compile("^browse:[.A-Za-z\\-0-9:]+$").asPredicate();
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public BrowseCommand(DbBrowseDomainsRandom randomDomains,
|
public BrowseCommand(SearchBrowseService browseService,
|
||||||
DbBrowseDomainsSimilarCosine similarDomains,
|
RendererFactory rendererFactory)
|
||||||
DbBrowseDomainsSimilarOldAlgo similarDomainsOld, DbDomainQueries domainQueries,
|
|
||||||
DomainBlacklist blacklist,
|
|
||||||
RendererFactory rendererFactory,
|
|
||||||
BrowseResultCleaner browseResultCleaner)
|
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
this.randomDomains = randomDomains;
|
this.browseService = browseService;
|
||||||
this.similarDomains = similarDomains;
|
|
||||||
this.similarDomainsOld = similarDomainsOld;
|
|
||||||
this.domainQueries = domainQueries;
|
|
||||||
this.blacklist = blacklist;
|
|
||||||
this.browseResultCleaner = browseResultCleaner;
|
|
||||||
|
|
||||||
browseResultsRenderer = rendererFactory.renderer("search/browse-results");
|
browseResultsRenderer = rendererFactory.renderer("search/browse-results");
|
||||||
}
|
}
|
||||||
@ -82,14 +59,14 @@ public class BrowseCommand implements SearchCommandInterface {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
if ("random".equals(word)) {
|
if ("random".equals(word)) {
|
||||||
return getRandomEntries(0);
|
return browseService.getRandomEntries(0);
|
||||||
}
|
}
|
||||||
if (word.startsWith("random:")) {
|
if (word.startsWith("random:")) {
|
||||||
int set = Integer.parseInt(word.split(":")[1]);
|
int set = Integer.parseInt(word.split(":")[1]);
|
||||||
return getRandomEntries(set);
|
return browseService.getRandomEntries(set);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return getRelatedEntries(word);
|
return browseService.getRelatedEntries(word);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (Exception ex) {
|
catch (Exception ex) {
|
||||||
@ -98,34 +75,5 @@ public class BrowseCommand implements SearchCommandInterface {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private BrowseResultSet getRandomEntries(int set) {
|
|
||||||
List<BrowseResult> results = randomDomains.getRandomDomains(25, blacklist, set);
|
|
||||||
|
|
||||||
results.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
|
||||||
|
|
||||||
return new BrowseResultSet(results);
|
|
||||||
}
|
|
||||||
|
|
||||||
private BrowseResultSet getRelatedEntries(String word) {
|
|
||||||
var domain = domainQueries.getDomainId(new EdgeDomain(word));
|
|
||||||
|
|
||||||
var neighbors = similarDomains.getDomainNeighborsAdjacentCosine(domain, blacklist, 256);
|
|
||||||
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
|
||||||
|
|
||||||
// If the results are very few, supplement with the alternative shitty algorithm
|
|
||||||
if (neighbors.size() < 25) {
|
|
||||||
Set<BrowseResult> allNeighbors = new HashSet<>(neighbors);
|
|
||||||
allNeighbors.addAll(similarDomainsOld.getDomainNeighborsAdjacent(domain, blacklist, 50));
|
|
||||||
|
|
||||||
neighbors.clear();
|
|
||||||
neighbors.addAll(allNeighbors);
|
|
||||||
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
|
||||||
}
|
|
||||||
|
|
||||||
// shuffle the items for a less repetitive experience
|
|
||||||
shuffle(neighbors);
|
|
||||||
|
|
||||||
return new BrowseResultSet(neighbors, word);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ public class SiteRedirectCommand implements SearchCommandInterface {
|
|||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
private final Predicate<String> queryPatternPredicate = Pattern.compile("^site:[.A-Za-z\\-0-9]+$").asPredicate();
|
private final Predicate<String> queryPatternPredicate = Pattern.compile("^(site|links|similar):[.A-Za-z\\-0-9]+$").asPredicate();
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SiteRedirectCommand() {
|
public SiteRedirectCommand() {
|
||||||
@ -30,18 +30,24 @@ public class SiteRedirectCommand implements SearchCommandInterface {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
String definePrefix = "site:";
|
int idx = parameters.query().indexOf(':');
|
||||||
String domain = parameters.query().substring(definePrefix.length()).toLowerCase();
|
String prefix = parameters.query().substring(0, idx);
|
||||||
|
String domain = parameters.query().substring(idx + 1).toLowerCase();
|
||||||
|
|
||||||
// Use an HTML redirect here, so we can use relative URLs
|
// Use an HTML redirect here, so we can use relative URLs
|
||||||
|
String view = switch (prefix) {
|
||||||
|
case "links" -> "links";
|
||||||
|
case "similar" -> "similar";
|
||||||
|
default -> "info";
|
||||||
|
};
|
||||||
|
|
||||||
response.raw().getOutputStream().println("""
|
response.raw().getOutputStream().println("""
|
||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en">
|
<html lang="en">
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<title>Redirecting...</title>
|
<title>Redirecting...</title>
|
||||||
<meta http-equiv="refresh" content="0; url=/site/%s">
|
<meta http-equiv="refresh" content="0; url=/site/%s?view=%s">
|
||||||
""".formatted(domain));
|
""".formatted(domain, view));
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,73 @@
|
|||||||
|
package nu.marginalia.search.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.browse.DbBrowseDomainsRandom;
|
||||||
|
import nu.marginalia.browse.DbBrowseDomainsSimilarCosine;
|
||||||
|
import nu.marginalia.browse.DbBrowseDomainsSimilarOldAlgo;
|
||||||
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
|
import nu.marginalia.browse.model.BrowseResultSet;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.db.DomainBlacklist;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.search.results.BrowseResultCleaner;
|
||||||
|
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import static java.util.Collections.shuffle;
|
||||||
|
|
||||||
|
public class SearchBrowseService {
|
||||||
|
private final DbBrowseDomainsRandom randomDomains;
|
||||||
|
private final DbBrowseDomainsSimilarCosine similarDomains;
|
||||||
|
private final DbBrowseDomainsSimilarOldAlgo similarDomainsOld;
|
||||||
|
private final DbDomainQueries domainQueries;
|
||||||
|
private final DomainBlacklist blacklist;
|
||||||
|
private final BrowseResultCleaner browseResultCleaner;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public SearchBrowseService(DbBrowseDomainsRandom randomDomains,
|
||||||
|
DbBrowseDomainsSimilarCosine similarDomains,
|
||||||
|
DbBrowseDomainsSimilarOldAlgo similarDomainsOld,
|
||||||
|
DbDomainQueries domainQueries,
|
||||||
|
DomainBlacklist blacklist,
|
||||||
|
BrowseResultCleaner browseResultCleaner)
|
||||||
|
{
|
||||||
|
this.randomDomains = randomDomains;
|
||||||
|
this.similarDomains = similarDomains;
|
||||||
|
this.similarDomainsOld = similarDomainsOld;
|
||||||
|
this.domainQueries = domainQueries;
|
||||||
|
this.blacklist = blacklist;
|
||||||
|
this.browseResultCleaner = browseResultCleaner;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BrowseResultSet getRandomEntries(int set) {
|
||||||
|
List<BrowseResult> results = randomDomains.getRandomDomains(25, blacklist, set);
|
||||||
|
|
||||||
|
results.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||||
|
|
||||||
|
return new BrowseResultSet(results);
|
||||||
|
}
|
||||||
|
|
||||||
|
public BrowseResultSet getRelatedEntries(String word) {
|
||||||
|
var domain = domainQueries.getDomainId(new EdgeDomain(word));
|
||||||
|
|
||||||
|
var neighbors = similarDomains.getDomainNeighborsAdjacentCosine(domain, blacklist, 256);
|
||||||
|
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||||
|
|
||||||
|
// If the results are very few, supplement with the alternative shitty algorithm
|
||||||
|
if (neighbors.size() < 25) {
|
||||||
|
Set<BrowseResult> allNeighbors = new HashSet<>(neighbors);
|
||||||
|
allNeighbors.addAll(similarDomainsOld.getDomainNeighborsAdjacent(domain, blacklist, 50));
|
||||||
|
|
||||||
|
neighbors.clear();
|
||||||
|
neighbors.addAll(allNeighbors);
|
||||||
|
neighbors.removeIf(browseResultCleaner.shouldRemoveResultPredicate());
|
||||||
|
}
|
||||||
|
|
||||||
|
// shuffle the items for a less repetitive experience
|
||||||
|
shuffle(neighbors);
|
||||||
|
|
||||||
|
return new BrowseResultSet(neighbors, word);
|
||||||
|
}
|
||||||
|
}
|
@ -1,5 +1,7 @@
|
|||||||
package nu.marginalia.search.svc;
|
package nu.marginalia.search.svc;
|
||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.browse.model.BrowseResult;
|
||||||
|
import nu.marginalia.browse.model.BrowseResultSet;
|
||||||
import nu.marginalia.client.Context;
|
import nu.marginalia.client.Context;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
@ -15,6 +17,7 @@ import spark.*;
|
|||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.OptionalInt;
|
import java.util.OptionalInt;
|
||||||
@ -25,6 +28,7 @@ public class SearchSiteInfoService {
|
|||||||
private final DomainInformationService domainInformationService;
|
private final DomainInformationService domainInformationService;
|
||||||
private final SearchFlagSiteService flagSiteService;
|
private final SearchFlagSiteService flagSiteService;
|
||||||
private final DbDomainQueries domainQueries;
|
private final DbDomainQueries domainQueries;
|
||||||
|
private final SearchBrowseService browseService;
|
||||||
private final MustacheRenderer<Object> renderer;
|
private final MustacheRenderer<Object> renderer;
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
@ -32,13 +36,15 @@ public class SearchSiteInfoService {
|
|||||||
DomainInformationService domainInformationService,
|
DomainInformationService domainInformationService,
|
||||||
RendererFactory rendererFactory,
|
RendererFactory rendererFactory,
|
||||||
SearchFlagSiteService flagSiteService,
|
SearchFlagSiteService flagSiteService,
|
||||||
DbDomainQueries domainQueries) throws IOException {
|
DbDomainQueries domainQueries, SearchBrowseService browseService) throws IOException {
|
||||||
this.searchOperator = searchOperator;
|
this.searchOperator = searchOperator;
|
||||||
this.domainInformationService = domainInformationService;
|
this.domainInformationService = domainInformationService;
|
||||||
this.flagSiteService = flagSiteService;
|
this.flagSiteService = flagSiteService;
|
||||||
this.domainQueries = domainQueries;
|
this.domainQueries = domainQueries;
|
||||||
|
|
||||||
this.renderer = rendererFactory.renderer("search/site-info/site-info");
|
this.renderer = rendererFactory.renderer("search/site-info/site-info");
|
||||||
|
this.browseService = browseService;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public Object handle(Request request, Response response) throws SQLException {
|
public Object handle(Request request, Response response) throws SQLException {
|
||||||
@ -55,6 +61,7 @@ public class SearchSiteInfoService {
|
|||||||
case "links" -> listLinks(ctx, domainName);
|
case "links" -> listLinks(ctx, domainName);
|
||||||
case "docs" -> listDocs(ctx, domainName);
|
case "docs" -> listDocs(ctx, domainName);
|
||||||
case "info" -> siteInfo(ctx, domainName);
|
case "info" -> siteInfo(ctx, domainName);
|
||||||
|
case "similar" -> listSimilar(ctx, domainName);
|
||||||
case "report" -> reportSite(ctx, domainName);
|
case "report" -> reportSite(ctx, domainName);
|
||||||
default -> siteInfo(ctx, domainName);
|
default -> siteInfo(ctx, domainName);
|
||||||
};
|
};
|
||||||
@ -129,7 +136,12 @@ public class SearchSiteInfoService {
|
|||||||
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||||
searchOperator.doBacklinkSearch(ctx, domainName));
|
searchOperator.doBacklinkSearch(ctx, domainName));
|
||||||
}
|
}
|
||||||
|
private SimilarSites listSimilar(Context ctx, String domainName) {
|
||||||
|
|
||||||
|
return new SimilarSites(domainName,
|
||||||
|
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||||
|
browseService.getRelatedEntries(domainName));
|
||||||
|
}
|
||||||
private Docs listDocs(Context ctx, String domainName) {
|
private Docs listDocs(Context ctx, String domainName) {
|
||||||
return new Docs(domainName,
|
return new Docs(domainName,
|
||||||
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
|
||||||
@ -210,6 +222,18 @@ public class SearchSiteInfoService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public record SimilarSites(Map<String, Boolean> view, String domain, long domainId, List<BrowseResult> results) {
|
||||||
|
public SimilarSites(String domain, long domainId, BrowseResultSet results) {
|
||||||
|
this(Map.of("similar", true), domain, domainId, new ArrayList<>(results.results()));
|
||||||
|
}
|
||||||
|
|
||||||
|
public String query() { return "similar:" + domain; }
|
||||||
|
|
||||||
|
public boolean isKnown() {
|
||||||
|
return domainId > 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public record ReportDomain(
|
public record ReportDomain(
|
||||||
Map<String, Boolean> view,
|
Map<String, Boolean> view,
|
||||||
String domain,
|
String domain,
|
||||||
|
@ -42,6 +42,7 @@
|
|||||||
</div>
|
</div>
|
||||||
{{#each results}}{{>search/parts/search-result}}{{/each}}
|
{{#each results}}{{>search/parts/search-result}}{{/each}}
|
||||||
{{/if}}
|
{{/if}}
|
||||||
|
|
||||||
{{#if view.docs}}
|
{{#if view.docs}}
|
||||||
<div class="infobox">
|
<div class="infobox">
|
||||||
Showing documents found in {{domain}}.
|
Showing documents found in {{domain}}.
|
||||||
@ -49,9 +50,18 @@
|
|||||||
|
|
||||||
{{#each results}}{{>search/parts/search-result}}{{/each}}
|
{{#each results}}{{>search/parts/search-result}}{{/each}}
|
||||||
{{/if}}
|
{{/if}}
|
||||||
|
|
||||||
{{#if view.report}}
|
{{#if view.report}}
|
||||||
{{>search/site-info/site-info-report}}
|
{{>search/site-info/site-info-report}}
|
||||||
{{/if}}
|
{{/if}}
|
||||||
|
|
||||||
|
{{#if view.similar}}
|
||||||
|
<div class="infobox">Showing domains similar to {{domain}}</div>
|
||||||
|
<section class="cards">
|
||||||
|
{{#each results}}{{>search/browse-result}}{{/each}}
|
||||||
|
</section>
|
||||||
|
{{/if}}
|
||||||
|
|
||||||
{{>search/parts/search-footer}}
|
{{>search/parts/search-footer}}
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user