From 7c8a60b8cf406214760170e6d3e853aeaae4497f Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Sat, 18 Nov 2023 17:42:35 +0100 Subject: [PATCH] (search) Site info view is mostly done Also optimize the rendering a bit to avoid having to allocate huge string buffers, writing directly to Spark's response instead. --- code/common/renderer/build.gradle | 1 + .../marginalia/renderer/MustacheRenderer.java | 32 +++ .../nu/marginalia/search/SearchOperator.java | 7 + .../search/SearchQueryParamFactory.java | 18 ++ .../nu/marginalia/search/SearchService.java | 7 +- .../search/command/CommandEvaluator.java | 21 +- .../command/SearchCommandInterface.java | 5 +- .../search/command/commands/BangCommand.java | 5 +- .../command/commands/BrowseCommand.java | 17 +- .../command/commands/ConvertCommand.java | 14 +- .../command/commands/DefinitionCommand.java | 12 +- .../command/commands/SearchCommand.java | 8 +- .../command/commands/SiteListCommand.java | 119 --------- .../command/commands/SiteRedirectCommand.java | 49 ++++ .../siteinfo/DomainInformationService.java | 2 +- .../search/svc/SearchFlagSiteService.java | 84 ++----- .../search/svc/SearchQueryService.java | 2 +- .../search/svc/SearchSiteInfoService.java | 236 ++++++++++++++++++ .../main/resources/static/search/serp.scss | 217 ++++++++++++++-- .../templates/search/browse-results.hdb | 2 +- .../templates/search/indict/indict-form.hdb | 80 ------ .../templates/search/parts/search-form.hdb | 2 +- .../search/parts/site-info-index.hdb | 66 ----- .../search/parts/site-info-links.hdb | 18 -- .../templates/search/search-results.hdb | 6 + .../resources/templates/search/site-info.hdb | 37 --- .../site-info/site-info-index-blacklisted.hdb | 8 + .../site-info/site-info-index-indexed.hdb | 16 ++ .../site-info/site-info-index-suggest.hdb | 12 + .../site-info/site-info-index-unknown.hdb | 9 + .../search/site-info/site-info-index.hdb | 25 ++ .../search/site-info/site-info-links.hdb | 9 + .../search/site-info/site-info-report.hdb | 60 +++++ 
.../search/site-info/site-info-screenshot.hdb | 0 .../templates/search/site-info/site-info.hdb | 58 +++++ .../command/commands/BangCommandTest.java | 38 --- 36 files changed, 818 insertions(+), 484 deletions(-) delete mode 100644 code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteListCommand.java create mode 100644 code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java create mode 100644 code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java delete mode 100644 code/services-application/search-service/src/main/resources/templates/search/indict/indict-form.hdb delete mode 100644 code/services-application/search-service/src/main/resources/templates/search/parts/site-info-index.hdb delete mode 100644 code/services-application/search-service/src/main/resources/templates/search/parts/site-info-links.hdb delete mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info.hdb create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-blacklisted.hdb create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-indexed.hdb create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-suggest.hdb create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index-unknown.hdb create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-index.hdb create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-links.hdb create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-report.hdb 
create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-screenshot.hdb create mode 100644 code/services-application/search-service/src/main/resources/templates/search/site-info/site-info.hdb delete mode 100644 code/services-application/search-service/src/test/java/nu/marginalia/search/command/commands/BangCommandTest.java diff --git a/code/common/renderer/build.gradle b/code/common/renderer/build.gradle index 3b5cd622..b40aaee2 100644 --- a/code/common/renderer/build.gradle +++ b/code/common/renderer/build.gradle @@ -16,6 +16,7 @@ dependencies { implementation libs.bundles.handlebars implementation libs.guice + implementation libs.spark testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.junit diff --git a/code/common/renderer/src/main/java/nu/marginalia/renderer/MustacheRenderer.java b/code/common/renderer/src/main/java/nu/marginalia/renderer/MustacheRenderer.java index 4815a3fa..67bbd906 100644 --- a/code/common/renderer/src/main/java/nu/marginalia/renderer/MustacheRenderer.java +++ b/code/common/renderer/src/main/java/nu/marginalia/renderer/MustacheRenderer.java @@ -8,9 +8,12 @@ import lombok.SneakyThrows; import nu.marginalia.renderer.config.HandlebarsConfigurator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import spark.Response; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; import java.util.List; import java.util.Map; @@ -48,6 +51,23 @@ public class MustacheRenderer { return template.apply(model); } + private Writer getWriter(Response response) throws IOException { + + // response.raw() has a getWriter() method that fits here, but this is a trap, as subsequent + // calls to response.raw().getOutputStream() will fail with an IllegalStateException; and we + // have internal code that does this. 
+ + return new OutputStreamWriter(response.raw().getOutputStream()); + } + + @SneakyThrows + public Object renderInto(Response response, T model) { + + template.apply(model, getWriter(response)); + + return ""; + } + @SneakyThrows public String render(T model, String name, List children) { Context ctx = Context.newBuilder(model).combine(name, children).build(); @@ -55,10 +75,22 @@ public class MustacheRenderer { return template.apply(ctx); } + @SneakyThrows + public void renderInto(Response response, T model, String name, List children) { + Context ctx = Context.newBuilder(model).combine(name, children).build(); + + template.apply(ctx, getWriter(response)); + } + @SneakyThrows public String render(T model, Map children) { Context ctx = Context.newBuilder(model).combine(children).build(); return template.apply(ctx); } + @SneakyThrows + public void renderInto(Response response, T model, Map children) { + Context ctx = Context.newBuilder(model).combine(children).build(); + template.apply(ctx, getWriter(response)); + } } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java index 371f1b77..b80a7a9a 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchOperator.java @@ -75,7 +75,14 @@ public class SearchOperator { return searchQueryService.getResultsFromQuery(queryResponse); } + public List doBacklinkSearch(Context ctx, + String domain) { + var queryParams = paramFactory.forBacklinkSearch(domain); + var queryResponse = queryClient.search(ctx, queryParams); + + return searchQueryService.getResultsFromQuery(queryResponse); + } public DecoratedSearchResults doSearch(Context ctx, SearchParameters userParams) { Future eval = searchUnitConversionService.tryEval(ctx, 
userParams.query()); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java index 2e5e00a6..4a4721a7 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchQueryParamFactory.java @@ -51,4 +51,22 @@ public class SearchQueryParamFactory { SearchSetIdentifier.NONE ); } + + + public QueryParams forBacklinkSearch(String domain) { + return new QueryParams("links:"+domain, + null, + List.of(), + List.of(), + List.of(), + List.of(), + SpecificationLimit.none(), + SpecificationLimit.none(), + SpecificationLimit.none(), + SpecificationLimit.none(), + List.of(), + new QueryLimits(100, 100, 100, 512), + SearchSetIdentifier.NONE + ); + } } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java index 694a90ed..13f14e6e 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchService.java @@ -32,6 +32,7 @@ public class SearchService extends Service { SearchErrorPageService errorPageService, SearchAddToCrawlQueueService addToCrawlQueueService, SearchFlagSiteService flagSiteService, + SearchSiteInfoService siteInfoService, SearchQueryService searchQueryService ) { super(params); @@ -50,10 +51,10 @@ public class SearchService extends Service { Spark.post("/public/site/suggest/", addToCrawlQueueService::suggestCrawling); - Spark.get("/public/site/flag-site/:domainId", flagSiteService::flagSiteForm); - Spark.post("/public/site/flag-site/:domainId", flagSiteService::flagSiteAction); 
Spark.get("/public/site-search/:site/*", this::siteSearchRedir); - Spark.get("/public/site/:site", this::siteSearchRedir); + + Spark.get("/public/site/:site", siteInfoService::handle); + Spark.post("/public/site/:site", siteInfoService::handlePost); Spark.exception(Exception.class, (e,p,q) -> { logger.error("Error during processing", e); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/CommandEvaluator.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/CommandEvaluator.java index bf72dbd6..7f90c8ca 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/CommandEvaluator.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/CommandEvaluator.java @@ -3,6 +3,7 @@ package nu.marginalia.search.command; import com.google.inject.Inject; import nu.marginalia.search.command.commands.*; import nu.marginalia.client.Context; +import spark.Response; import java.util.ArrayList; import java.util.List; @@ -17,30 +18,32 @@ public class CommandEvaluator { BrowseCommand browse, ConvertCommand convert, DefinitionCommand define, - SiteListCommand site, BangCommand bang, + SiteRedirectCommand siteRedirect, SearchCommand search ) { specialCommands.add(browse); specialCommands.add(convert); specialCommands.add(define); - specialCommands.add(site); specialCommands.add(bang); + specialCommands.add(siteRedirect); defaultCommand = search; } - public Object eval(Context ctx, SearchParameters parameters) { + public Object eval(Context ctx, Response response, SearchParameters parameters) { for (var cmd : specialCommands) { - var ret = cmd.process(ctx, parameters); - if (ret.isPresent()) { - return ret.get(); + if (cmd.process(ctx, response, parameters)) { + // The commands will write directly to the response, so we don't need to do anything else + // but it's important we don't return null, as this signals to Spark that we 
haven't handled + // the request. + + return ""; } } - // Always process the search command last - return defaultCommand.process(ctx, parameters) - .orElseThrow(() -> new IllegalStateException("Search Command returned Optional.empty()!") /* This Should Not be Possible™ */ ); + defaultCommand.process(ctx, response, parameters); + return ""; } } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchCommandInterface.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchCommandInterface.java index d543aa98..ed485b50 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchCommandInterface.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/SearchCommandInterface.java @@ -2,9 +2,8 @@ package nu.marginalia.search.command; import nu.marginalia.client.Context; - -import java.util.Optional; +import spark.Response; public interface SearchCommandInterface { - Optional process(Context ctx, SearchParameters parameters); + boolean process(Context ctx, Response response, SearchParameters parameters); } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BangCommand.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BangCommand.java index 0701031f..e0ed03b4 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BangCommand.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BangCommand.java @@ -5,6 +5,7 @@ import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; import nu.marginalia.client.Context; import nu.marginalia.search.exceptions.RedirectException; +import spark.Response; import java.net.URLEncoder; import 
java.nio.charset.StandardCharsets; @@ -23,7 +24,7 @@ public class BangCommand implements SearchCommandInterface { } @Override - public Optional process(Context ctx, SearchParameters parameters) { + public boolean process(Context ctx, Response response, SearchParameters parameters) { for (var entry : bangsToPattern.entrySet()) { String bangPattern = entry.getKey(); @@ -37,7 +38,7 @@ public class BangCommand implements SearchCommandInterface { } } - return Optional.empty(); + return false; } private Optional matchBangPattern(String query, String bangKey) { diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java index 47512195..38ae63f2 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/BrowseCommand.java @@ -17,6 +17,7 @@ import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import spark.Response; import java.io.IOException; import java.util.*; @@ -56,16 +57,22 @@ public class BrowseCommand implements SearchCommandInterface { } @Override - public Optional process(Context ctx, SearchParameters parameters) { + public boolean process(Context ctx, Response response, SearchParameters parameters) { if (!queryPatternPredicate.test(parameters.query())) { - return Optional.empty(); + return false; } - return Optional.ofNullable(browseSite(ctx, parameters.query())) - .map(results -> browseResultsRenderer.render(results, + var model = browseSite(ctx, parameters.query()); + + if (null == model) + return false; + + browseResultsRenderer.renderInto(response, model, Map.of("query", parameters.query(), "profile", 
parameters.profileStr(), - "focusDomain", results.focusDomain()))); + "focusDomain", model.focusDomain()) + ); + return true; } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/ConvertCommand.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/ConvertCommand.java index 3b434865..644d193c 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/ConvertCommand.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/ConvertCommand.java @@ -1,16 +1,17 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; +import lombok.SneakyThrows; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.svc.SearchUnitConversionService; import nu.marginalia.client.Context; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; +import spark.Response; import java.io.IOException; import java.util.Map; -import java.util.Optional; public class ConvertCommand implements SearchCommandInterface { private final SearchUnitConversionService searchUnitConversionService; @@ -24,16 +25,19 @@ public class ConvertCommand implements SearchCommandInterface { } @Override - public Optional process(Context ctx, SearchParameters parameters) { + @SneakyThrows + public boolean process(Context ctx, Response response, SearchParameters parameters) { var conversion = searchUnitConversionService.tryConversion(ctx, parameters.query()); if (conversion.isEmpty()) { - return Optional.empty(); + return false; } - return Optional.of(conversionRenderer.render(Map.of( + conversionRenderer.renderInto(response, Map.of( "query", parameters.query(), "result", conversion.get(), - "profile", parameters.profileStr())) + "profile", parameters.profileStr()) ); + + return true; } 
} diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/DefinitionCommand.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/DefinitionCommand.java index a9b401a6..fcc16507 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/DefinitionCommand.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/DefinitionCommand.java @@ -12,10 +12,10 @@ import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import spark.Response; import java.io.IOException; import java.util.Map; -import java.util.Optional; import java.util.function.Predicate; import java.util.regex.Pattern; @@ -38,17 +38,19 @@ public class DefinitionCommand implements SearchCommandInterface { } @Override - public Optional process(Context ctx, SearchParameters parameters) { + public boolean process(Context ctx, Response response, SearchParameters parameters) { if (!queryPatternPredicate.test(parameters.query())) { - return Optional.empty(); + return false; } var results = lookupDefinition(ctx, parameters.query()); - return Optional.of(dictionaryRenderer.render(results, + dictionaryRenderer.renderInto(response, results, Map.of("query", parameters.query(), "profile", parameters.profileStr()) - )); + ); + + return true; } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java index 2bc97ad2..3b0a7196 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SearchCommand.java 
@@ -10,9 +10,9 @@ import nu.marginalia.search.model.DecoratedSearchResults; import nu.marginalia.search.model.UrlDetails; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; +import spark.Response; import java.io.IOException; -import java.util.Optional; public class SearchCommand implements SearchCommandInterface { private final DomainBlacklist blacklist; @@ -32,10 +32,12 @@ public class SearchCommand implements SearchCommandInterface { } @Override - public Optional process(Context ctx, SearchParameters parameters) { + public boolean process(Context ctx, Response response, SearchParameters parameters) { DecoratedSearchResults results = searchOperator.doSearch(ctx, parameters); - return Optional.of(searchResultsRenderer.render(results)); + searchResultsRenderer.renderInto(response, results); + + return true; } private boolean isBlacklisted(UrlDetails details) { diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteListCommand.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteListCommand.java deleted file mode 100644 index 6278a344..00000000 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteListCommand.java +++ /dev/null @@ -1,119 +0,0 @@ -package nu.marginalia.search.command.commands; - -import com.google.inject.Inject; -import nu.marginalia.db.DbDomainQueries; -import nu.marginalia.model.EdgeDomain; -import nu.marginalia.search.SearchOperator; -import nu.marginalia.search.model.UrlDetails; -import nu.marginalia.search.command.SearchCommandInterface; -import nu.marginalia.search.command.SearchParameters; -import nu.marginalia.search.model.DomainInformation; -import nu.marginalia.search.model.SearchProfile; -import nu.marginalia.search.siteinfo.DomainInformationService; -import nu.marginalia.search.svc.SearchQueryIndexService; -import nu.marginalia.client.Context; 
-import nu.marginalia.renderer.MustacheRenderer; -import nu.marginalia.renderer.RendererFactory; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.*; -import java.util.function.Predicate; -import java.util.regex.Pattern; - -public class SiteListCommand implements SearchCommandInterface { - private final DbDomainQueries domainQueries; - private final DomainInformationService domainInformationService; - private final SearchQueryIndexService searchQueryIndexService; - private final SearchOperator searchOperator; - private final Logger logger = LoggerFactory.getLogger(getClass()); - - private final MustacheRenderer siteInfoRenderer; - - private final Predicate queryPatternPredicate = Pattern.compile("^site:[.A-Za-z\\-0-9]+$").asPredicate(); - - @Inject - public SiteListCommand( - DomainInformationService domainInformationService, - DbDomainQueries domainQueries, - RendererFactory rendererFactory, - SearchQueryIndexService searchQueryIndexService, SearchOperator searchOperator) - throws IOException - { - this.domainQueries = domainQueries; - this.domainInformationService = domainInformationService; - - siteInfoRenderer = rendererFactory.renderer("search/site-info"); - this.searchQueryIndexService = searchQueryIndexService; - this.searchOperator = searchOperator; - } - - @Override - public Optional process(Context ctx, SearchParameters parameters) { - if (!queryPatternPredicate.test(parameters.query())) { - return Optional.empty(); - } - - var results = siteInfo(ctx, parameters.query()); - var domain = results.getDomain(); - - List resultSet; - Path screenshotPath = null; - int domainId = -1; - if (null != domain) { - resultSet = searchOperator.doSiteSearch(ctx, domain.toString()); - - var maybeId = domainQueries.tryGetDomainId(domain); - if (maybeId.isPresent()) { - domainId = maybeId.getAsInt(); - screenshotPath = Path.of("/screenshot/" + domainId); - } - else { - domainId = -1; - 
screenshotPath = Path.of("/screenshot/0"); - } - } - else { - resultSet = Collections.emptyList(); - } - - Map renderObject = new HashMap<>(10); - - renderObject.put("query", parameters.query()); - renderObject.put("hideRanking", true); - renderObject.put("profile", parameters.profileStr()); - renderObject.put("results", resultSet); - renderObject.put("screenshot", screenshotPath == null ? "" : screenshotPath.toString()); - renderObject.put("domainId", domainId); - renderObject.put("focusDomain", domain); - - return Optional.of(siteInfoRenderer.render(results, renderObject)); - } - - - private DomainInformation siteInfo(Context ctx, String humanQuery) { - String definePrefix = "site:"; - String word = humanQuery.substring(definePrefix.length()).toLowerCase(); - - logger.info("Fetching Site Info: {}", word); - - var results = domainInformationService - .domainInfo(word) - .orElseGet(() -> unknownSite(word)); - - logger.debug("Results = {}", results); - - return results; - - } - - private DomainInformation unknownSite(String url) { - return DomainInformation.builder() - .domain(new EdgeDomain(url)) - .suggestForCrawling(true) - .unknownDomain(true) - .build(); - } -} diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java new file mode 100644 index 00000000..d3398bb1 --- /dev/null +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/command/commands/SiteRedirectCommand.java @@ -0,0 +1,49 @@ +package nu.marginalia.search.command.commands; + +import com.google.inject.Inject; +import lombok.SneakyThrows; +import nu.marginalia.client.Context; +import nu.marginalia.search.command.SearchCommandInterface; +import nu.marginalia.search.command.SearchParameters; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import spark.Response; + +import 
java.io.IOException; +import java.util.function.Predicate; +import java.util.regex.Pattern; + +public class SiteRedirectCommand implements SearchCommandInterface { + + private final Logger logger = LoggerFactory.getLogger(getClass()); + + private final Predicate queryPatternPredicate = Pattern.compile("^site:[.A-Za-z\\-0-9]+$").asPredicate(); + + @Inject + public SiteRedirectCommand() { + } + + @SneakyThrows + @Override + public boolean process(Context ctx, Response response, SearchParameters parameters) { + if (!queryPatternPredicate.test(parameters.query())) { + return false; + } + + String definePrefix = "site:"; + String domain = parameters.query().substring(definePrefix.length()).toLowerCase(); + + // Use an HTML redirect here, so we can use relative URLs + + response.raw().getOutputStream().println(""" + + + + Redirecting... + + """.formatted(domain)); + + return true; + } + +} diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/siteinfo/DomainInformationService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/siteinfo/DomainInformationService.java index 9a72f359..c05cfec2 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/siteinfo/DomainInformationService.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/siteinfo/DomainInformationService.java @@ -76,7 +76,7 @@ public class DomainInformationService { .linkingDomains(linkingDomains) .inCrawlQueue(inCrawlQueue) .nodeAffinity(nodeAffinity) - .suggestForCrawling((pagesVisited == 0 && !inCrawlQueue)) + .suggestForCrawling((pagesVisited == 0 && outboundLinks == 0 && !inCrawlQueue)) .build(); return Optional.of(di); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFlagSiteService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFlagSiteService.java index 06e460b4..e4ffce78 100644 
--- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFlagSiteService.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchFlagSiteService.java @@ -2,13 +2,7 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; import com.zaxxer.hikari.HikariDataSource; -import nu.marginalia.renderer.MustacheRenderer; -import nu.marginalia.renderer.RendererFactory; -import spark.Request; -import spark.Response; -import spark.Spark; -import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; @@ -21,7 +15,6 @@ import java.util.stream.Collectors; * DomainComplaintService in control-service */ public class SearchFlagSiteService { - private final MustacheRenderer formTemplate; private final HikariDataSource dataSource; private final CategoryItem unknownCategory = new CategoryItem("unknown", "Unknown"); @@ -39,62 +32,21 @@ public class SearchFlagSiteService { private final Map categoryItemMap = categories.stream().collect(Collectors.toMap(CategoryItem::categoryName, Function.identity())); @Inject - public SearchFlagSiteService(RendererFactory rendererFactory, - HikariDataSource dataSource) throws IOException { - formTemplate = rendererFactory.renderer("search/indict/indict-form"); + public SearchFlagSiteService(HikariDataSource dataSource) { this.dataSource = dataSource; } - public Object flagSiteForm(Request request, Response response) throws SQLException { - final int domainId = Integer.parseInt(request.params("domainId")); - - var model = getModel(domainId, false); - return formTemplate.render(model); + public List getCategories() { + return categories; } - public Object flagSiteAction(Request request, Response response) throws SQLException { - - int domainId = Integer.parseInt(request.params("domainId")); - - var formData = new FlagSiteFormData( - domainId, - request.queryParams("category"), - request.queryParams("description"), - 
request.queryParams("samplequery") - ); - - insertComplaint(formData); - - return formTemplate.render(getModel(domainId, true)); - } - - private void insertComplaint(FlagSiteFormData formData) throws SQLException { - try (var conn = dataSource.getConnection(); - var stmt = conn.prepareStatement( - """ - INSERT INTO DOMAIN_COMPLAINT(DOMAIN_ID, CATEGORY, DESCRIPTION, SAMPLE) VALUES (?, ?, ?, ?) - """)) { - stmt.setInt(1, formData.domainId); - stmt.setString(2, formData.category); - stmt.setString(3, formData.description); - stmt.setString(4, formData.sampleQuery); - stmt.executeUpdate(); - } - } - - private FlagSiteViewModel getModel(int id, boolean isSubmitted) throws SQLException { - - + public List getExistingComplaints(int id) throws SQLException { try (var conn = dataSource.getConnection(); var complaintsStmt = conn.prepareStatement(""" SELECT CATEGORY, FILE_DATE, REVIEWED, DECISION FROM DOMAIN_COMPLAINT WHERE DOMAIN_ID=? - """); - var stmt = conn.prepareStatement( - """ - SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE EC_DOMAIN.ID=? - """)) + """)) { List complaints = new ArrayList<>(); @@ -109,21 +61,25 @@ public class SearchFlagSiteService { rs.getString(4))); } - stmt.setInt(1, id); - rs = stmt.executeQuery(); - if (!rs.next()) { - Spark.halt(404); - } - return new FlagSiteViewModel(id, - rs.getString(1), - categories, - complaints, - isSubmitted); + return complaints; + } + } + + public void insertComplaint(FlagSiteFormData formData) throws SQLException { + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement( + """ + INSERT INTO DOMAIN_COMPLAINT(DOMAIN_ID, CATEGORY, DESCRIPTION, SAMPLE) VALUES (?, ?, ?, ?) 
+ """)) { + stmt.setInt(1, formData.domainId); + stmt.setString(2, formData.category); + stmt.setString(3, formData.description); + stmt.setString(4, formData.sampleQuery); + stmt.executeUpdate(); } } public record CategoryItem(String categoryName, String categoryDesc) {} - public record FlagSiteViewModel(int domainId, String domain, List category, List complaints, boolean isSubmitted) {} public record FlagSiteComplaintModel(String category, String submitTime, boolean isReviewed, String decision) {} public record FlagSiteFormData(int domainId, String category, String description, String sampleQuery) {}; } diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java index 9a1a9a11..241b91be 100644 --- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchQueryService.java @@ -37,7 +37,7 @@ public class SearchQueryService { final var ctx = Context.fromRequest(request); try { - return searchCommandEvaulator.eval(ctx, parseParameters(request)); + return searchCommandEvaulator.eval(ctx, response, parseParameters(request)); } catch (RedirectException ex) { response.redirect(ex.newUrl); diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java new file mode 100644 index 00000000..1be81784 --- /dev/null +++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java @@ -0,0 +1,236 @@ +package nu.marginalia.search.svc; +import com.google.inject.Inject; +import nu.marginalia.client.Context; +import nu.marginalia.db.DbDomainQueries; +import 
nu.marginalia.model.EdgeDomain; +import nu.marginalia.renderer.MustacheRenderer; +import nu.marginalia.renderer.RendererFactory; +import nu.marginalia.search.SearchOperator; +import nu.marginalia.search.model.DomainInformation; +import nu.marginalia.search.model.UrlDetails; +import nu.marginalia.search.siteinfo.DomainInformationService; +import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData; +import spark.*; + +import javax.annotation.Nullable; +import java.io.IOException; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; +import java.util.OptionalInt; + +public class SearchSiteInfoService { + + private final SearchOperator searchOperator; + private final DomainInformationService domainInformationService; + private final SearchFlagSiteService flagSiteService; + private final DbDomainQueries domainQueries; + private final MustacheRenderer renderer; + + @Inject + public SearchSiteInfoService(SearchOperator searchOperator, + DomainInformationService domainInformationService, + RendererFactory rendererFactory, + SearchFlagSiteService flagSiteService, + DbDomainQueries domainQueries) throws IOException { + this.searchOperator = searchOperator; + this.domainInformationService = domainInformationService; + this.flagSiteService = flagSiteService; + this.domainQueries = domainQueries; + + this.renderer = rendererFactory.renderer("search/site-info/site-info"); + } + + public Object handle(Request request, Response response) throws SQLException { + String domainName = request.params("site"); + String view = request.queryParamOrDefault("view", "info"); + + if (null == domainName || domainName.isBlank()) { + return null; + } + + var ctx = Context.fromRequest(request); + + var model = switch (view) { + case "links" -> listLinks(ctx, domainName); + case "docs" -> listDocs(ctx, domainName); + case "info" -> siteInfo(ctx, domainName); + case "report" -> reportSite(ctx, domainName); + default -> siteInfo(ctx, domainName); + }; + + return 
renderer.renderInto(response, model); + } + + public Object handlePost(Request request, Response response) throws SQLException { + String domainName = request.params("site"); + String view = request.queryParamOrDefault("view", "info"); + + if (null == domainName || domainName.isBlank()) { + return null; + } + + if (!view.equals("report")) + return null; + + final int domainId = domainQueries.getDomainId(new EdgeDomain(domainName)); + + FlagSiteFormData formData = new FlagSiteFormData( + domainId, + request.queryParams("category"), + request.queryParams("description"), + request.queryParams("sampleQuery") + ); + flagSiteService.insertComplaint(formData); + + var complaints = flagSiteService.getExistingComplaints(domainId); + + var model = new ReportDomain(domainName, domainId, complaints, List.of(), true); + + return renderer.renderInto(response, model); + } + + private Object reportSite(Context ctx, String domainName) throws SQLException { + int domainId = domainQueries.getDomainId(new EdgeDomain(domainName)); + var existingComplaints = flagSiteService.getExistingComplaints(domainId); + + return new ReportDomain(domainName, + domainId, + existingComplaints, + flagSiteService.getCategories(), + false); + } + + private SiteInfo siteInfo(Context ctx, String domainName) { + OptionalInt id = domainQueries.tryGetDomainId(new EdgeDomain(domainName)); + + if (id.isEmpty()) { + return new SiteInfo(domainName, -1, null, dummyInformation(domainName)); + } + + String screenshotPath = "/screenshot/"+id.getAsInt(); + DomainInformation domainInfo = domainInformationService + .domainInfo(domainName) + .orElseGet(() -> dummyInformation(domainName)); + + return new SiteInfo(domainName, id.getAsInt(), screenshotPath, domainInfo); + } + + private DomainInformation dummyInformation(String domainName) { + return DomainInformation.builder() + .domain(new EdgeDomain(domainName)) + .suggestForCrawling(true) + .unknownDomain(true) + .build(); + } + + private Backlinks listLinks(Context ctx, 
String domainName) { + return new Backlinks(domainName, + domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1), + searchOperator.doBacklinkSearch(ctx, domainName)); + } + + private Docs listDocs(Context ctx, String domainName) { + return new Docs(domainName, + domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1), + searchOperator.doSiteSearch(ctx, domainName)); + } + + public record SiteInfo(Map view, + Map domainState, + long domainId, + String domain, + @Nullable String screenshotUrl, + DomainInformation domainInformation) + { + public SiteInfo(String domain, + long domainId, + @Nullable String screenshotUrl, + DomainInformation domainInformation) + { + this(Map.of("info", true), + Map.of(domainInfoState(domainInformation), true), + domainId, + domain, + screenshotUrl, + domainInformation); + } + + private static String domainInfoState(DomainInformation info) { + if (info.isBlacklisted()) { + return "blacklisted"; + } + if (!info.isUnknownDomain() && info.isSuggestForCrawling()) { + return "suggestForCrawling"; + } + if (info.isInCrawlQueue()) { + return "inCrawlQueue"; + } + if (info.isUnknownDomain()) { + return "unknownDomain"; + } + else { + return "indexed"; + } + } + + public String query() { return "site:" + domain; } + + public boolean isKnown() { + return domainId > 0; + } + } + + public record Docs(Map view, + String domain, + long domainId, + List results) { + public Docs(String domain, long domainId, List results) { + this(Map.of("docs", true), domain, domainId, results); + } + + public String focusDomain() { return domain; } + + public String query() { return "site:" + domain; } + + public boolean isKnown() { + return domainId > 0; + } + } + + public record Backlinks(Map view, String domain, long domainId, List results) { + public Backlinks(String domain, long domainId, List results) { + this(Map.of("links", true), domain, domainId, results); + } + + public String query() { return "links:" + domain; } + + public boolean 
isKnown() { + return domainId > 0; + } + } + + public record ReportDomain( + Map view, + String domain, + int domainId, + List complaints, + List category, + boolean submitted) + { + public ReportDomain(String domain, + int domainId, + List complaints, + List category, + boolean submitted) { + this(Map.of("report", true), domain, domainId, complaints, category, submitted); + } + + public String query() { return "site:" + domain; } + + public boolean isKnown() { + return domainId > 0; + } + } + +} diff --git a/code/services-application/search-service/src/main/resources/static/search/serp.scss b/code/services-application/search-service/src/main/resources/static/search/serp.scss index 07d97a17..fb5667e6 100644 --- a/code/services-application/search-service/src/main/resources/static/search/serp.scss +++ b/code/services-application/search-service/src/main/resources/static/search/serp.scss @@ -25,10 +25,68 @@ body { padding: 0; } +#siteinfo-nav { + display: block; + width: 100%; + @extend .dialog; + padding: 0.25ch !important; + margin-top: 1.5ch; + + + ul { + list-style: none; + padding: 0; + margin: 1ch; + + li { + display: inline; + padding: 1ch; + background-color: $highlight-light2; + + a { + text-decoration: none; + display: inline-block; + color: #000; + } + + .link-unavailable { + display: inline-block; + text-decoration: line-through; + color: #888; + } + } + + li.current { + background-color: $highlight-light; + a { + color: #fff; + } + } + } +} + +.dialog { + border: 1px solid $border-color; + box-shadow: 0 0 1ch $border-color; + background-color: #fff; + padding: 1ch; + + h2 { + margin: 0; + font-family: sans-serif; + font-weight: normal; + padding: 0.5ch; + font-size: 12pt; + background-color: $highlight-light; + color: #fff; + } +} + header { background-color: $nicotine-dark; color: #fff; - border-bottom: 1px solid $border-color; + border: 1px solid #888; + box-shadow: 0 0 0.5ch #888; margin-bottom: 1ch; nav { @@ -46,6 +104,7 @@ header { rgba(100,255,100,1) 
50%, rgba(100,100,255,1) 100%); color: black; + text-shadow: 0 0 0.25ch #ccc; } a:hover, a:focus { @@ -55,6 +114,119 @@ header { } } +#complaint { + @extend .dialog; + max-width: 60ch; + margin-left: auto; + margin-right: auto; + margin-top: 2ch; + + textarea { + width: 100%; + height: 10ch; + } +} + +#siteinfo { + margin-top: 1ch; + display: flex; + gap: 1ch; + flex-grow: 0.5; + flex-shrink: 0.5; + flex-basis: 10ch 10ch; + flex-direction: row; + flex-wrap: wrap; + align-content: stretch; + align-items: stretch; + justify-content: stretch; + + #index-info, #link-info { + width: 32ch; + @extend .dialog; + } + #screenshot { + @extend .dialog; + } + #screenshot img { + width: 30ch; + height: 22.5ch; + } +} + +.infobox { + background-color: #fff; + padding: 1ch; + margin: 1ch; + border: 1px solid $border-color; + box-shadow: 0 0 1ch $border-color; +} + +section.cards { + display: flex; + flex-direction: row; + flex-wrap: wrap; + padding-top: 1ch; + gap: 2ch; + justify-content: flex-start; + + .card { + border: 2px #ccc; + background-color: #fff; + border-left: 1px solid #ecb; + border-top: 1px solid #ecb; + box-shadow: #0008 0 0 5px; + + h2 { + color: #fff; + background-color: $highlight-light; + border-bottom: 1px solid $border-color; + font-weight: normal; + font-size: 12pt; + padding: .5ch .5ch .5ch .5ch; + margin: 0 0 0 0; + word-break: break-word; + font-family: $heading-fonts; + + text-decoration: none; + } + + h2 a { + display: block !important; + color: #fff; + text-decoration: none; + } + a:focus img { + filter: sepia(100%); + box-shadow: #444 0px 0px 20px; + } + a:focus:not(.nofocus) { + background-color: black; + color: white; + } + + .description { + padding-left: 1ch; + padding-right: 1ch; + overflow: auto; + -webkit-hyphens: auto; + -moz-hyphens: auto; + -ms-hyphens: auto; + hyphens: auto; + } + + img { + width: 28ch; + height: auto; + } + + .info { + padding-left: 1ch; + padding-right: 1ch; + line-height: 1.6; + } + } +} + .positions { box-shadow: 0 0 
2px #888; background-color: #e4e4e4; @@ -258,30 +430,29 @@ footer { margin: 0; } - .utils { - display: flex; - font-size: 10pt; - padding: 1ch; - background-color: #eee; - - > * { - margin-right: 1ch; - margin-left: 1ch; - } - .meta { - flex-grow: 2; - text-align: right; - } - .meta > * { - padding-left: 4px; - } - a { - color: #000; - } - - } } +.utils { + display: flex; + font-size: 10pt; + padding: 1ch; + background-color: #eee; + + > * { + margin-right: 1ch; + margin-left: 1ch; + } + .meta { + flex-grow: 2; + text-align: right; + } + .meta > * { + padding-left: 4px; + } + a { + color: #000; + } +} @media (max-device-width: 624px) { body[data-has-js="true"] { margin: 0 !important; diff --git a/code/services-application/search-service/src/main/resources/templates/search/browse-results.hdb b/code/services-application/search-service/src/main/resources/templates/search/browse-results.hdb index d80d2956..c8d38481 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/browse-results.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/browse-results.hdb @@ -4,7 +4,7 @@ Marginalia Search - {{query}} - + diff --git a/code/services-application/search-service/src/main/resources/templates/search/indict/indict-form.hdb b/code/services-application/search-service/src/main/resources/templates/search/indict/indict-form.hdb deleted file mode 100644 index 5a8ebbce..00000000 --- a/code/services-application/search-service/src/main/resources/templates/search/indict/indict-form.hdb +++ /dev/null @@ -1,80 +0,0 @@ - - - - - Marginalia Search - File complaint against {{domain}} - - - - - - - - - -{{>search/parts/search-header}} - -
-{{>search/parts/search-form}} - -
- -{{#if isSubmitted}} -

Your complaint against {{domain}} has been submitted

-

The review process is manual and may take a while.

-{{/if}} - -{{#unless isSubmitted}} -

Flag {{domain}} for review

-Note, this is not intended to police acceptable thoughts or ideas. -

-That said, offensive content in obvious bad faith is not tolerated, especially when designed -to crop up when you didn't go looking for it. How and where it is said is more -important than what is said. -

-This form can also be used to appeal unfairly blacklisted sites. -

- -

-
- Flag for Review - -
- -
-
-
-
-
-
-
-
-
- -
-
-

-Communicating through forms and tables is a bit impersonal, -you may also reach a human being through email at kontakt@marginalia.nu. -{{/unless}} - -{{#if complaints}} -


-

Complaints against {{domain}}

- - -{{#each complaints}} - - - - - -{{/each}} -
CategorySubmittedReviewed
{{category}}{{submitTime}}{{#if reviewed}}✓{{/if}}
-{{/if}} -
- -{{>search/parts/search-footer}} - diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/search-form.hdb b/code/services-application/search-service/src/main/resources/templates/search/parts/search-form.hdb index e5a1187e..63375e92 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/parts/search-form.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/parts/search-form.hdb @@ -1,4 +1,4 @@ -
+
\ No newline at end of file diff --git a/code/services-application/search-service/src/main/resources/templates/search/parts/site-info-links.hdb b/code/services-application/search-service/src/main/resources/templates/search/parts/site-info-links.hdb deleted file mode 100644 index 0e16be4b..00000000 --- a/code/services-application/search-service/src/main/resources/templates/search/parts/site-info-links.hdb +++ /dev/null @@ -1,18 +0,0 @@ -
-

Links

-
-
-
- Link Graph - Ranking: {{ranking}}%
- Incoming Links: {{incomingLinks}}
- Outbound Links: {{outboundLinks}}
-
-
-
- Explore - Which pages link here?
- Explore similar domains
-
-
-
\ No newline at end of file diff --git a/code/services-application/search-service/src/main/resources/templates/search/search-results.hdb b/code/services-application/search-service/src/main/resources/templates/search/search-results.hdb index f0f34c97..70eab6ce 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/search-results.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/search-results.hdb @@ -20,6 +20,12 @@