From f050bf5c4c3b38abfcb20b427d84d638dafc3c58 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 5 Dec 2024 14:00:17 +0100 Subject: [PATCH] (WIP) Initial semi-working transformation to new tailwind UI Still missing is a proper build, we're currently pulling in tailwind from a CDN, which is no bueno in prod. There's also a lot of polish remaining everywhere, dead links, etc. --- .gitignore | 1 + .../browse/model/BrowseResultSet.java | 4 + .../api/domains/model/DomainInformation.java | 17 ++ .../api/domains/model/SimilarDomain.java | 10 +- .../search-service/build.gradle | 1 + .../nu/marginalia/search/JteRenderer.java | 29 ++ .../nu/marginalia/search/SearchOperator.java | 46 ++-- .../search/SearchQueryParamFactory.java | 2 +- .../nu/marginalia/search/SearchService.java | 13 +- .../search/command/SearchParameters.java | 57 +++- .../command/commands/BrowseCommand.java | 62 ++--- .../command/commands/SearchCommand.java | 16 +- .../search/model/ClusteredUrlDetails.java | 48 ++++ .../search/model/DecoratedSearchResults.java | 24 +- .../search/model/GroupedUrlDetails.java | 35 +++ .../marginalia/search/model/NavbarModel.java | 85 ++++++ .../marginalia/search/model/ResultsPage.java | 4 + .../search/model/SearchErrorMessageModel.java | 6 + .../search/model/SearchFilters.java | 128 +++++++-- .../search/model/SearchProfile.java | 4 +- .../search/model/SimpleSearchResults.java | 13 + .../marginalia/search/model/UrlDetails.java | 65 +---- .../search/svc/SearchBrowseService.java | 39 ++- .../search/svc/SearchCrosstalkService.java | 7 +- .../search/svc/SearchErrorPageService.java | 57 ++-- .../search/svc/SearchFlagSiteService.java | 2 +- .../search/svc/SearchFrontPageService.java | 28 +- .../search/svc/SearchQueryService.java | 2 +- .../search/svc/SearchSiteInfoService.java | 181 ++++++------- .../search-service/resources/jte/.jteroot | 0 .../resources/jte/explore/main.jte | 58 ++++ .../resources/jte/part/footerLegal.jte | 36 +++ .../resources/jte/part/head.jte | 30 +++ 
.../resources/jte/part/navbar.jte | 33 +++ .../resources/jte/serp/error.jte | 48 ++++ .../resources/jte/serp/first.jte | 69 +++++ .../resources/jte/serp/main.jte | 91 +++++++ .../resources/jte/serp/part/footerHowto.jte | 91 +++++++ .../resources/jte/serp/part/matchogram.jte | 24 ++ .../resources/jte/serp/part/mobile-menu.jte | 50 ++++ .../resources/jte/serp/part/result.jte | 108 ++++++++ .../resources/jte/serp/part/searchform.jte | 41 +++ .../resources/jte/serp/part/sidebar.jte | 59 ++++ .../resources/jte/siteinfo/main.jte | 108 ++++++++ .../jte/siteinfo/part/linkedDomains.jte | 67 +++++ .../resources/jte/siteinfo/start.jte | 91 +++++++ .../resources/jte/siteinfo/view/backlinks.jte | 60 +++++ .../resources/jte/siteinfo/view/docs.jte | 89 +++++++ .../resources/jte/siteinfo/view/overview.jte | 195 ++++++++++++++ .../jte/siteinfo/view/reportDomain.jte | 111 ++++++++ .../command/commands/BangCommandTest.java | 2 +- .../search/paperdoll/JtePaperDoll.java | 122 +++++++++ .../search/rendering/MockedSearchResults.java | 252 ++++++++++++++++++ .../search/rendering/RenderingTest.java | 30 +++ settings.gradle | 4 + 55 files changed, 2521 insertions(+), 334 deletions(-) create mode 100644 code/services-application/search-service/java/nu/marginalia/search/JteRenderer.java create mode 100644 code/services-application/search-service/java/nu/marginalia/search/model/GroupedUrlDetails.java create mode 100644 code/services-application/search-service/java/nu/marginalia/search/model/NavbarModel.java create mode 100644 code/services-application/search-service/java/nu/marginalia/search/model/ResultsPage.java create mode 100644 code/services-application/search-service/java/nu/marginalia/search/model/SearchErrorMessageModel.java create mode 100644 code/services-application/search-service/java/nu/marginalia/search/model/SimpleSearchResults.java create mode 100644 code/services-application/search-service/resources/jte/.jteroot create mode 100644 
code/services-application/search-service/resources/jte/explore/main.jte create mode 100644 code/services-application/search-service/resources/jte/part/footerLegal.jte create mode 100644 code/services-application/search-service/resources/jte/part/head.jte create mode 100644 code/services-application/search-service/resources/jte/part/navbar.jte create mode 100644 code/services-application/search-service/resources/jte/serp/error.jte create mode 100644 code/services-application/search-service/resources/jte/serp/first.jte create mode 100644 code/services-application/search-service/resources/jte/serp/main.jte create mode 100644 code/services-application/search-service/resources/jte/serp/part/footerHowto.jte create mode 100644 code/services-application/search-service/resources/jte/serp/part/matchogram.jte create mode 100644 code/services-application/search-service/resources/jte/serp/part/mobile-menu.jte create mode 100644 code/services-application/search-service/resources/jte/serp/part/result.jte create mode 100644 code/services-application/search-service/resources/jte/serp/part/searchform.jte create mode 100644 code/services-application/search-service/resources/jte/serp/part/sidebar.jte create mode 100644 code/services-application/search-service/resources/jte/siteinfo/main.jte create mode 100644 code/services-application/search-service/resources/jte/siteinfo/part/linkedDomains.jte create mode 100644 code/services-application/search-service/resources/jte/siteinfo/start.jte create mode 100644 code/services-application/search-service/resources/jte/siteinfo/view/backlinks.jte create mode 100644 code/services-application/search-service/resources/jte/siteinfo/view/docs.jte create mode 100644 code/services-application/search-service/resources/jte/siteinfo/view/overview.jte create mode 100644 code/services-application/search-service/resources/jte/siteinfo/view/reportDomain.jte create mode 100644 
code/services-application/search-service/test/nu/marginalia/search/paperdoll/JtePaperDoll.java create mode 100644 code/services-application/search-service/test/nu/marginalia/search/rendering/MockedSearchResults.java create mode 100644 code/services-application/search-service/test/nu/marginalia/search/rendering/RenderingTest.java diff --git a/.gitignore b/.gitignore index a970eb30..248049f5 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ build/ lombok.config Dockerfile run +jte-classes \ No newline at end of file diff --git a/code/features-search/random-websites/java/nu/marginalia/browse/model/BrowseResultSet.java b/code/features-search/random-websites/java/nu/marginalia/browse/model/BrowseResultSet.java index 5d6f7b91..d828a230 100644 --- a/code/features-search/random-websites/java/nu/marginalia/browse/model/BrowseResultSet.java +++ b/code/features-search/random-websites/java/nu/marginalia/browse/model/BrowseResultSet.java @@ -6,4 +6,8 @@ public record BrowseResultSet(Collection results, String focusDoma public BrowseResultSet(Collection results) { this(results, ""); } + + public boolean hasFocusDomain() { + return focusDomain != null && !focusDomain.isBlank(); + } } diff --git a/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/DomainInformation.java b/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/DomainInformation.java index 5dff4d18..26b5a4c9 100644 --- a/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/DomainInformation.java +++ b/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/DomainInformation.java @@ -71,6 +71,23 @@ public class DomainInformation { return new String(Character.toChars(firstChar)) + new String(Character.toChars(secondChar)); } + public String getAsnFlag() { + if (asnCountry == null || asnCountry.codePointCount(0, asnCountry.length()) != 2) { + return ""; + } + String country = asnCountry; + + if ("UK".equals(country)) { + country = "GB"; + } + + int offset 
= 0x1F1E6; + int asciiOffset = 0x41; + int firstChar = Character.codePointAt(country, 0) - asciiOffset + offset; + int secondChar = Character.codePointAt(country, 1) - asciiOffset + offset; + return new String(Character.toChars(firstChar)) + new String(Character.toChars(secondChar)); + } + public EdgeDomain getDomain() { return this.domain; } diff --git a/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/SimilarDomain.java b/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/SimilarDomain.java index d15d0f3d..9efa2073 100644 --- a/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/SimilarDomain.java +++ b/code/functions/domain-info/api/java/nu/marginalia/api/domains/model/SimilarDomain.java @@ -52,12 +52,12 @@ public record SimilarDomain(EdgeUrl url, return NONE; } - public String toString() { + public String faIcon() { return switch (this) { - case FOWARD -> "→"; - case BACKWARD -> "←"; - case BIDIRECTIONAL -> "⇆"; - case NONE -> "-"; + case FOWARD -> "fa-solid fa-arrow-right"; + case BACKWARD -> "fa-solid fa-arrow-left"; + case BIDIRECTIONAL -> "fa-solid fa-arrow-right-arrow-left"; + case NONE -> ""; }; } diff --git a/code/services-application/search-service/build.gradle b/code/services-application/search-service/build.gradle index 55c2aa90..641405b0 100644 --- a/code/services-application/search-service/build.gradle +++ b/code/services-application/search-service/build.gradle @@ -70,6 +70,7 @@ dependencies { implementation libs.bundles.jetty implementation libs.opencsv implementation libs.trove + implementation libs.jte implementation libs.fastutil implementation libs.bundles.gson implementation libs.bundles.mariadb diff --git a/code/services-application/search-service/java/nu/marginalia/search/JteRenderer.java b/code/services-application/search-service/java/nu/marginalia/search/JteRenderer.java new file mode 100644 index 00000000..05bd63fe --- /dev/null +++ 
b/code/services-application/search-service/java/nu/marginalia/search/JteRenderer.java @@ -0,0 +1,29 @@ +package nu.marginalia.search; + +import gg.jte.CodeResolver; +import gg.jte.ContentType; +import gg.jte.TemplateEngine; +import gg.jte.output.StringOutput; +import gg.jte.resolve.ResourceCodeResolver; +import jakarta.inject.Singleton; + +import java.util.Map; + +@Singleton +public class JteRenderer { + private final CodeResolver codeResolver = new ResourceCodeResolver("jte"); + private final TemplateEngine templateEngine = TemplateEngine.create(codeResolver, ContentType.Html); + + public String render(String template, Object model) { + StringOutput output = new StringOutput(); + templateEngine.render(template, model, output); + return output.toString(); + } + + public String render(String template, Map models) { + StringOutput output = new StringOutput(); + templateEngine.render(template, models, output); + return output.toString(); + } + +} diff --git a/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java b/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java index 9a86db64..7416f963 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchOperator.java @@ -14,10 +14,7 @@ import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.crawl.DomainIndexingState; import nu.marginalia.search.command.SearchParameters; -import nu.marginalia.search.model.ClusteredUrlDetails; -import nu.marginalia.search.model.DecoratedSearchResults; -import nu.marginalia.search.model.SearchFilters; -import nu.marginalia.search.model.UrlDetails; +import nu.marginalia.search.model.*; import nu.marginalia.search.results.UrlDeduplicator; import nu.marginalia.search.svc.SearchQueryCountService; import nu.marginalia.search.svc.SearchUnitConversionService; @@ -75,9 
+72,10 @@ public class SearchOperator { this.searchVisitorCount = searchVisitorCount; } - public List doSiteSearch(String domain, + public SimpleSearchResults doSiteSearch(String domain, int domainId, - int count) { + int count, + int page) { var queryParams = paramFactory.forSiteSearch(domain, domainId, count); var queryResponse = queryClient.search(queryParams); @@ -85,15 +83,16 @@ public class SearchOperator { return getResultsFromQuery(queryResponse); } - public List doBacklinkSearch(String domain) { + public SimpleSearchResults doBacklinkSearch(String domain, int page) { - var queryParams = paramFactory.forBacklinkSearch(domain); + var queryParams = paramFactory.forBacklinkSearch(domain, page); var queryResponse = queryClient.search(queryParams); + return getResultsFromQuery(queryResponse); } - public List doLinkSearch(String source, String dest) { + public SimpleSearchResults doLinkSearch(String source, String dest) { var queryParams = paramFactory.forLinkSearch(source, dest); var queryResponse = queryClient.search(queryParams); @@ -110,7 +109,7 @@ public class SearchOperator { var queryParams = paramFactory.forRegularSearch(userParams); QueryResponse queryResponse = queryClient.search(queryParams); - var queryResults = getResultsFromQuery(queryResponse); + var queryResults = getResultsFromQuery(queryResponse).results; // Cluster the results based on the query response List clusteredResults = SearchResultClusterer @@ -126,17 +125,17 @@ public class SearchOperator { String evalResult = getFutureOrDefault(eval, ""); String focusDomain = queryResponse.domain(); - int focusDomainId = focusDomain == null + int focusDomainId = (focusDomain == null || focusDomain.isBlank()) ? 
-1 - : domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(-1); + : domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(0); List problems = getProblems(evalResult, queryResults, queryResponse); - List resultPages = IntStream.rangeClosed(1, queryResponse.totalPages()) - .mapToObj(number -> new DecoratedSearchResults.Page( + List resultPages = IntStream.rangeClosed(1, queryResponse.totalPages()) + .mapToObj(number -> new ResultsPage( number, number == userParams.page(), - userParams.withPage(number).renderUrl(websiteUrl) + userParams.withPage(number).renderUrl() )) .toList(); @@ -146,7 +145,7 @@ public class SearchOperator { .problems(problems) .evalResult(evalResult) .results(clusteredResults) - .filters(new SearchFilters(websiteUrl, userParams)) + .filters(new SearchFilters(userParams)) .focusDomain(focusDomain) .focusDomainId(focusDomainId) .resultPages(resultPages) @@ -154,18 +153,28 @@ public class SearchOperator { } - public List getResultsFromQuery(QueryResponse queryResponse) { + public SimpleSearchResults getResultsFromQuery(QueryResponse queryResponse) { final QueryLimits limits = queryResponse.specs().queryLimits; final UrlDeduplicator deduplicator = new UrlDeduplicator(limits.resultsByDomain()); // Update the query count (this is what you see on the front page) searchVisitorCount.registerQuery(); - return queryResponse.results().stream() + List details = queryResponse.results().stream() .filter(deduplicator::shouldRetain) .limit(limits.resultsTotal()) .map(SearchOperator::createDetails) .toList(); + + List pages = IntStream.rangeClosed(1, queryResponse.totalPages()) + .mapToObj(number -> new ResultsPage( + number, + number == queryResponse.currentPage(), + "" + )) + .toList(); + + return new SimpleSearchResults(details, pages); } private static UrlDetails createDetails(DecoratedSearchResultItem item) { @@ -181,6 +190,7 @@ public class SearchOperator { item.rankingScore, // termScore item.resultsFromDomain, 
BrailleBlockPunchCards.printBits(item.bestPositions, 64), + item.bestPositions, Long.bitCount(item.bestPositions), item.rawIndexResult, item.rawIndexResult.keywordScores diff --git a/code/services-application/search-service/java/nu/marginalia/search/SearchQueryParamFactory.java b/code/services-application/search-service/java/nu/marginalia/search/SearchQueryParamFactory.java index 6852423a..2d1e8040 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/SearchQueryParamFactory.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchQueryParamFactory.java @@ -62,7 +62,7 @@ public class SearchQueryParamFactory { ); } - public QueryParams forBacklinkSearch(String domain) { + public QueryParams forBacklinkSearch(String domain, int page) { return new QueryParams("links:"+domain, null, List.of(), diff --git a/code/services-application/search-service/java/nu/marginalia/search/SearchService.java b/code/services-application/search-service/java/nu/marginalia/search/SearchService.java index 6abdfcb3..1ba1e8bf 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/SearchService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/SearchService.java @@ -45,6 +45,7 @@ public class SearchService extends Service { SearchAddToCrawlQueueService addToCrawlQueueService, SearchSiteInfoService siteInfoService, SearchCrosstalkService crosstalkService, + SearchBrowseService searchBrowseService, SearchQueryService searchQueryService) throws Exception { @@ -56,26 +57,34 @@ public class SearchService extends Service { Spark.staticFiles.expireTime(600); SearchServiceMetrics.get("/search", searchQueryService::pathSearch); - SearchServiceMetrics.get("/", frontPageService::render); SearchServiceMetrics.get("/news.xml", frontPageService::renderNewsFeed); - SearchServiceMetrics.get("/:resource", this::serveStatic); SearchServiceMetrics.post("/site/suggest/", 
addToCrawlQueueService::suggestCrawling); SearchServiceMetrics.get("/site-search/:site/*", this::siteSearchRedir); + SearchServiceMetrics.get("/site", siteInfoService::handleOverview); SearchServiceMetrics.get("/site/:site", siteInfoService::handle); SearchServiceMetrics.post("/site/:site", siteInfoService::handlePost); + SearchServiceMetrics.get("/explore", searchBrowseService::handleBrowseRandom); + SearchServiceMetrics.get("/explore/:site", searchBrowseService::handleBrowseSite); + SearchServiceMetrics.get("/crosstalk/", crosstalkService::handle); + SearchServiceMetrics.get("/:resource", this::serveStatic); Spark.exception(Exception.class, (e,p,q) -> { logger.error("Error during processing", e); wmsa_search_service_error_count.labels(p.pathInfo(), p.requestMethod()).inc(); errorPageService.serveError(p, q); }); + // Add compression + Spark.after((rq, rs) -> { + rs.header("Content-Encoding", "gzip"); + }); + Spark.awaitInitialization(); } diff --git a/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java b/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java index c10d0092..07a47c91 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/SearchParameters.java @@ -4,16 +4,19 @@ import nu.marginalia.WebsiteUrl; import nu.marginalia.api.searchquery.model.results.ResultRankingParameters; import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.SpecificationLimit; +import nu.marginalia.model.EdgeDomain; import nu.marginalia.search.model.SearchProfile; import spark.Request; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.Objects; +import java.util.StringJoiner; import static nu.marginalia.search.command.SearchRecentParameter.RECENT; -public record SearchParameters(String query, 
+public record SearchParameters(WebsiteUrl url, + String query, SearchProfile profile, SearchJsParameter js, SearchRecentParameter recent, @@ -23,8 +26,21 @@ public record SearchParameters(String query, int page ) { - public SearchParameters(String queryString, Request request) { - this( + public static SearchParameters defaultsForQuery(WebsiteUrl url, String query, int page) { + return new SearchParameters( + url, + query, + SearchProfile.NO_FILTER, + SearchJsParameter.DEFAULT, + SearchRecentParameter.DEFAULT, + SearchTitleParameter.DEFAULT, + SearchAdtechParameter.DEFAULT, + false, + page); + } + public static SearchParameters forRequest(String queryString, WebsiteUrl url, Request request) { + return new SearchParameters( + url, queryString, SearchProfile.getSearchProfile(request.queryParams("profile")), SearchJsParameter.parse(request.queryParams("js")), @@ -41,29 +57,48 @@ public record SearchParameters(String query, } public SearchParameters withProfile(SearchProfile profile) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page); + return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page); } public SearchParameters withJs(SearchJsParameter js) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page); + return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page); } public SearchParameters withAdtech(SearchAdtechParameter adtech) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page); + return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page); } public SearchParameters withRecent(SearchRecentParameter recent) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page); + return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page); } public SearchParameters
withTitle(SearchTitleParameter title) { - return new SearchParameters(query, profile, js, recent, title, adtech, true, page); + return new SearchParameters(url, query, profile, js, recent, title, adtech, true, page); } public SearchParameters withPage(int page) { - return new SearchParameters(query, profile, js, recent, searchTitle, adtech, false, page); + return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page); } - public String renderUrl(WebsiteUrl baseUrl) { + public SearchParameters withQuery(String query) { + return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page); + } + + public String renderUrlWithoutSiteFocus() { + String[] parts = query.split("\\s+"); + StringJoiner newQuery = new StringJoiner(" "); + for (var part : parts) { + if (!part.startsWith("site:")) { + newQuery.add(part); + } + } + return withQuery(newQuery.toString()).renderUrl(); + } + + public String renderUrlWithSiteFocus(EdgeDomain domain) { + return withQuery(query + " site:"+domain.toString()).renderUrl(); + } + + public String renderUrl() { String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s&searchTitle=%s&newfilter=%s&page=%d", URLEncoder.encode(query, StandardCharsets.UTF_8), URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8), @@ -75,7 +110,7 @@ public record SearchParameters(String query, page ); - return baseUrl.withPath(path); + return url.withPath(path); } public ResultRankingParameters.TemporalBias temporalBias() { diff --git a/code/services-application/search-service/java/nu/marginalia/search/command/commands/BrowseCommand.java b/code/services-application/search-service/java/nu/marginalia/search/command/commands/BrowseCommand.java index a889ec3d..83bf35be 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/command/commands/BrowseCommand.java +++ 
b/code/services-application/search-service/java/nu/marginalia/search/command/commands/BrowseCommand.java @@ -1,36 +1,21 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; -import nu.marginalia.browse.model.BrowseResultSet; -import nu.marginalia.renderer.MustacheRenderer; -import nu.marginalia.renderer.RendererFactory; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; -import nu.marginalia.search.svc.SearchBrowseService; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import spark.Response; -import java.io.IOException; -import java.util.Map; import java.util.Optional; import java.util.function.Predicate; import java.util.regex.Pattern; public class BrowseCommand implements SearchCommandInterface { - private final SearchBrowseService browseService; - private final MustacheRenderer browseResultsRenderer; - private final Logger logger = LoggerFactory.getLogger(getClass()); private final Predicate queryPatternPredicate = Pattern.compile("^browse:[.A-Za-z\\-0-9:]+$").asPredicate(); @Inject - public BrowseCommand(SearchBrowseService browseService, - RendererFactory rendererFactory) - throws IOException + public BrowseCommand() { - this.browseService = browseService; - browseResultsRenderer = rendererFactory.renderer("search/browse-results"); } @Override @@ -39,39 +24,24 @@ public class BrowseCommand implements SearchCommandInterface { return Optional.empty(); } - var model = browseSite(parameters.query()); - - if (null == model) - return Optional.empty(); - - return Optional.of(browseResultsRenderer.render(model, - Map.of("query", parameters.query(), - "profile", parameters.profileStr(), - "focusDomain", model.focusDomain()) - )); - } - - - private BrowseResultSet browseSite(String humanQuery) { String definePrefix = "browse:"; - String word = humanQuery.substring(definePrefix.length()).toLowerCase(); + String word = 
parameters.query().substring(definePrefix.length()).toLowerCase(); - try { - if ("random".equals(word)) { - return browseService.getRandomEntries(0); - } - if (word.startsWith("random:")) { - int set = Integer.parseInt(word.split(":")[1]); - return browseService.getRandomEntries(set); - } - else { - return browseService.getRelatedEntries(word); - } - } - catch (Exception ex) { - logger.info("No Results"); - return null; + String redirectPath; + + if (word.equals("random")) { + redirectPath = "/explore"; + } else { + redirectPath = "/explore/" + word; } + + return Optional.of(""" + + + + Redirecting... + + """.formatted(redirectPath)); } diff --git a/code/services-application/search-service/java/nu/marginalia/search/command/commands/SearchCommand.java b/code/services-application/search-service/java/nu/marginalia/search/command/commands/SearchCommand.java index 6684bf74..9ce1e9d0 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/command/commands/SearchCommand.java +++ b/code/services-application/search-service/java/nu/marginalia/search/command/commands/SearchCommand.java @@ -1,35 +1,37 @@ package nu.marginalia.search.command.commands; import com.google.inject.Inject; -import nu.marginalia.renderer.MustacheRenderer; -import nu.marginalia.renderer.RendererFactory; +import nu.marginalia.search.JteRenderer; import nu.marginalia.search.SearchOperator; import nu.marginalia.search.command.SearchCommandInterface; import nu.marginalia.search.command.SearchParameters; import nu.marginalia.search.model.DecoratedSearchResults; +import nu.marginalia.search.model.NavbarModel; import spark.Response; import java.io.IOException; +import java.util.Map; import java.util.Optional; public class SearchCommand implements SearchCommandInterface { private final SearchOperator searchOperator; - private final MustacheRenderer searchResultsRenderer; + private final JteRenderer jteRenderer; @Inject public SearchCommand(SearchOperator searchOperator, - 
RendererFactory rendererFactory) throws IOException { + JteRenderer jteRenderer) throws IOException { this.searchOperator = searchOperator; - - searchResultsRenderer = rendererFactory.renderer("search/search-results"); + this.jteRenderer = jteRenderer; } @Override public Optional process(Response response, SearchParameters parameters) { try { DecoratedSearchResults results = searchOperator.doSearch(parameters); - return Optional.of(searchResultsRenderer.render(results)); + return Optional.of(jteRenderer.render("serp/main.jte", + Map.of("results", results, "navbar", NavbarModel.SEARCH) + )); } catch (InterruptedException ex) { Thread.currentThread().interrupt(); diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/ClusteredUrlDetails.java b/code/services-application/search-service/java/nu/marginalia/search/model/ClusteredUrlDetails.java index 701a2c51..eb391ed1 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/model/ClusteredUrlDetails.java +++ b/code/services-application/search-service/java/nu/marginalia/search/model/ClusteredUrlDetails.java @@ -1,6 +1,7 @@ package nu.marginalia.search.model; import nu.marginalia.model.EdgeDomain; +import nu.marginalia.model.idx.DocumentFlags; import nu.marginalia.model.idx.WordFlags; import org.jetbrains.annotations.NotNull; @@ -16,6 +17,9 @@ public class ClusteredUrlDetails implements Comparable { @NotNull public final List rest; + /** Selects color scheme in the GUI for the result */ + public final PostColorScheme colorScheme; + /** Create a new ClusteredUrlDetails from a collection of UrlDetails, * with the best result as "first", and the others, in descending order * of quality as the "rest"... 
@@ -32,6 +36,7 @@ public class ClusteredUrlDetails implements Comparable { this.first = items.removeFirst(); this.rest = items; + this.colorScheme = PostColorScheme.select(first); double bestScore = first.termScore; double scoreLimit = Math.min(4.0, bestScore * 1.25); @@ -64,6 +69,14 @@ public class ClusteredUrlDetails implements Comparable { public ClusteredUrlDetails(@NotNull UrlDetails onlyFirst) { this.first = onlyFirst; this.rest = Collections.emptyList(); + this.colorScheme = PostColorScheme.select(first); + } + + /** For tests */ + public ClusteredUrlDetails(@NotNull UrlDetails onlyFirst, @NotNull List rest) { + this.first = onlyFirst; + this.rest = rest; + this.colorScheme = PostColorScheme.select(first); } // For renderer use, do not remove @@ -99,4 +112,39 @@ public class ClusteredUrlDetails implements Comparable { public int compareTo(@NotNull ClusteredUrlDetails o) { return Objects.compare(first, o.first, UrlDetails::compareTo); } + + public enum PostColorScheme { + Slate("bg-slate-100", "text-slate-950", "bg-slate-200", "text-black"), + Green("bg-green-50", "text-green-900", "bg-green-100", "text-black"), + Purple("bg-purple-50", "text-purple-900", "bg-purple-100", "text-black"), + White("bg-white", "text-blue-950", "bg-gray-100", "text-black"); + + PostColorScheme(String backgroundColor, String textColor, String backgroundColor2, String textColor2) { + this.backgroundColor = backgroundColor; + this.textColor = textColor; + this.backgroundColor2 = backgroundColor2; + this.textColor2 = textColor2; + } + + public static PostColorScheme select(UrlDetails result) { + long encodedMetadata = result.resultItem.encodedDocMetadata; + if (DocumentFlags.PlainText.isPresent(encodedMetadata)) { + return Slate; + } + else if (DocumentFlags.GeneratorWiki.isPresent(encodedMetadata)) { + return Green; + } + else if (DocumentFlags.GeneratorForum.isPresent(encodedMetadata)) { + return Purple; + } + else { + return White; + } + } + + public final String backgroundColor; + 
public final String textColor; + public final String backgroundColor2; + public final String textColor2; + } } diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/DecoratedSearchResults.java b/code/services-application/search-service/java/nu/marginalia/search/model/DecoratedSearchResults.java index 87fc336a..aed16e15 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/model/DecoratedSearchResults.java +++ b/code/services-application/search-service/java/nu/marginalia/search/model/DecoratedSearchResults.java @@ -1,5 +1,6 @@ package nu.marginalia.search.model; +import nu.marginalia.WebsiteUrl; import nu.marginalia.search.command.SearchParameters; import java.util.List; @@ -21,7 +22,7 @@ public class DecoratedSearchResults { String focusDomain, int focusDomainId, SearchFilters filters, - List resultPages) { + List resultPages) { this.params = params; this.problems = problems; this.evalResult = evalResult; @@ -62,27 +63,29 @@ public class DecoratedSearchResults { return focusDomainId; } + public boolean hasFocusDomain() { + return focusDomainId >= 0; + } + public SearchFilters getFilters() { return filters; } - public List getResultPages() { + public List getResultPages() { return resultPages; } private final String focusDomain; private final int focusDomainId; + private final SearchFilters filters; - private final List resultPages; + private final List resultPages; public boolean isMultipage() { return resultPages.size() > 1; } - public record Page(int number, boolean current, String href) { - } - // These are used by the search form, they look unused in the IDE but are used by the mustache template, // DO NOT REMOVE THEM public int getResultCount() { @@ -130,7 +133,8 @@ public class DecoratedSearchResults { private String focusDomain; private int focusDomainId; private SearchFilters filters; - private List resultPages; + private List resultPages; + private WebsiteUrl websiteUrl; 
DecoratedSearchResultsBuilder() { } @@ -170,7 +174,7 @@ public class DecoratedSearchResults { return this; } - public DecoratedSearchResultsBuilder resultPages(List resultPages) { + public DecoratedSearchResultsBuilder resultPages(List resultPages) { this.resultPages = resultPages; return this; } @@ -178,9 +182,5 @@ public class DecoratedSearchResults { public DecoratedSearchResults build() { return new DecoratedSearchResults(this.params, this.problems, this.evalResult, this.results, this.focusDomain, this.focusDomainId, this.filters, this.resultPages); } - - public String toString() { - return "DecoratedSearchResults.DecoratedSearchResultsBuilder(params=" + this.params + ", problems=" + this.problems + ", evalResult=" + this.evalResult + ", results=" + this.results + ", focusDomain=" + this.focusDomain + ", focusDomainId=" + this.focusDomainId + ", filters=" + this.filters + ", resultPages=" + this.resultPages + ")"; - } } } diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/GroupedUrlDetails.java b/code/services-application/search-service/java/nu/marginalia/search/model/GroupedUrlDetails.java new file mode 100644 index 00000000..43c18dae --- /dev/null +++ b/code/services-application/search-service/java/nu/marginalia/search/model/GroupedUrlDetails.java @@ -0,0 +1,35 @@ +package nu.marginalia.search.model; + +import nu.marginalia.model.EdgeDomain; + +import java.util.Comparator; +import java.util.List; +import java.util.stream.Collectors; + +/** A number of url details grouped by their domain. This is conceptually similar to + * ClusteredUrlDetails, but it has more logic to conditionally perform this grouping operation, + * whereas this class always groups the domains. 
+ * */ +public record GroupedUrlDetails (List urlDetails) { + public GroupedUrlDetails(List urlDetails) { + this.urlDetails = urlDetails; + if (urlDetails.isEmpty()) { + throw new IllegalArgumentException("urlDetails must never be empty"); + } + } + public EdgeDomain domain() { + return urlDetails.getFirst().getUrl().domain; + } + + public UrlDetails first() { + return urlDetails.getFirst(); + } + + public static List groupResults(List details) { + return details.stream() + .sorted(Comparator.comparing(d -> d.termScore)) + .collect(Collectors.groupingBy(d -> d.getUrl().domain)) + .values().stream().map(GroupedUrlDetails::new) + .toList(); + } +} diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/NavbarModel.java b/code/services-application/search-service/java/nu/marginalia/search/model/NavbarModel.java new file mode 100644 index 00000000..46aea24e --- /dev/null +++ b/code/services-application/search-service/java/nu/marginalia/search/model/NavbarModel.java @@ -0,0 +1,85 @@ +package nu.marginalia.search.model; + +import java.util.List; + +public record NavbarModel(NavbarGroup first, NavbarGroup second) { + + public record NavbarEntry(String name, String url, boolean active) { } + public record NavbarGroup(List entries) { } + + public static NavbarModel LIMBO = + new NavbarModel( + new NavbarGroup( + List.of( + new NavbarEntry("Search", "/", false), + new NavbarEntry("Domains", "/site", false), + new NavbarEntry("Explore", "/explore", false) + ) + ) + , + new NavbarGroup( + List.of( + new NavbarEntry("About", "/", false), + new NavbarEntry("API", "/", false), + new NavbarEntry("Donate", "/", false) + ) + ) + ); + + public static NavbarModel SEARCH = + new NavbarModel( + new NavbarGroup( + List.of( + new NavbarEntry("Search", "/", true), + new NavbarEntry("Domains", "/site", false), + new NavbarEntry("Explore", "/explore", false) + ) + ) + , + new NavbarGroup( + List.of( + new NavbarEntry("About", "/", false), + new NavbarEntry("API", 
"/", false), + new NavbarEntry("Donate", "/", false) + ) + ) + ); + + public static NavbarModel SITEINFO = + new NavbarModel( + new NavbarGroup( + List.of( + new NavbarEntry("Search", "/", false), + new NavbarEntry("Domains", "/site", true), + new NavbarEntry("Explore", "/explore", false) + ) + ) + , + new NavbarGroup( + List.of( + new NavbarEntry("About", "/", false), + new NavbarEntry("API", "/", false), + new NavbarEntry("Donate", "/", false) + ) + ) + ); + + public static NavbarModel EXPLORE = + new NavbarModel( + new NavbarGroup( + List.of( + new NavbarEntry("Search", "/", false), + new NavbarEntry("Domains", "/site", false), + new NavbarEntry("Explore", "/explore", true) + ) + ) + , + new NavbarGroup( + List.of( + new NavbarEntry("About", "/", false), + new NavbarEntry("API", "/", false), + new NavbarEntry("Donate", "/", false) + ) + ) + ); +} diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/ResultsPage.java b/code/services-application/search-service/java/nu/marginalia/search/model/ResultsPage.java new file mode 100644 index 00000000..625a03e4 --- /dev/null +++ b/code/services-application/search-service/java/nu/marginalia/search/model/ResultsPage.java @@ -0,0 +1,4 @@ +package nu.marginalia.search.model; + +public record ResultsPage(int number, boolean current, String href) { +} diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/SearchErrorMessageModel.java b/code/services-application/search-service/java/nu/marginalia/search/model/SearchErrorMessageModel.java new file mode 100644 index 00000000..79430fd4 --- /dev/null +++ b/code/services-application/search-service/java/nu/marginalia/search/model/SearchErrorMessageModel.java @@ -0,0 +1,6 @@ +package nu.marginalia.search.model; + +import nu.marginalia.search.command.SearchParameters; + +public record SearchErrorMessageModel(String errorTitle, String errorRest, SearchParameters parameters, SearchFilters filters) { +} diff --git 
a/code/services-application/search-service/java/nu/marginalia/search/model/SearchFilters.java b/code/services-application/search-service/java/nu/marginalia/search/model/SearchFilters.java index bd46b7fa..207d6404 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/model/SearchFilters.java +++ b/code/services-application/search-service/java/nu/marginalia/search/model/SearchFilters.java @@ -7,8 +7,6 @@ import java.util.List; /** Models the search filters displayed next to the search results */ public class SearchFilters { - private final WebsiteUrl url; - public final String currentFilter; // These are necessary for the renderer to access the data @@ -45,9 +43,27 @@ public class SearchFilters { public List> getFilterGroups() { return filterGroups; } + public List searchOptions() { + return List.of( + searchTitleOption, + showRecentOption, + removeJsOption, + reduceAdtechOption + ); + } - public SearchFilters(WebsiteUrl url, SearchParameters parameters) { - this.url = url; + public SearchFilters(WebsiteUrl url) { + this(new SearchParameters(url, "", + SearchProfile.NO_FILTER, + SearchJsParameter.DEFAULT, + SearchRecentParameter.DEFAULT, + SearchTitleParameter.DEFAULT, + SearchAdtechParameter.DEFAULT, + false, + 1)); + } + + public SearchFilters(SearchParameters parameters) { removeJsOption = new RemoveJsOption(parameters); reduceAdtechOption = new ReduceAdtechOption(parameters); @@ -59,40 +75,48 @@ public class SearchFilters { filterGroups = List.of( List.of( - new Filter("No Filter", SearchProfile.NO_FILTER, parameters), -// new Filter("Popular", SearchProfile.POPULAR, parameters), - new Filter("Small Web", SearchProfile.SMALLWEB, parameters), - new Filter("Blogosphere", SearchProfile.BLOGOSPHERE, parameters), - new Filter("Academia", SearchProfile.ACADEMIA, parameters) + new Filter("All", "fa-globe", SearchProfile.NO_FILTER, parameters), + new Filter("Blogs", "fa-blog", SearchProfile.BLOGOSPHERE, parameters), + new Filter("Academia", 
"fa-university", SearchProfile.ACADEMIA, parameters) ), List.of( - new Filter("Vintage", SearchProfile.VINTAGE, parameters), - new Filter("Plain Text", SearchProfile.PLAIN_TEXT, parameters), - new Filter("~tilde", SearchProfile.TILDE, parameters) + new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters), + new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters), + new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters) ), List.of( - new Filter("Wiki", SearchProfile.WIKI, parameters), - new Filter("Forum", SearchProfile.FORUM, parameters), - new Filter("Docs", SearchProfile.DOCS, parameters), - new Filter("Recipes", SearchProfile.FOOD, parameters) + new Filter("Wikis", "fa-pencil", SearchProfile.WIKI, parameters), + new Filter("Forums", "fa-comments", SearchProfile.FORUM, parameters), + new Filter("Recipes", "fa-utensils", SearchProfile.FOOD, parameters) ) ); } - public class RemoveJsOption { + public class RemoveJsOption implements SearchOption { private final SearchJsParameter value; - + private final String icon = "fa-wrench"; public final String url; + + public String value() { + return this.value.name(); + } + public String getUrl() { return url; } - + public String id() { + return getClass().getSimpleName(); + } public boolean isSet() { return value.equals(SearchJsParameter.DENY_JS); } + public String icon() { + return icon; + } + public String name() { return "Remove Javascript"; } @@ -105,14 +129,26 @@ public class SearchFilters { default -> SearchJsParameter.DENY_JS; }; - this.url = parameters.withJs(toggledValue).renderUrl(SearchFilters.this.url); + this.url = parameters.withJs(toggledValue).renderUrl(); } } - public class ReduceAdtechOption { + public class ReduceAdtechOption implements SearchOption { private final SearchAdtechParameter value; - + private final String icon = "fa-dumpster-fire"; public final String url; + + public String value() { + return this.value.name(); + } + + public String id() { + 
return getClass().getSimpleName(); + } + public String icon() { + return icon; + } + public String getUrl() { return url; } @@ -133,17 +169,28 @@ public class SearchFilters { default -> SearchAdtechParameter.REDUCE; }; - this.url = parameters.withAdtech(toggledValue).renderUrl(SearchFilters.this.url); + this.url = parameters.withAdtech(toggledValue).renderUrl(); } } - public class ShowRecentOption { + public class ShowRecentOption implements SearchOption { private final SearchRecentParameter value; + private final String icon = "fa-baby"; + + public String value() { + return this.value.name(); + } public final String url; public String getUrl() { return url; } + public String id() { + return getClass().getSimpleName(); + } + public String icon() { + return icon; + } public boolean isSet() { return value.equals(SearchRecentParameter.RECENT); @@ -161,14 +208,26 @@ public class SearchFilters { default -> SearchRecentParameter.RECENT; }; - this.url = parameters.withRecent(toggledValue).renderUrl(SearchFilters.this.url); + this.url = parameters.withRecent(toggledValue).renderUrl(); } } - public class SearchTitleOption { + public class SearchTitleOption implements SearchOption { private final SearchTitleParameter value; + public String icon = "fa-angle-up"; public final String url; + + public String value() { + return this.value.name(); + } + + public String id() { + return getClass().getSimpleName(); + } + public String icon() { + return icon; + } public String getUrl() { return url; } @@ -189,23 +248,34 @@ public class SearchFilters { default -> SearchTitleParameter.TITLE; }; - this.url = parameters.withTitle(toggledValue).renderUrl(SearchFilters.this.url); + this.url = parameters.withTitle(toggledValue).renderUrl(); } } + public interface SearchOption { + String name(); + boolean isSet(); + String getUrl(); + String icon(); + String id(); + String value(); + } + public class Filter { + public final String icon; public final SearchProfile profile; public final String 
displayName; public final boolean current; public final String url; - public Filter(String displayName, SearchProfile profile, SearchParameters parameters) { + public Filter(String displayName, String icon, SearchProfile profile, SearchParameters parameters) { this.displayName = displayName; + this.icon = icon; this.profile = profile; this.current = profile.equals(parameters.profile()); - this.url = parameters.withProfile(profile).renderUrl(SearchFilters.this.url); + this.url = parameters.withProfile(profile).renderUrl(); } public String getDisplayName() { diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/SearchProfile.java b/code/services-application/search-service/java/nu/marginalia/search/model/SearchProfile.java index 955c3fcb..316663bf 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/model/SearchProfile.java +++ b/code/services-application/search-service/java/nu/marginalia/search/model/SearchProfile.java @@ -1,9 +1,9 @@ package nu.marginalia.search.model; -import nu.marginalia.index.query.limit.SpecificationLimit; -import nu.marginalia.model.crawl.HtmlFeature; import nu.marginalia.api.searchquery.model.query.SearchQuery; import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier; +import nu.marginalia.index.query.limit.SpecificationLimit; +import nu.marginalia.model.crawl.HtmlFeature; import java.util.Objects; diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/SimpleSearchResults.java b/code/services-application/search-service/java/nu/marginalia/search/model/SimpleSearchResults.java new file mode 100644 index 00000000..2634b9be --- /dev/null +++ b/code/services-application/search-service/java/nu/marginalia/search/model/SimpleSearchResults.java @@ -0,0 +1,13 @@ +package nu.marginalia.search.model; + +import java.util.List; + +public class SimpleSearchResults { + public final List results; + public final List resultPages; + + public 
SimpleSearchResults(List results, List resultPages) { + this.results = results; + this.resultPages = resultPages; + } +} diff --git a/code/services-application/search-service/java/nu/marginalia/search/model/UrlDetails.java b/code/services-application/search-service/java/nu/marginalia/search/model/UrlDetails.java index 1426f9dc..68500c4f 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/model/UrlDetails.java +++ b/code/services-application/search-service/java/nu/marginalia/search/model/UrlDetails.java @@ -5,6 +5,7 @@ import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.crawl.DomainIndexingState; import nu.marginalia.model.crawl.HtmlFeature; +import nu.marginalia.model.idx.DocumentMetadata; import java.util.ArrayList; import java.util.List; @@ -29,12 +30,14 @@ public class UrlDetails implements Comparable { public int resultsFromSameDomain; + public int topology; public String positions; + public long positionsMask; public int positionsCount; public SearchResultItem resultItem; public List keywordScores; - public UrlDetails(long id, int domainId, EdgeUrl url, String title, String description, String format, int features, DomainIndexingState domainState, double termScore, int resultsFromSameDomain, String positions, int positionsCount, SearchResultItem resultItem, List keywordScores) { + public UrlDetails(long id, int domainId, EdgeUrl url, String title, String description, String format, int features, DomainIndexingState domainState, double termScore, int resultsFromSameDomain, String positions, long positionsMask, int positionsCount, SearchResultItem resultItem, List keywordScores) { this.id = id; this.domainId = domainId; this.url = url; @@ -47,6 +50,8 @@ public class UrlDetails implements Comparable { this.resultsFromSameDomain = resultsFromSameDomain; this.positions = positions; this.positionsCount = positionsCount; + this.positionsMask = 
positionsMask; + this.topology = DocumentMetadata.decodeTopology(resultItem.encodedDocMetadata); this.resultItem = resultItem; this.keywordScores = keywordScores; } @@ -227,67 +232,11 @@ public class UrlDetails implements Comparable { return this.keywordScores; } - public UrlDetails withId(long id) { - return this.id == id ? this : new UrlDetails(id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withDomainId(int domainId) { - return this.domainId == domainId ? this : new UrlDetails(this.id, domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withUrl(EdgeUrl url) { - return this.url == url ? this : new UrlDetails(this.id, this.domainId, url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withTitle(String title) { - return this.title == title ? this : new UrlDetails(this.id, this.domainId, this.url, title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withDescription(String description) { - return this.description == description ? 
this : new UrlDetails(this.id, this.domainId, this.url, this.title, description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withFormat(String format) { - return this.format == format ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withFeatures(int features) { - return this.features == features ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withDomainState(DomainIndexingState domainState) { - return this.domainState == domainState ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withTermScore(double termScore) { - return this.termScore == termScore ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withResultsFromSameDomain(int resultsFromSameDomain) { - return this.resultsFromSameDomain == resultsFromSameDomain ? 
this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withPositions(String positions) { - return this.positions == positions ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, positions, this.positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withPositionsCount(int positionsCount) { - return this.positionsCount == positionsCount ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, positionsCount, this.resultItem, this.keywordScores); - } - - public UrlDetails withResultItem(SearchResultItem resultItem) { - return this.resultItem == resultItem ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, resultItem, this.keywordScores); - } - - public UrlDetails withKeywordScores(List keywordScores) { - return this.keywordScores == keywordScores ? 
this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, keywordScores); - } - public String toString() { return "UrlDetails(id=" + this.getId() + ", domainId=" + this.getDomainId() + ", url=" + this.getUrl() + ", title=" + this.getTitle() + ", description=" + this.getDescription() + ", format=" + this.getFormat() + ", features=" + this.getFeatures() + ", domainState=" + this.getDomainState() + ", termScore=" + this.getTermScore() + ", resultsFromSameDomain=" + this.getResultsFromSameDomain() + ", positions=" + this.getPositions() + ", positionsCount=" + this.getPositionsCount() + ", resultItem=" + this.getResultItem() + ", keywordScores=" + this.getKeywordScores() + ")"; } - public static record UrlProblem(String name, String description) { + public record UrlProblem(String name, String description) { } } diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchBrowseService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchBrowseService.java index 11c2e0e8..ceab6b7b 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchBrowseService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchBrowseService.java @@ -9,12 +9,14 @@ import nu.marginalia.browse.model.BrowseResultSet; import nu.marginalia.db.DbDomainQueries; import nu.marginalia.db.DomainBlacklist; import nu.marginalia.model.EdgeDomain; +import nu.marginalia.search.JteRenderer; +import nu.marginalia.search.model.NavbarModel; import nu.marginalia.search.results.BrowseResultCleaner; +import spark.Request; +import spark.Response; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.io.IOException; +import java.util.*; import 
java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; @@ -26,6 +28,7 @@ public class SearchBrowseService { private final DbDomainQueries domainQueries; private final DomainBlacklist blacklist; private final DomainInfoClient domainInfoClient; + private final JteRenderer jteRenderer; private final BrowseResultCleaner browseResultCleaner; @Inject @@ -33,15 +36,43 @@ public class SearchBrowseService { DbDomainQueries domainQueries, DomainBlacklist blacklist, DomainInfoClient domainInfoClient, + JteRenderer jteRenderer, BrowseResultCleaner browseResultCleaner) { this.randomDomains = randomDomains; this.domainQueries = domainQueries; this.blacklist = blacklist; this.domainInfoClient = domainInfoClient; + this.jteRenderer = jteRenderer; this.browseResultCleaner = browseResultCleaner; } + public String handleBrowseRandom(Request request, Response response) throws IOException { + return jteRenderer.render("explore/main.jte", + Map.of("navbar", NavbarModel.EXPLORE, + "results", getRandomEntries(1) + ) + ); + } + + public String handleBrowseSite(Request request, Response response) throws Exception { + String domainName = request.params("site"); + BrowseResultSet entries; + + try { + entries = getRelatedEntries(domainName); + } + catch (Exception ex) { + entries = new BrowseResultSet(List.of(), domainName); + } + + return jteRenderer.render("explore/main.jte", + Map.of("navbar", NavbarModel.EXPLORE, + "results", entries + ) + ); + } + public BrowseResultSet getRandomEntries(int set) { List results = randomDomains.getRandomDomains(25, blacklist, set); diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchCrosstalkService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchCrosstalkService.java index ddce56ac..968f5927 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchCrosstalkService.java +++ 
b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchCrosstalkService.java @@ -4,6 +4,7 @@ import com.google.inject.Inject; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import nu.marginalia.search.SearchOperator; +import nu.marginalia.search.model.SimpleSearchResults; import nu.marginalia.search.model.UrlDetails; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -42,10 +43,10 @@ public class SearchCrosstalkService { parts[i] = parts[i].trim(); } - var resAtoB = searchOperator.doLinkSearch(parts[0], parts[1]); - var resBtoA = searchOperator.doLinkSearch(parts[1], parts[0]); + SimpleSearchResults resAtoB = searchOperator.doLinkSearch(parts[0], parts[1]); + SimpleSearchResults resBtoA = searchOperator.doLinkSearch(parts[1], parts[0]); - var model = new CrosstalkResult(parts[0], parts[1], resAtoB, resBtoA); + CrosstalkResult model = new CrosstalkResult(parts[0], parts[1], resAtoB.results, resBtoA.results); return renderer.render(model); } diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchErrorPageService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchErrorPageService.java index 346506e7..9ca9a391 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchErrorPageService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchErrorPageService.java @@ -1,9 +1,12 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; -import nu.marginalia.index.api.IndexMqClient; -import nu.marginalia.renderer.MustacheRenderer; -import nu.marginalia.renderer.RendererFactory; +import nu.marginalia.WebsiteUrl; +import nu.marginalia.search.JteRenderer; +import nu.marginalia.search.command.SearchParameters; +import nu.marginalia.search.model.NavbarModel; +import nu.marginalia.search.model.SearchErrorMessageModel; +import 
nu.marginalia.search.model.SearchFilters; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Request; @@ -13,35 +16,39 @@ import java.io.IOException; import java.util.Map; public class SearchErrorPageService { - private final IndexMqClient indexMqClient; + private final WebsiteUrl websiteUrl; + private final JteRenderer jteRenderer; private final Logger logger = LoggerFactory.getLogger(getClass()); - private final MustacheRenderer renderer; @Inject - public SearchErrorPageService(IndexMqClient indexMqClient, - RendererFactory rendererFactory) throws IOException { - - renderer = rendererFactory.renderer("search/error-page-search"); - - this.indexMqClient = indexMqClient; + public SearchErrorPageService(WebsiteUrl websiteUrl, + JteRenderer jteRenderer) throws IOException { + this.websiteUrl = websiteUrl; + this.jteRenderer = jteRenderer; } public void serveError(Request request, Response rsp) { - rsp.body(renderError(request, "Internal error", - """ - An error occurred when communicating with the search engine index. -

- This is hopefully a temporary state of affairs. It may be due to - an upgrade. The index typically takes a about two or three minutes - to reload from a cold restart. Thanks for your patience. - """)); - } - private String renderError(Request request, String title, String message) { - return renderer.render(Map.of("title", title, "message", message, - "profile", request.queryParamOrDefault("profile", ""), - "js", request.queryParamOrDefault("js", ""), - "query", request.queryParamOrDefault("query", "") + var params = SearchParameters.forRequest( + request.queryParamOrDefault("query", ""), + websiteUrl, + request); + + + rsp.body(jteRenderer.render("serp/error.jte", + Map.of("navbar", NavbarModel.LIMBO, + "model", new SearchErrorMessageModel( + "An error occurred when communicating with the search engine index.", + """ + This is hopefully a temporary state of affairs. It may be due to + an upgrade. The index typically takes a about two or three minutes + to reload from a cold restart. Thanks for your patience. 
+ """, + params, + new SearchFilters(params) + ) + ) )); } + } diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFlagSiteService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFlagSiteService.java index c7ccfa34..e04381af 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFlagSiteService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFlagSiteService.java @@ -19,7 +19,7 @@ public class SearchFlagSiteService { private final CategoryItem unknownCategory = new CategoryItem("unknown", "Unknown"); - private final List categories = + public static final List categories = List.of( new CategoryItem("spam", "Spam"), new CategoryItem("freebooting", "Reposting Stolen Content"), diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java index 8ebd9f8f..1c606441 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java @@ -3,9 +3,10 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; -import nu.marginalia.renderer.MustacheRenderer; +import nu.marginalia.WebsiteUrl; import nu.marginalia.renderer.RendererFactory; -import nu.marginalia.search.svc.SearchQueryCountService; +import nu.marginalia.search.JteRenderer; +import nu.marginalia.search.model.NavbarModel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import spark.Request; @@ -19,34 +20,41 @@ import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.List; +import java.util.Map; /** Renders the front 
page (index) */ @Singleton public class SearchFrontPageService { - private final MustacheRenderer template; private final HikariDataSource dataSource; + private final JteRenderer jteRenderer; private final SearchQueryCountService searchVisitorCount; + private final WebsiteUrl websiteUrl; private final Logger logger = LoggerFactory.getLogger(getClass()); @Inject public SearchFrontPageService(RendererFactory rendererFactory, HikariDataSource dataSource, - SearchQueryCountService searchVisitorCount - ) throws IOException { - this.template = rendererFactory.renderer("search/index/index"); + JteRenderer jteRenderer, + SearchQueryCountService searchVisitorCount, WebsiteUrl websiteUrl + ) throws IOException { this.dataSource = dataSource; + this.jteRenderer = jteRenderer; this.searchVisitorCount = searchVisitorCount; + this.websiteUrl = websiteUrl; } public String render(Request request, Response response) { response.header("Cache-control", "public,max-age=3600"); - return template.render(new IndexModel( - getNewsItems(), - searchVisitorCount.getQueriesPerMinute() - )); + return jteRenderer.render("serp/first.jte", + Map.of("navbar", NavbarModel.SEARCH, "websiteUrl", websiteUrl) + ); +// return template.render(new IndexModel( +// getNewsItems(), +// searchVisitorCount.getQueriesPerMinute() +// )); } diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java index 0f4648da..5fd6a5b9 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchQueryService.java @@ -50,7 +50,7 @@ public class SearchQueryService { throw new RedirectException(websiteUrl.url()); } - return new SearchParameters(queryParam.trim(), request); + return SearchParameters.forRequest(queryParam.trim(), websiteUrl, request); } catch 
(Exception ex) { // Bots keep sending bad requests, suppress the error otherwise it will diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java index f56c3b79..e3395e9b 100644 --- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java +++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java @@ -1,6 +1,7 @@ package nu.marginalia.search.svc; import com.google.inject.Inject; +import com.zaxxer.hikari.HikariDataSource; import nu.marginalia.api.domains.DomainInfoClient; import nu.marginalia.api.domains.model.DomainInformation; import nu.marginalia.api.domains.model.SimilarDomain; @@ -13,7 +14,11 @@ import nu.marginalia.model.EdgeDomain; import nu.marginalia.renderer.MustacheRenderer; import nu.marginalia.renderer.RendererFactory; import nu.marginalia.screenshot.ScreenshotService; +import nu.marginalia.search.JteRenderer; import nu.marginalia.search.SearchOperator; +import nu.marginalia.search.model.GroupedUrlDetails; +import nu.marginalia.search.model.NavbarModel; +import nu.marginalia.search.model.ResultsPage; import nu.marginalia.search.model.UrlDetails; import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData; import org.slf4j.Logger; @@ -23,8 +28,7 @@ import spark.Response; import java.io.IOException; import java.sql.SQLException; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; @@ -42,6 +46,9 @@ public class SearchSiteInfoService { private final LiveCaptureClient liveCaptureClient; private final ScreenshotService screenshotService; + private final HikariDataSource dataSource; + private final JteRenderer jteRenderer; + @Inject public SearchSiteInfoService(SearchOperator 
searchOperator, DomainInfoClient domainInfoClient, @@ -50,7 +57,9 @@ public class SearchSiteInfoService { DbDomainQueries domainQueries, FeedsClient feedsClient, LiveCaptureClient liveCaptureClient, - ScreenshotService screenshotService) throws IOException + ScreenshotService screenshotService, + HikariDataSource dataSource, + JteRenderer jteRenderer) throws IOException { this.searchOperator = searchOperator; this.domainInfoClient = domainInfoClient; @@ -62,6 +71,32 @@ public class SearchSiteInfoService { this.feedsClient = feedsClient; this.liveCaptureClient = liveCaptureClient; this.screenshotService = screenshotService; + this.dataSource = dataSource; + this.jteRenderer = jteRenderer; + } + + public Object handleOverview(Request request, Response response) { + List domains = new ArrayList<>(); + + try (var conn = dataSource.getConnection(); + var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, DISCOVER_DATE FROM EC_DOMAIN WHERE NODE_AFFINITY = 0 ORDER BY ID DESC LIMIT 10")) { + + var rs = stmt.executeQuery(); + while (rs.next()) { + domains.add(new SiteOverviewModel.DiscoveredDomain(rs.getString("DOMAIN_NAME"), rs.getString("DISCOVER_DATE"))); + } + } + catch (SQLException ex) { + throw new RuntimeException(); + } + + return jteRenderer.render("siteinfo/start.jte", + Map.of("navbar", NavbarModel.SITEINFO, + "model", new SiteOverviewModel(domains))); + } + + public record SiteOverviewModel(List domains) { + public record DiscoveredDomain(String name, String timestamp) {} } public Object handle(Request request, Response response) throws SQLException { @@ -72,15 +107,18 @@ public class SearchSiteInfoService { return null; } - var model = switch (view) { - case "links" -> listLinks(domainName); - case "docs" -> listDocs(domainName); + int page = Integer.parseInt(request.queryParamOrDefault("page", "1")); + + SiteInfoModel model = switch (view) { + case "links" -> listLinks(domainName, page); + case "docs" -> listDocs(domainName, page); case "info" -> 
listInfo(domainName); case "report" -> reportSite(domainName); default -> listInfo(domainName); }; - return renderer.render(model); + return jteRenderer.render("siteinfo/main.jte", + Map.of("model", model, "navbar", NavbarModel.SITEINFO)); } public Object handlePost(Request request, Response response) throws SQLException { @@ -108,10 +146,11 @@ public class SearchSiteInfoService { var model = new ReportDomain(domainName, domainId, complaints, List.of(), true); - return renderer.render(model); + return jteRenderer.render("siteinfo/main.jte", + Map.of("model", model, "navbar", NavbarModel.SITEINFO)); } - private Object reportSite(String domainName) throws SQLException { + private ReportDomain reportSite(String domainName) throws SQLException { int domainId = domainQueries.getDomainId(new EdgeDomain(domainName)); var existingComplaints = flagSiteService.getExistingComplaints(domainId); @@ -123,15 +162,20 @@ public class SearchSiteInfoService { } - private Backlinks listLinks(String domainName) { + private Backlinks listLinks(String domainName, int page) { + var results = searchOperator.doBacklinkSearch(domainName, page); return new Backlinks(domainName, domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1), - searchOperator.doBacklinkSearch(domainName)); + GroupedUrlDetails.groupResults(results.results), + results.resultPages + ); } private SiteInfoWithContext listInfo(String domainName) { - final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1); + var domain = new EdgeDomain(domainName); + final int domainId = domainQueries.tryGetDomainId(domain).orElse(-1); + boolean viableAliasDomain = domain.aliasDomain().map(alias -> domainQueries.tryGetDomainId(alias).isPresent()).orElse(false); final Future domainInfoFuture; final Future> similarSetFuture; @@ -161,12 +205,13 @@ public class SearchSiteInfoService { feedItemsFuture = feedsClient.getFeed(domainId); } - List sampleResults = searchOperator.doSiteSearch(domainName, 
domainId,5); + List sampleResults = searchOperator.doSiteSearch(domainName, domainId,5, 1).results; if (!sampleResults.isEmpty()) { url = sampleResults.getFirst().url.withPathAndParam("/", null).toString(); } var result = new SiteInfoWithContext(domainName, + viableAliasDomain ? domain.aliasDomain().map(EdgeDomain::toString) : Optional.empty(), domainId, url, hasScreenshot, @@ -240,20 +285,21 @@ public class SearchSiteInfoService { .build(); } - private Docs listDocs(String domainName) { + private Docs listDocs(String domainName, int page) { int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1); + var results = searchOperator.doSiteSearch(domainName, domainId, 100, page); + return new Docs(domainName, domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1), - searchOperator.doSiteSearch(domainName, domainId, 100)); + results.results.stream().sorted(Comparator.comparing(deets -> -deets.topology)).toList(), + results.resultPages + ); } - public record Docs(Map view, - String domain, + public record Docs(String domain, long domainId, - List results) { - public Docs(String domain, long domainId, List results) { - this(Map.of("docs", true), domain, domainId, results); - } + List results, + List pages) implements SiteInfoModel { public String focusDomain() { return domain; } @@ -264,11 +310,12 @@ public class SearchSiteInfoService { } } - public record Backlinks(Map view, String domain, long domainId, List results) { - public Backlinks(String domain, long domainId, List results) { - this(Map.of("links", true), domain, domainId, results); - } - + public record Backlinks(String domain, + long domainId, + List results, + List pages + ) implements SiteInfoModel + { public String query() { return "links:" + domain; } public boolean isKnown() { @@ -276,9 +323,12 @@ public class SearchSiteInfoService { } } - public record SiteInfoWithContext(Map view, - Map domainState, - String domain, + public interface SiteInfoModel { + String 
domain(); + } + + public record SiteInfoWithContext(String domain, + Optional aliasDomain, int domainId, String siteUrl, boolean hasScreenshot, @@ -286,68 +336,12 @@ public class SearchSiteInfoService { List similar, List linking, FeedItems feed, - List samples - ) { - public SiteInfoWithContext(String domain, - int domainId, - String siteUrl, - boolean hasScreenshot, - DomainInformation domainInformation, - List similar, - List linking, - FeedItems feedInfo, - List samples - ) - { - this(Map.of("info", true), - Map.of(domainInfoState(domainInformation), true), - domain, - domainId, - siteUrl, - hasScreenshot, - domainInformation, - similar, - linking, - feedInfo, - samples); - } - - public String getLayout() { - // My CSS is too weak to handle this in CSS alone, so I guess we're doing layout in Java... - if (similar != null && similar.size() < 25) { - return "lopsided"; - } - else if (feed != null && !feed.items().isEmpty()) { - return "lopsided"; - } - else if (samples != null && !samples.isEmpty()) { - return "lopsided"; - } - else { - return "balanced"; - } - } + List samples) + implements SiteInfoModel + { public String query() { return "site:" + domain; } - private static String domainInfoState(DomainInformation info) { - if (info.isBlacklisted()) { - return "blacklisted"; - } - if (!info.isUnknownDomain() && info.isSuggestForCrawling()) { - return "suggestForCrawling"; - } - if (info.isInCrawlQueue()) { - return "inCrawlQueue"; - } - if (info.isUnknownDomain()) { - return "unknownDomain"; - } - else { - return "indexed"; - } - } - public boolean isKnown() { return domainId > 0; } @@ -391,21 +385,12 @@ public class SearchSiteInfoService { } public record ReportDomain( - Map view, String domain, int domainId, List complaints, List category, - boolean submitted) + boolean submitted) implements SiteInfoModel { - public ReportDomain(String domain, - int domainId, - List complaints, - List category, - boolean submitted) { - this(Map.of("report", true), domain, 
domainId, complaints, category, submitted); - } - public String query() { return "site:" + domain; } public boolean isKnown() { diff --git a/code/services-application/search-service/resources/jte/.jteroot b/code/services-application/search-service/resources/jte/.jteroot new file mode 100644 index 00000000..e69de29b diff --git a/code/services-application/search-service/resources/jte/explore/main.jte b/code/services-application/search-service/resources/jte/explore/main.jte new file mode 100644 index 00000000..21e14de7 --- /dev/null +++ b/code/services-application/search-service/resources/jte/explore/main.jte @@ -0,0 +1,58 @@ +@import nu.marginalia.browse.model.BrowseResult +@import nu.marginalia.search.model.NavbarModel +@import nu.marginalia.browse.model.BrowseResultSet + +@param NavbarModel navbar +@param BrowseResultSet results + + + + +@template.part.head(title = "Marginalia Search - Explore") + + + +@template.part.navbar(navbar = navbar) + +

+
+
+

Explore Websites

+
+
+
+ +
+
+ @if (results.hasFocusDomain()) +
+ Showing websites similar to ${results.focusDomain()} + + +
+ @else + Showing a random selection of websites. Refresh the website to get a new selection. + @endif +
+
+ @for (BrowseResult result : results.results()) +
+
+ ${result.displayDomain()} +
+ + +
+ + + +
+ @endfor +
+
+ + +@template.part.footerLegal() + + + \ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/part/footerLegal.jte b/code/services-application/search-service/resources/jte/part/footerLegal.jte new file mode 100644 index 00000000..07539611 --- /dev/null +++ b/code/services-application/search-service/resources/jte/part/footerLegal.jte @@ -0,0 +1,36 @@ +
+
+

Policies

+ + This website complies with the GDPR by not collecting any personal information, + and with the EU Cookie Directive by not using cookies for any purpose other than + to provide service functionality. + + + Access logs containing IP-addresses are retained for up to 24 hours, + anonymized logs with source addresses removed are sometimes kept longer + to help diagnose bugs. + +
+
+

Contact

+ + You can reach the webmaster of the search engine at kontakt@marginalia.nu. + +

Sources

+ + The search engine is open source with an AGPL license. The sources can be perused at + https://git.marginalia.nu/. + +
+
+

Data

+ + IP geolocation is sourced from the IP2Location LITE data available from + https://lite.ip2location.com/ + under + CC-BY-SA 4.0. + +
+ +
\ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/part/head.jte b/code/services-application/search-service/resources/jte/part/head.jte new file mode 100644 index 00000000..91c72576 --- /dev/null +++ b/code/services-application/search-service/resources/jte/part/head.jte @@ -0,0 +1,30 @@ +@param String title + + + + + Marginalia Search Engine - ${title} + + + + \ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/part/navbar.jte b/code/services-application/search-service/resources/jte/part/navbar.jte new file mode 100644 index 00000000..976a1fd4 --- /dev/null +++ b/code/services-application/search-service/resources/jte/part/navbar.jte @@ -0,0 +1,33 @@ +@import nu.marginalia.search.model.NavbarModel + +@param NavbarModel navbar + +
+
+ +
+
\ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/serp/error.jte b/code/services-application/search-service/resources/jte/serp/error.jte new file mode 100644 index 00000000..11cf15cd --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/error.jte @@ -0,0 +1,48 @@ +@import nu.marginalia.search.model.NavbarModel +@import nu.marginalia.search.model.SearchErrorMessageModel + +@param SearchErrorMessageModel model +@param NavbarModel navbar + + + + +@template.part.head(title = "Marginalia Search - Error") + + + +@template.part.navbar(navbar = navbar) + +
+
+
+

Marginalia Search

+ @template.serp.part.searchform(query = model.parameters().query(), profile = model.parameters().profileStr(), filters = model.filters()) +
+
+
+ +
+ + +
+
+
+ +
${model.errorTitle()}
+
+ @if (!model.errorRest().isBlank()) +

+ ${model.errorRest()} +

+ @endif +
+ +
+ +
+ +@template.part.footerLegal() + + + \ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/serp/first.jte b/code/services-application/search-service/resources/jte/serp/first.jte new file mode 100644 index 00000000..462610f6 --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/first.jte @@ -0,0 +1,69 @@ +@import nu.marginalia.WebsiteUrl +@import nu.marginalia.search.model.NavbarModel +@import nu.marginalia.search.model.SearchFilters +@import nu.marginalia.search.model.SearchProfile + +@param NavbarModel navbar +@param WebsiteUrl websiteUrl + + + +@template.part.head(title = "Marginalia Search") + + + +@template.part.navbar(navbar = navbar) + +
+
+
+

Marginalia Search

+ @template.serp.part.searchform(query = "", profile = SearchProfile.NO_FILTER.filterId, filters = new SearchFilters(websiteUrl)) +
+
+
+ +
+
+
+
Explore the Web
+
    +
  • Prioritizes non-commercial content
  • +
  • Tools for both search and discovery
  • +
  • Find lost old websites
  • +
+
+ +
+
Open Source
+
    +
  • AGPL license
  • +
  • Custom index software
  • +
  • Custom crawler software
  • +
+ +
+ +
+
Privacy by default
+
    +
  • Filter out tracking and adtech
  • +
  • No user or search data shared with 3rd parties
  • +
  • No long-term retention of queries or IP addresses
  • +
+ +
+
+ +
+ +@template.part.footerLegal() + + + diff --git a/code/services-application/search-service/resources/jte/serp/main.jte b/code/services-application/search-service/resources/jte/serp/main.jte new file mode 100644 index 00000000..b35cf312 --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/main.jte @@ -0,0 +1,91 @@ +@import nu.marginalia.search.model.DecoratedSearchResults +@import nu.marginalia.search.model.ClusteredUrlDetails +@import nu.marginalia.search.model.NavbarModel +@import nu.marginalia.search.model.ResultsPage + +@param DecoratedSearchResults results +@param NavbarModel navbar + + + + +@template.part.head(title = "Marginalia Search - " + results.getQuery()) + + +@template.part.navbar(navbar = navbar) + +
+
+
+
+

Marginalia Search

+
+ @template.serp.part.searchform(query = results.getParams().query(), profile = results.getProfile(), filters = results.getFilters()) +
+
+
+ +
+
+
+
+ +
+ + @template.serp.part.sidebar(filters = results.getFilters()) + + +
+ + @if (results.hasFocusDomain()) +
+
Showing results from ${results.getFocusDomain()}
+
+ +
+ @endif + +
+ + @for(ClusteredUrlDetails details : results.results) + @template.serp.part.result(result = details, parameters = results.getParams(), domainSearch = results.hasFocusDomain()) + @endfor +
+ + + @if (results.getResultPages().size() > 1) +
+ @for(ResultsPage page : results.getResultPages()) + @if (page.current()) + ${page.number()} + @else + ${page.number()} + @endif + @endfor +
+ @endif +
+
+ +
+ +@template.serp.part.footerHowto() + +@template.part.footerLegal() + +<%-- Put this last to not bother SR users with double menus --%> +@template.serp.part.mobile-menu(filters = results.getFilters()) + + + + \ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/serp/part/footerHowto.jte b/code/services-application/search-service/resources/jte/serp/part/footerHowto.jte new file mode 100644 index 00000000..865de966 --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/part/footerHowto.jte @@ -0,0 +1,91 @@ +
+
+

Syntax

+

This is a keyword-based search engine. When entering multiple search terms, the search engine will attempt to match them against documents where the terms occur in close proximity.

+

Search terms can be excluded with a hyphen.

+

While the search engine at present does not allow full text search, quotes can be used to specifically search for names or terms in the title. Using quotes will also cause the search engine to be as literal as possible in interpreting the query.

+

Parentheses can be used to add terms to the query without giving weight to the terms when ranking the search results.

+ +

Samples

+
+
soup -chicken
+
Look for pages that contain soup, but not + chicken.
+
"keyboard"
+
Look for pages containing the exact word + keyboard, not keyboards or the like.
+
"steve mcqueen"
+
Look for pages containing the exact words steve mcqueen + in that order, with no words in between.
+
apology (plato)
+
Look for pages containing apology and plato, but only rank them + based on their relevance to apology
+
+

Language Limitations

+

The search engine currently does not support any languages other than English.

+

Support for other languages is planned, + but not available right now. Adding support for additional languages and making it work well is somewhat time-consuming, + meanwhile having bad support for a language won't make anyone happy. +

+

Webmaster Information

+

If you wish to add your website to the index, follow the instructions in this git repository, + if you do not want to mess with git, you can also email kontakt@marginalia.nu with the domain name.

+

The search engine's crawler uses the user-agent string search.marginalia.nu, and requests come from the IPs indicated in + https://search.marginalia.nu/crawler-ips.txt.

+

If you do not want your website to be crawled, the search engine respects robots.txt. In case of questions, bug reports or concerns, email kontakt@marginalia.nu. +

+
+
+

Special Keywords

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeywordMeaning
site:example.comDisplay site information about example.com
site:example.com keywordSearch example.com for keyword
browse:example.comShow similar websites to example.com
ip:127.0.0.1Search documents hosted at 127.0.0.1
links:example.comSearch documents linking to example.com
tld:edu keywordSearch documents with the top level domain edu.
?tld:edu keywordPrefer but do not require results with the top level domain edu. + This syntax is also possible for links:..., ip:... and site:...
q>5The amount of javascript and modern features is at least 5 (on a scale 0 to 25)
q<5The amount of javascript and modern features is at most 5 (on a scale 0 to 25)
year>2005(beta) The document was ostensibly published in or after 2005
year=2005(beta) The document was ostensibly published in 2005
year<2005(beta) The document was ostensibly published in or before 2005
rank>50The ranking of the website is at least 50 in a span of 1 - 255
rank<50The ranking of the website is at most 50 in a span of 1 - 255
count>10 The search term must appear in at least 10 results from the domain
count<10 The search term must appear in at most 10 results from the domain
format:html5Filter documents using the HTML5 standard. This is typically modern websites.
format:xhtmlFilter documents using the XHTML standard
format:html123Filter documents using the HTML standards 1, 2, and 3. This is typically very old websites.
generator:wordpressFilter documents with the specified generator, in this case wordpress
file:zipFilter documents containing a link to a zip file (most file-endings work)
file:audioFilter documents containing a link to an audio file
file:videoFilter documents containing a link to a video file
file:archiveFilter documents containing a link to a compressed archive
file:documentFilter documents containing a link to a document
-special:mediaFilter out documents with audio or video tags
-special:scriptsFilter out documents with javascript
-special:affiliateFilter out documents with likely Amazon affiliate links
-special:trackingFilter out documents with analytics or tracking code
-special:cookiesFilter out documents with cookies
+
+ +
\ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/serp/part/matchogram.jte b/code/services-application/search-service/resources/jte/serp/part/matchogram.jte new file mode 100644 index 00000000..423e115a --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/part/matchogram.jte @@ -0,0 +1,24 @@ +@import java.util.stream.IntStream + +@param long mask + + + +@for (int bit : IntStream.range(0, 56).filter(bit -> (mask & (1L << bit)) != 0).toArray()) + +@endfor + \ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/serp/part/mobile-menu.jte b/code/services-application/search-service/resources/jte/serp/part/mobile-menu.jte new file mode 100644 index 00000000..7f929a46 --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/part/mobile-menu.jte @@ -0,0 +1,50 @@ +@import nu.marginalia.search.model.SearchFilters +@import java.util.List + +@param SearchFilters filters + + + \ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/serp/part/result.jte b/code/services-application/search-service/resources/jte/serp/part/result.jte new file mode 100644 index 00000000..30cccb0d --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/part/result.jte @@ -0,0 +1,108 @@ +@import nu.marginalia.model.idx.DocumentFlags +@import nu.marginalia.search.command.SearchParameters +@import nu.marginalia.search.model.ClusteredUrlDetails +@import nu.marginalia.search.model.UrlDetails + +@param ClusteredUrlDetails result +@param SearchParameters parameters +@param boolean domainSearch + +
+
+
+
+
+
+ @template.serp.part.matchogram(mask = result.first.positionsMask) +
+ +
+ + +

+ ${result.first.description} +

+ +
+ @if (!domainSearch) +
+ + + +
+ + +
+ @endif +
+ @if (result.hasMultiple() && !domainSearch) +
+

Also from ${result.getDomain().toString()}:

+ +
    + @for(UrlDetails item : result.rest) +
  • + ${item.title} +
  • + @endfor +
+
+ @endif + + + @if (!domainSearch && result.remainingCount() > 0) + + ${result.remainingCount()} more + + @endif + +
+ @if (DocumentFlags.PlainText.isPresent(result.getFirst().resultItem.encodedDocMetadata)) + Plain text + @endif + @if (DocumentFlags.GeneratorForum.isPresent(result.getFirst().resultItem.encodedDocMetadata)) + Forum + @endif + @if (DocumentFlags.GeneratorWiki.isPresent(result.getFirst().resultItem.encodedDocMetadata)) + Wiki + @endif + + @if(result.getFirst().isCookies()) + Cookies + @endif + + @if(result.getFirst().isTracking()) + Track + @endif + + @if(result.getFirst().isScripts()) + JS + @endif + + @if(result.getFirst().isAds()) + Ads + @endif + + @if(result.getFirst().isAffiliate()) + Affiliate + @endif + +
+
+ +
\ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/serp/part/searchform.jte b/code/services-application/search-service/resources/jte/serp/part/searchform.jte new file mode 100644 index 00000000..1f5f15bb --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/part/searchform.jte @@ -0,0 +1,41 @@ +@import nu.marginalia.search.model.SearchFilters + +@param String query +@param String profile +@param SearchFilters filters + + +
+
+ @if (query.isBlank()) + <%-- Add autofocus if the query is blank --%> + + @else + + @endif + + + +
+ + + + + + + +
diff --git a/code/services-application/search-service/resources/jte/serp/part/sidebar.jte b/code/services-application/search-service/resources/jte/serp/part/sidebar.jte new file mode 100644 index 00000000..3c4cbe4e --- /dev/null +++ b/code/services-application/search-service/resources/jte/serp/part/sidebar.jte @@ -0,0 +1,59 @@ +@import nu.marginalia.search.model.SearchFilters +@import java.util.List + +@param SearchFilters filters + + diff --git a/code/services-application/search-service/resources/jte/siteinfo/main.jte b/code/services-application/search-service/resources/jte/siteinfo/main.jte new file mode 100644 index 00000000..0f39b745 --- /dev/null +++ b/code/services-application/search-service/resources/jte/siteinfo/main.jte @@ -0,0 +1,108 @@ +@import nu.marginalia.search.model.NavbarModel +@import nu.marginalia.search.svc.* + +@param SearchSiteInfoService.SiteInfoModel model +@param NavbarModel navbar + + + + +@template.part.head(title = "Marginalia Search - " + model.domain()) + + + +@template.part.navbar(navbar = navbar) + +
+
+
+

Site Information

+ + + +
+
+ +
+ +
+ + @if (model instanceof SearchSiteInfoService.SiteInfoWithContext siteInfo) + @template.siteinfo.view.overview(siteInfo = siteInfo) + @elseif (model instanceof SearchSiteInfoService.ReportDomain reportDomain) + @template.siteinfo.view.reportDomain(reportDomain = reportDomain) + @elseif (model instanceof SearchSiteInfoService.Backlinks backlinks) + @template.siteinfo.view.backlinks(backlinks = backlinks) + @elseif (model instanceof SearchSiteInfoService.ReportDomain reportDomain) + @template.siteinfo.view.reportDomain(reportDomain = reportDomain) + @elseif (model instanceof SearchSiteInfoService.Docs docs) + @template.siteinfo.view.docs(docs = docs) + @endif + +
+ + +@template.part.footerLegal() + + + \ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/siteinfo/part/linkedDomains.jte b/code/services-application/search-service/resources/jte/siteinfo/part/linkedDomains.jte new file mode 100644 index 00000000..2f1fabaf --- /dev/null +++ b/code/services-application/search-service/resources/jte/siteinfo/part/linkedDomains.jte @@ -0,0 +1,67 @@ +@import nu.marginalia.api.domains.model.SimilarDomain +@import java.util.List + +@param String title +@param String domainName +@param List list + +@if (!list.isEmpty()) + +
+
+

${title}

+
+ +
+ + + + + + + + + + + @for(SimilarDomain item : list) + + + + + + + + + @endfor + +
LinkRankDomainSimilarity
+ @if(item.linkType().isLinked()) + + + @endif + +
+ + $unsafe{item.getRankSymbols()} + +
+
+ + ${item.url().getDomain().toString()} + + +
+
+
+
+
+
+
+
+
+@endif diff --git a/code/services-application/search-service/resources/jte/siteinfo/start.jte b/code/services-application/search-service/resources/jte/siteinfo/start.jte new file mode 100644 index 00000000..dc4e95ba --- /dev/null +++ b/code/services-application/search-service/resources/jte/siteinfo/start.jte @@ -0,0 +1,91 @@ +@import nu.marginalia.search.model.NavbarModel +@import nu.marginalia.search.svc.* +@import nu.marginalia.search.svc.SearchSiteInfoService.SiteOverviewModel +@import nu.marginalia.search.svc.SearchSiteInfoService.SiteOverviewModel.DiscoveredDomain + +@param NavbarModel navbar +@param SiteOverviewModel model + + + +@template.part.head(title = "Marginalia Search - Site Viewer") + + + +@template.part.navbar(navbar = navbar) + +
+
+
+

Site Information

+ + + +
+
+
+ +
+
+
Recently Discovered Domains
+
+ + + + + + + + + @for (DiscoveredDomain domain : model.domains()) + + + + + @endfor + +
Domain NameDiscover Time
+ ${domain.name()} + + ${domain.timestamp()} +
+ +
+ +
+
+
Recently Discovered Domains
+
+ + + + + + + + + @for (DiscoveredDomain domain : model.domains()) + + + + + @endfor + +
Domain NameDiscover Time
+ ${domain.name()} + + ${domain.timestamp()} +
+ +
+ +
+
+ + +@template.part.footerLegal() + + + \ No newline at end of file diff --git a/code/services-application/search-service/resources/jte/siteinfo/view/backlinks.jte b/code/services-application/search-service/resources/jte/siteinfo/view/backlinks.jte new file mode 100644 index 00000000..18c715f1 --- /dev/null +++ b/code/services-application/search-service/resources/jte/siteinfo/view/backlinks.jte @@ -0,0 +1,60 @@ +@import nu.marginalia.search.model.GroupedUrlDetails +@import nu.marginalia.search.model.UrlDetails +@import nu.marginalia.search.model.ResultsPage +@import nu.marginalia.search.svc.SearchSiteInfoService.* + +@param Backlinks backlinks + +
+ +@if (backlinks.results().isEmpty()) +
+ The search engine isn't aware of any backlinks to ${backlinks.domain()}! +
+@else +
+ Showing documents linking to ${backlinks.domain()} +
+@endif + +@for (GroupedUrlDetails group : backlinks.results()) +
+ + + @for (UrlDetails details : group.urlDetails()) +
${details.title}
+
+ ${details.description} +
+ + @endfor +
+ +@endfor + + +@if (backlinks.pages().size() > 1) +
+ @for(ResultsPage page : backlinks.pages()) + @if (page.current()) + ${page.number()} + @else + ${page.number()} + @endif + @endfor +
+@endif +
+ + + diff --git a/code/services-application/search-service/resources/jte/siteinfo/view/docs.jte b/code/services-application/search-service/resources/jte/siteinfo/view/docs.jte new file mode 100644 index 00000000..ac5a8f41 --- /dev/null +++ b/code/services-application/search-service/resources/jte/siteinfo/view/docs.jte @@ -0,0 +1,89 @@ +@import nu.marginalia.search.svc.SearchSiteInfoService +@import nu.marginalia.search.svc.SearchSiteInfoService.* +@import nu.marginalia.search.model.UrlDetails +@import nu.marginalia.search.model.ResultsPage +@import nu.marginalia.model.idx.DocumentFlags +@param Docs docs + + + +
+ +@if (docs.results().isEmpty()) +
+ The search engine doesn't index any documents from ${docs.domain()} +
+@else +
+ Showing documents from ${docs.domain()} +
+@endif + +@for (UrlDetails details : docs.results()) +
+ + +

+ ${details.description} +

+ +
+
+ @if (DocumentFlags.PlainText.isPresent(details.resultItem.encodedDocMetadata)) + Plain text + @endif + @if (DocumentFlags.GeneratorForum.isPresent(details.resultItem.encodedDocMetadata)) + Forum + @endif + @if (DocumentFlags.GeneratorWiki.isPresent(details.resultItem.encodedDocMetadata)) + Wiki + @endif + + @if(details.isCookies()) + Cookies + @endif + + @if(details.isTracking()) + Track + @endif + + @if(details.isScripts()) + JS + @endif + + @if(details.isAds()) + Ads + @endif + + @if(details.isAffiliate()) + Affiliate + @endif + +
+
+@endfor + + +@if (docs.pages().size() > 1) +
+ @for(ResultsPage page : docs.pages()) + @if (page.current()) + ${page.number()} + @else + ${page.number()} + @endif + @endfor +
+@endif + +
diff --git a/code/services-application/search-service/resources/jte/siteinfo/view/overview.jte b/code/services-application/search-service/resources/jte/siteinfo/view/overview.jte new file mode 100644 index 00000000..d6dab198 --- /dev/null +++ b/code/services-application/search-service/resources/jte/siteinfo/view/overview.jte @@ -0,0 +1,195 @@ +@import nu.marginalia.search.svc.SearchSiteInfoService +@import nu.marginalia.search.svc.SearchSiteInfoService.* +@import nu.marginalia.search.model.UrlDetails + +@param SiteInfoWithContext siteInfo + + + +
+
+
+ + ${siteInfo.domain()} +
+
+ +
+ + @if (siteInfo.hasScreenshot()) + Screenshot of ${siteInfo.domain()} + @elseif (siteInfo.aliasDomain().isPresent() && siteInfo.domainInformation().getNodeAffinity() < 1) +
+ The search engine is also aware of links to ${siteInfo.aliasDomain().get()}, + this may be the canonical address. +
+ @endif + + @if (siteInfo.feed() != null && !siteInfo.feed().items().isEmpty()) + +
+ + Feed + + +
+ +
+ @for (SearchSiteInfoService.FeedItem item : siteInfo.feed().items()) +
+ ${item.title()} + ${item.pubDay()} +
+
${item.description()}
+ @endfor +
+ + @endif + + @if (siteInfo.samples() != null && !siteInfo.samples().isEmpty()) + +
+ + Sample +
+ +
+ @for (UrlDetails item : siteInfo.samples()) +
+ ${item.title} +
+
${item.description}
+ @endfor +
+ + @endif + + + @if (siteInfo.domainInformation().isUnknownDomain()) +
+ + Unknown Domain +
+ +
+

This website is not known to the search engine.

+ +

To submit the website for crawling, follow these instructions.

+
+ + @endif + + @if (siteInfo.domainInformation().isBlacklisted()) +
+ + Blacklisted +
+
+

This website is blacklisted. This excludes it from crawling and indexing.

+ +

This is usually because of some form of misbehavior on the webmaster's end, + either annoying search engine spam, or tasteless, bad-faith content.

+
+ @endif + + @if (siteInfo.domainInformation().isSuggestForCrawling()) +
+
+
+ This website is not queued for crawling. If you would like it to be crawled, + use the checkbox and button below. +
+ + + +
+ + +
+ + +
+
+ @endif + + @if (siteInfo.isKnown()) +
+ + Crawl Statistics +
+ +
+
+

Pages Known

+

${siteInfo.domainInformation().getPagesKnown()}

+
+
+

Pages Fetched

+

${siteInfo.domainInformation().getPagesFetched()}

+
+
+

Pages Indexed

+

${siteInfo.domainInformation().getPagesIndexed()}

+
+
+

Incoming Links

+

${siteInfo.domainInformation().getIncomingLinks()}

+
+
+

Outbound Links

+

${siteInfo.domainInformation().getOutboundLinks()}

+
+
+

Node Affinity

+

${siteInfo.domainInformation().getNodeAffinity()}

+
+
+ + @if (siteInfo.domainInformation().getPagesKnown() >= 5_000_000) +
+ This website is very large, and the system can not accurately report the number of crawled + and indexed documents without affecting performance. +
+ @endif + + +
+ + Network Details +
+ +
+
+

IP Address

+

${siteInfo.domainInformation().getIp()}

+

${siteInfo.domainInformation().getIpCountry()} ${siteInfo.domainInformation().getIpFlag()}

+
+
+

ASN Details

+

AS${siteInfo.domainInformation().getAsn()} - ${siteInfo.domainInformation().getAsnOrg()}

+

${siteInfo.domainInformation().getAsnCountry()} ${siteInfo.domainInformation().getAsnFlag()}

+
+
+ @endif +
+
+ +@if (!siteInfo.similar().isEmpty() || !siteInfo.linking().isEmpty()) +
+ @template.siteinfo.part.linkedDomains("Similar Domains", siteInfo.domain(), siteInfo.similar()) + @template.siteinfo.part.linkedDomains("Linked Domains", siteInfo.domain(), siteInfo.linking()) +
+@endif diff --git a/code/services-application/search-service/resources/jte/siteinfo/view/reportDomain.jte b/code/services-application/search-service/resources/jte/siteinfo/view/reportDomain.jte new file mode 100644 index 00000000..c22aec03 --- /dev/null +++ b/code/services-application/search-service/resources/jte/siteinfo/view/reportDomain.jte @@ -0,0 +1,111 @@ +@import nu.marginalia.search.svc.SearchSiteInfoService.* + +@param ReportDomain reportDomain + +
+
+
+

Report Domain Issue

+
+ + @if (reportDomain.submitted()) +
Your complaint has been submitted and will be reviewed in a few weeks. + For urgent issues, email kontakt@marginalia.nu + instead of using this form. +
+ @else + +
+
+ + +
+ +
+ + +
+ +
+ + +
+ +
+ + +

+ For urgent issues, email kontakt@marginalia.nu + instead of using this form. +

+
+
+ @endif +
+ +@if (!reportDomain.complaints().isEmpty()) +
+
+

Existing Complaints

+
+ + +
+ + + + + + + + + + + @for (var complaint : reportDomain.complaints()) + + + + + + + + @endfor + +
CategoryTimestampReviewedDecision
+ ${complaint.category()} + + ${complaint.submitTime()} + + @if(complaint.isReviewed()) + + @endif + + ${complaint.decision()} +
+
+
+@endif + +
+ diff --git a/code/services-application/search-service/test/nu/marginalia/search/command/commands/BangCommandTest.java b/code/services-application/search-service/test/nu/marginalia/search/command/commands/BangCommandTest.java index d8d91654..6be5690e 100644 --- a/code/services-application/search-service/test/nu/marginalia/search/command/commands/BangCommandTest.java +++ b/code/services-application/search-service/test/nu/marginalia/search/command/commands/BangCommandTest.java @@ -15,7 +15,7 @@ class BangCommandTest { public void testG() { try { bangCommand.process(null, - new SearchParameters(" !g test", + new SearchParameters(null, " !g test", null, null, null, null, null, false, 1) ); Assertions.fail("Should have thrown RedirectException"); diff --git a/code/services-application/search-service/test/nu/marginalia/search/paperdoll/JtePaperDoll.java b/code/services-application/search-service/test/nu/marginalia/search/paperdoll/JtePaperDoll.java new file mode 100644 index 00000000..d4623351 --- /dev/null +++ b/code/services-application/search-service/test/nu/marginalia/search/paperdoll/JtePaperDoll.java @@ -0,0 +1,122 @@ +package nu.marginalia.search.paperdoll; + +import gg.jte.CodeResolver; +import gg.jte.ContentType; +import gg.jte.TemplateEngine; +import gg.jte.output.StringOutput; +import gg.jte.resolve.DirectoryCodeResolver; +import nu.marginalia.WebsiteUrl; +import nu.marginalia.search.model.NavbarModel; +import nu.marginalia.search.rendering.MockedSearchResults; +import org.junit.jupiter.api.Test; +import spark.Spark; + +import java.nio.file.Path; +import java.util.Map; + +/** Manual preview harness: serves the jte templates with mocked model data from a local Spark server so the pages can be inspected in a browser. It never finishes on its own and asserts nothing. */ public class JtePaperDoll { + /* Templates are read from resources/jte below the current working directory. */ final CodeResolver codeResolver = new DirectoryCodeResolver(Path.of(".").toAbsolutePath().resolve("resources/jte")); + final TemplateEngine templateEngine = TemplateEngine.create(codeResolver, ContentType.Html); + + /** Renders the named template with a single model object and returns the output as a string. */ private String render(String template, Object obj) { + var str = new StringOutput(); + templateEngine.render(template, obj, str); + return 
str.toString(); + } + + /** Renders the named template with a map of named template parameters and returns the output as a string. */ private String render(String template, Map<String, Object> map) { + var str = new StringOutput(); + templateEngine.render(template, map, str); + return str.toString(); + } + + /** Starts the preview server on port 9999, registers one route per mocked page plus a placeholder SVG for screenshot requests, then blocks until the thread is interrupted. Every response gets a Content-Encoding: gzip header. */ @Test + public void searchResults() throws InterruptedException { + System.out.println(Path.of(".").toAbsolutePath()); + + Spark.port(9999); + + Spark.after((rq, rs) -> { + rs.header("Content-Encoding", "gzip"); + }); + Spark.get("/", + (rq, rs) -> MockedSearchResults.mockRegularSearchResults(), + ret -> this.render("serp/main.jte", Map.of("results", ret, "navbar", NavbarModel.SEARCH)) + ); + Spark.get("/site-focus", + (rq, rs) -> MockedSearchResults.mockSiteFocusResults(), + ret -> this.render("serp/main.jte", Map.of("results", ret, "navbar", NavbarModel.SEARCH)) + ); + Spark.get("/errors", + (rq, rs) -> MockedSearchResults.mockErrorData(), + ret -> this.render("serp/error.jte", Map.of("model", ret, "navbar", NavbarModel.LIMBO)) + ); + Spark.get("/first", + (rq, rs) -> new Object(), + ret -> this.render("serp/first.jte", Map.of( "navbar", NavbarModel.SEARCH, + "websiteUrl", new WebsiteUrl("https://localhost:9999/") + )) + ); + Spark.get("/explore", + (rq, rs) -> MockedSearchResults.mockBrowseResults(32), + ret -> this.render("explore/main.jte", Map.of( "navbar", NavbarModel.EXPLORE, + "results", ret) + ) + ); + Spark.get("/site-info", + (rq, rs) -> { + if ("links".equals(rq.queryParams("view"))) { + return MockedSearchResults.mockBacklinkData(); + } + else if ("docs".equals(rq.queryParams("view"))) { + return MockedSearchResults.mockDocsData(); + } + else if ("report".equals(rq.queryParams("view"))) { + return MockedSearchResults.mockReportDomain(); + } + else return MockedSearchResults.mockSiteInfoData(); + + }, + ret -> this.render("siteinfo/main.jte", Map.of("model", ret, "navbar", NavbarModel.EXPLORE)) + ); + Spark.get("/screenshot/*", (rq, rsp) -> { + rsp.type("image/svg+xml"); + + return """ + + + + + + + + + + + + + + + + + + + + + + + + + + + + """; + + }); + + Spark.init(); + + /* Keep the JVM alive while the server runs. Joining the current thread waits without spinning a CPU core; it only returns by throwing InterruptedException. */ Thread.currentThread().join(); + } + +} diff
--git a/code/services-application/search-service/test/nu/marginalia/search/rendering/MockedSearchResults.java b/code/services-application/search-service/test/nu/marginalia/search/rendering/MockedSearchResults.java new file mode 100644 index 00000000..cabbefa8 --- /dev/null +++ b/code/services-application/search-service/test/nu/marginalia/search/rendering/MockedSearchResults.java @@ -0,0 +1,252 @@ +package nu.marginalia.search.rendering; + +import nu.marginalia.WebsiteUrl; +import nu.marginalia.api.domains.model.DomainInformation; +import nu.marginalia.api.domains.model.SimilarDomain; +import nu.marginalia.api.searchquery.model.results.SearchResultItem; +import nu.marginalia.browse.model.BrowseResult; +import nu.marginalia.browse.model.BrowseResultSet; +import nu.marginalia.model.EdgeDomain; +import nu.marginalia.model.EdgeUrl; +import nu.marginalia.model.crawl.DomainIndexingState; +import nu.marginalia.search.command.SearchParameters; +import nu.marginalia.search.model.*; +import nu.marginalia.search.svc.SearchFlagSiteService; +import nu.marginalia.search.svc.SearchSiteInfoService; + +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ThreadLocalRandom; + +/** Static factories for canned model objects, used to render the jte templates without a live backend. */ public class MockedSearchResults { + + /** Builds a mock result for the given URL and title with a fixed multi-line placeholder description. */ private static UrlDetails mockUrlDetails(String url, String title) throws URISyntaxException { + return mockUrlDetails(url, title, "Sing, Goddess, sing the rage of Achilles, son of Peleus—\n" + + "that murderous anger which condemned Achaeans\n" + + "to countless agonies and threw many warrior souls\n" + + "deep into Hades, leaving their dead bodies\n" + + "carrion food for dogs and birds—\n" + + "all in fulfilment of the will of Zeus."); + + } + + /** Builds a mock result with the given description. Two constructor arguments are random (an int from ThreadLocalRandom and the mask from mockPositionsMask), so instances differ between calls; the remaining arguments are fixed placeholders. */ private static UrlDetails mockUrlDetails(String url, String title, String desc) throws URISyntaxException { + return new UrlDetails( + 1, + 1, + new EdgeUrl(url), + title, + desc, + "HTML5", + ThreadLocalRandom.current().nextInt(), + 
DomainIndexingState.ACTIVE, + 0.5, + 8, + "", + mockPositionsMask(), + 2, + new SearchResultItem(0, 0, 0, 0, 0), + null); + + } + /** Returns a random 64-bit mask: between 1 and 23 random bit positions are set, and since positions may repeat the number of distinct set bits can be lower. */ private static long mockPositionsMask() { + + int hits = ThreadLocalRandom.current().nextInt(1, 24); + long mask = 0; + for (int i = 0; i < hits; i++) { + mask |= 1L << ThreadLocalRandom.current().nextInt(0, 64); + } + + return mask; + } + + /** Returns three entries: two built from a single result each, and one built from a result plus a list of two additional results. */ private static List mockSearchResultsList() throws URISyntaxException { + return List.of( + // Non-clustered result + new ClusteredUrlDetails( + mockUrlDetails("https://clustered.marginalia.nu", "Non-clustered-result") + ), + new ClusteredUrlDetails( + mockUrlDetails("https://clustered.marginalia.nu", "Short Result", "Short") + ), + new ClusteredUrlDetails( + mockUrlDetails("https://clustered.marginalia.nu", "Clustered-result"), + List.of( + mockUrlDetails("https://clustered.marginalia.nu", "Additional result"), + mockUrlDetails("https://clustered.marginalia.nu", "One more result") + ) + ) + ); + } + + /** Mock results model for the query test: default search parameters, the mocked result list and two ResultsPage entries. */ public static DecoratedSearchResults mockRegularSearchResults() throws URISyntaxException { + SearchParameters params = SearchParameters.defaultsForQuery(new WebsiteUrl("https://localhost:9999/"), "test", 1); + + return new DecoratedSearchResults( + params, + List.of("Not enough search engine oil"), + null, + mockSearchResultsList(), + "", + -1, + new SearchFilters(params), + List.of(new ResultsPage(1, true, "#"), + new ResultsPage(2, false, "#"))); + } + + /** Variant of the regular mock whose query contains a site: term and which passes example.marginalia.nu and 1 where the regular variant passes an empty string and -1. */ public static DecoratedSearchResults mockSiteFocusResults() throws URISyntaxException { + SearchParameters params = SearchParameters.defaultsForQuery(new WebsiteUrl("https://localhost:9999/"), "test site:example.marginalia.nu", 1); + + return new DecoratedSearchResults( + params, + List.of("Not enough search engine oil"), + null, + mockSearchResultsList(), + "example.marginalia.nu", + 1, + new SearchFilters(params), + List.of(new ResultsPage(1, true, "#"), + new ResultsPage(2, false, "#"))); + } + + /** Mock error page model with a fixed headline and a fixed explanatory paragraph. */ public static SearchErrorMessageModel 
mockErrorData() { + var params = SearchParameters.defaultsForQuery(new WebsiteUrl("https://localhost:9999/"), "test site:example.marginalia.nu", 1); + + return new SearchErrorMessageModel( + "An error occurred when communicating with the search engine index.", + """ + This is hopefully a temporary state of affairs. It may be due to + an upgrade. The index typically takes a about two or three minutes + to reload from a cold restart. Thanks for your patience. + """, + params, + new SearchFilters(params) + ); + } + + /** Mock site overview for www.example.com: one SimilarDomain in the first list, four in the second (presumably similar and linking domains in that order, confirm against the record), a two-item feed and an empty final list. */ public static SearchSiteInfoService.SiteInfoWithContext mockSiteInfoData() throws URISyntaxException { + return new SearchSiteInfoService.SiteInfoWithContext( + "www.example.com", + Optional.of("other.example.com"), + 14, + "https://www.example.com", + true, + new DomainInformation( + new EdgeDomain("www.example.com"), + false, + 14, + 23, + 55, + 10, + 20, + 1, + 0.5, + false, + true, + false, + "127.0.0.1", + 4041, + "ACME INC", + "SE", + "SE", + "INDEXED" + ), + List.of( + new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,65, 20, true, true, true, SimilarDomain.LinkType.BIDIRECTIONAL) + ), + List.of( + new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,65, 80, true, true, true, SimilarDomain.LinkType.BIDIRECTIONAL), + new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,35, 40, true, true, false, SimilarDomain.LinkType.BACKWARD), + new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,25, 20, true, true, false, SimilarDomain.LinkType.FOWARD), + new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,25, 20, true, true, false, SimilarDomain.LinkType.FOWARD) + ), + new SearchSiteInfoService.FeedItems("www.example.com", + "https://www.example.com/rss.xml", + "2024-01-01", + List.of( + new SearchSiteInfoService.FeedItem("Test Post", "2024-01-01", "Lorem ipsum dolor sit amet", "https://www.example.com/1"), + new SearchSiteInfoService.FeedItem("Other Post", "2024-01-04", "Sing, Goddess, sing the rage of Achilles, son of Peleus—\n" + 
+ "that murderous anger which condemned Achaeans\n" + + "to countless agonies and threw many warrior souls\n" + + "deep into Hades, leaving their dead bodies\n" + + "carrion food for dogs and birds—\n" + + "all in fulfilment of the will of Zeus.", + "https://www.example.com/1") + + )), + List.of()); + } + + /** Mock backlinks view data: one group of three links, one group with a single link, and two ResultsPage entries. Declared to return Object although it constructs a SearchSiteInfoService.Backlinks. */ public static Object mockBacklinkData() throws URISyntaxException { + return new SearchSiteInfoService.Backlinks( + "www.example.com", + 4, + List.of( + new GroupedUrlDetails( + List.of( + mockUrlDetails("https://www.example.com/", "lorem ipsum"), + mockUrlDetails("https://www.example.com/", "dolor sit"), + mockUrlDetails("https://www.example.com/", "amet quia") + ) + ), + new GroupedUrlDetails( + List.of( + mockUrlDetails("https://other.example.com", "single link result") + ) + ) + ), + List.of( + new ResultsPage(1, true, "#"), + new ResultsPage(2, false, "#") + ) + ); + } + + /** Mock documents view data: three mock documents and two ResultsPage entries. */ public static SearchSiteInfoService.Docs mockDocsData() throws URISyntaxException { + return new SearchSiteInfoService.Docs( + "www.example.com", + 1, + List.of( + mockUrlDetails("https://www.example.com/", "lorem ipsum"), + mockUrlDetails("https://www.example.com/", "dolor sit"), + mockUrlDetails("https://www.example.com/", "amet quia") + ), + List.of( + new ResultsPage(1, true, "#"), + new ResultsPage(2, false, "#") + ) + ); + } + + /** Mock report view data with one existing complaint and the category list from SearchFlagSiteService. The final argument is false (presumably the submitted flag, confirm against the record). */ public static SearchSiteInfoService.ReportDomain mockReportDomain() { + return new SearchSiteInfoService.ReportDomain( + "www.example.com", + 1, + List.of(new SearchFlagSiteService.FlagSiteComplaintModel( + "BAD", + "2024-10-01", + true, + "Appealed" + )), + SearchFlagSiteService.categories, + false + ); + } + + /** Builds n browse results whose hosts are i.example.com for i from 0 to n-1; an n of zero or less yields an empty set. */ public static BrowseResultSet mockBrowseResults(int n) { + List results = new ArrayList<>(); + + for (int i = 0; i < n; i++) { + results.add(new BrowseResult( + new EdgeUrl("https", new EdgeDomain(i+".example.com"), null, "/", null), + i, + 0.5, + true + )); + } + + return new BrowseResultSet(results); + } +} diff --git 
a/code/services-application/search-service/test/nu/marginalia/search/rendering/RenderingTest.java b/code/services-application/search-service/test/nu/marginalia/search/rendering/RenderingTest.java new file mode 100644 index 00000000..2ec00f88 --- /dev/null +++ b/code/services-application/search-service/test/nu/marginalia/search/rendering/RenderingTest.java @@ -0,0 +1,30 @@ +package nu.marginalia.search.rendering; + +import gg.jte.CodeResolver; +import gg.jte.ContentType; +import gg.jte.TemplateEngine; +import gg.jte.output.StringOutput; +import gg.jte.resolve.DirectoryCodeResolver; +import org.junit.jupiter.api.Test; + +import java.net.URISyntaxException; +import java.nio.file.Path; + +/** This test class verifies that the templates render successfully. + * It does not perform checks that the output is correct */ +public class RenderingTest { + /* Templates are read from resources/jte below the current working directory. */ final CodeResolver codeResolver = new DirectoryCodeResolver(Path.of(".").toAbsolutePath().resolve("resources/jte")); + final TemplateEngine templateEngine = TemplateEngine.create(codeResolver, ContentType.Html); + + + + /* NOTE(review): JtePaperDoll renders serp/main.jte with a map holding results and navbar, while both tests below pass the results model as a single parameter object. Confirm that the template accepts a single parameter, otherwise these renders will fail. */ @Test + public void testSerp_Main() throws URISyntaxException { + templateEngine.render("serp/main.jte", MockedSearchResults.mockRegularSearchResults(), new StringOutput()); + } + + /* Same render as above, fed with the site-focus mock data. */ @Test + public void testSerp_SiteFocus() throws URISyntaxException { + templateEngine.render("serp/main.jte", MockedSearchResults.mockSiteFocusResults(), new StringOutput()); + } +} diff --git a/settings.gradle b/settings.gradle index 1ed535be..051755b6 100644 --- a/settings.gradle +++ b/settings.gradle @@ -230,6 +230,8 @@ dependencyResolutionManagement { library('jetty-util','org.eclipse.jetty','jetty-util').version('9.4.54.v20240208') library('jetty-servlet','org.eclipse.jetty','jetty-servlet').version('9.4.54.v20240208') + library('jte','gg.jte','jte').version('3.1.15') + library('slop', 'nu.marginalia', 'slop').version('0.0.8-SNAPSHOT') bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet']) @@ -250,6 +252,8 @@ 
dependencyResolutionManagement { bundle('flyway', ['flyway.core', 'flyway.mysql']) bundle('curator', ['curator-framework', 'curator-x-discovery']) + + } } }