(WIP) Initial semi-working transformation to new tailwind UI

A proper build is still missing; we're currently pulling in Tailwind from a CDN, which is no bueno in prod.

There's also a lot of polish remaining everywhere: dead links, etc.
This commit is contained in:
Viktor Lofgren 2024-12-05 14:00:17 +01:00
parent fdc3efa250
commit f050bf5c4c
55 changed files with 2521 additions and 334 deletions

1
.gitignore vendored
View File

@ -7,3 +7,4 @@ build/
lombok.config
Dockerfile
run
jte-classes

View File

@ -6,4 +6,8 @@ public record BrowseResultSet(Collection<BrowseResult> results, String focusDoma
public BrowseResultSet(Collection<BrowseResult> results) {
this(results, "");
}
/** Tells whether this result set carries a focus domain, i.e. one that is neither null nor blank. */
public boolean hasFocusDomain() {
    if (focusDomain == null) {
        return false;
    }
    return !focusDomain.isBlank();
}
}

View File

@ -71,6 +71,23 @@ public class DomainInformation {
return new String(Character.toChars(firstChar)) + new String(Character.toChars(secondChar));
}
/** Renders the ASN country code as a flag emoji.
 * <p>
 * Each of the two letters in the country code is mapped onto the corresponding
 * Unicode regional indicator symbol; the pair is what renders as a flag.
 *
 * @return the two regional indicator symbols for {@code asnCountry}, or an empty
 *         string when the code is missing, is not exactly two code points long,
 *         or contains anything other than the ASCII letters A-Z (in either case)
 */
public String getAsnFlag() {
    if (asnCountry == null || asnCountry.codePointCount(0, asnCountry.length()) != 2) {
        return "";
    }

    // "UK" is in common use, but the flag sequence is registered under the ISO code "GB"
    final String country = "UK".equalsIgnoreCase(asnCountry) ? "GB" : asnCountry;

    final int regionalIndicatorA = 0x1F1E6; // REGIONAL INDICATOR SYMBOL LETTER A

    StringBuilder flag = new StringBuilder(4);
    for (int i = 0; i < 2; i++) {
        int letter = country.charAt(i);

        // Fold lower-case ASCII to upper-case without involving the default locale
        if (letter >= 'a' && letter <= 'z') {
            letter -= 0x20;
        }

        // Anything outside A-Z (including surrogates) would land outside the
        // regional indicator range and render as garbage, so give up instead
        if (letter < 'A' || letter > 'Z') {
            return "";
        }

        flag.appendCodePoint(regionalIndicatorA + (letter - 'A'));
    }

    return flag.toString();
}
public EdgeDomain getDomain() {
return this.domain;
}

View File

@ -52,12 +52,12 @@ public record SimilarDomain(EdgeUrl url,
return NONE;
}
public String toString() {
public String faIcon() {
return switch (this) {
case FOWARD -> "&#8594;";
case BACKWARD -> "&#8592;";
case BIDIRECTIONAL -> "&#8646;";
case NONE -> "-";
case FOWARD -> "fa-solid fa-arrow-right";
case BACKWARD -> "fa-solid fa-arrow-left";
case BIDIRECTIONAL -> "fa-solid fa-arrow-right-arrow-left";
case NONE -> "";
};
}

View File

@ -70,6 +70,7 @@ dependencies {
implementation libs.bundles.jetty
implementation libs.opencsv
implementation libs.trove
implementation libs.jte
implementation libs.fastutil
implementation libs.bundles.gson
implementation libs.bundles.mariadb

View File

@ -0,0 +1,29 @@
package nu.marginalia.search;
import gg.jte.CodeResolver;
import gg.jte.ContentType;
import gg.jte.TemplateEngine;
import gg.jte.output.StringOutput;
import gg.jte.resolve.ResourceCodeResolver;
import jakarta.inject.Singleton;
import java.util.Map;
/** Thin wrapper around a jte {@link TemplateEngine} that renders templates into strings.
 * <p>
 * Templates are looked up through a {@link ResourceCodeResolver} rooted at "jte",
 * and the engine is created for HTML content.
 */
@Singleton
public class JteRenderer {
    private final CodeResolver codeResolver = new ResourceCodeResolver("jte");
    private final TemplateEngine templateEngine = TemplateEngine.create(codeResolver, ContentType.Html);

    /** Renders a template that takes a single model parameter.
     *
     * @param template name of the template, e.g. "serp/main.jte"
     * @param model    the object handed to the template
     * @return the rendered output
     */
    public String render(String template, Object model) {
        final StringOutput rendered = new StringOutput();

        templateEngine.render(template, model, rendered);

        return rendered.toString();
    }

    /** Renders a template whose parameters are supplied by name.
     *
     * @param template name of the template, e.g. "serp/main.jte"
     * @param models   template parameters, keyed by parameter name
     * @return the rendered output
     */
    public String render(String template, Map<String, Object> models) {
        final StringOutput rendered = new StringOutput();

        templateEngine.render(template, models, rendered);

        return rendered.toString();
    }
}

View File

@ -14,10 +14,7 @@ import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.ClusteredUrlDetails;
import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.SearchFilters;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.model.*;
import nu.marginalia.search.results.UrlDeduplicator;
import nu.marginalia.search.svc.SearchQueryCountService;
import nu.marginalia.search.svc.SearchUnitConversionService;
@ -75,9 +72,10 @@ public class SearchOperator {
this.searchVisitorCount = searchVisitorCount;
}
public List<UrlDetails> doSiteSearch(String domain,
public SimpleSearchResults doSiteSearch(String domain,
int domainId,
int count) {
int count,
int page) {
var queryParams = paramFactory.forSiteSearch(domain, domainId, count);
var queryResponse = queryClient.search(queryParams);
@ -85,15 +83,16 @@ public class SearchOperator {
return getResultsFromQuery(queryResponse);
}
public List<UrlDetails> doBacklinkSearch(String domain) {
public SimpleSearchResults doBacklinkSearch(String domain, int page) {
var queryParams = paramFactory.forBacklinkSearch(domain);
var queryParams = paramFactory.forBacklinkSearch(domain, page);
var queryResponse = queryClient.search(queryParams);
return getResultsFromQuery(queryResponse);
}
public List<UrlDetails> doLinkSearch(String source, String dest) {
public SimpleSearchResults doLinkSearch(String source, String dest) {
var queryParams = paramFactory.forLinkSearch(source, dest);
var queryResponse = queryClient.search(queryParams);
@ -110,7 +109,7 @@ public class SearchOperator {
var queryParams = paramFactory.forRegularSearch(userParams);
QueryResponse queryResponse = queryClient.search(queryParams);
var queryResults = getResultsFromQuery(queryResponse);
var queryResults = getResultsFromQuery(queryResponse).results;
// Cluster the results based on the query response
List<ClusteredUrlDetails> clusteredResults = SearchResultClusterer
@ -126,17 +125,17 @@ public class SearchOperator {
String evalResult = getFutureOrDefault(eval, "");
String focusDomain = queryResponse.domain();
int focusDomainId = focusDomain == null
int focusDomainId = (focusDomain == null || focusDomain.isBlank())
? -1
: domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(-1);
: domainQueries.tryGetDomainId(new EdgeDomain(focusDomain)).orElse(0);
List<String> problems = getProblems(evalResult, queryResults, queryResponse);
List<DecoratedSearchResults.Page> resultPages = IntStream.rangeClosed(1, queryResponse.totalPages())
.mapToObj(number -> new DecoratedSearchResults.Page(
List<ResultsPage> resultPages = IntStream.rangeClosed(1, queryResponse.totalPages())
.mapToObj(number -> new ResultsPage(
number,
number == userParams.page(),
userParams.withPage(number).renderUrl(websiteUrl)
userParams.withPage(number).renderUrl()
))
.toList();
@ -146,7 +145,7 @@ public class SearchOperator {
.problems(problems)
.evalResult(evalResult)
.results(clusteredResults)
.filters(new SearchFilters(websiteUrl, userParams))
.filters(new SearchFilters(userParams))
.focusDomain(focusDomain)
.focusDomainId(focusDomainId)
.resultPages(resultPages)
@ -154,18 +153,28 @@ public class SearchOperator {
}
public List<UrlDetails> getResultsFromQuery(QueryResponse queryResponse) {
public SimpleSearchResults getResultsFromQuery(QueryResponse queryResponse) {
final QueryLimits limits = queryResponse.specs().queryLimits;
final UrlDeduplicator deduplicator = new UrlDeduplicator(limits.resultsByDomain());
// Update the query count (this is what you see on the front page)
searchVisitorCount.registerQuery();
return queryResponse.results().stream()
List<UrlDetails> details = queryResponse.results().stream()
.filter(deduplicator::shouldRetain)
.limit(limits.resultsTotal())
.map(SearchOperator::createDetails)
.toList();
List<ResultsPage> pages = IntStream.rangeClosed(1, queryResponse.totalPages())
.mapToObj(number -> new ResultsPage(
number,
number == queryResponse.currentPage(),
""
))
.toList();
return new SimpleSearchResults(details, pages);
}
private static UrlDetails createDetails(DecoratedSearchResultItem item) {
@ -181,6 +190,7 @@ public class SearchOperator {
item.rankingScore, // termScore
item.resultsFromDomain,
BrailleBlockPunchCards.printBits(item.bestPositions, 64),
item.bestPositions,
Long.bitCount(item.bestPositions),
item.rawIndexResult,
item.rawIndexResult.keywordScores

View File

@ -62,7 +62,7 @@ public class SearchQueryParamFactory {
);
}
public QueryParams forBacklinkSearch(String domain) {
public QueryParams forBacklinkSearch(String domain, int page) {
return new QueryParams("links:"+domain,
null,
List.of(),

View File

@ -45,6 +45,7 @@ public class SearchService extends Service {
SearchAddToCrawlQueueService addToCrawlQueueService,
SearchSiteInfoService siteInfoService,
SearchCrosstalkService crosstalkService,
SearchBrowseService searchBrowseService,
SearchQueryService searchQueryService)
throws Exception
{
@ -56,26 +57,34 @@ public class SearchService extends Service {
Spark.staticFiles.expireTime(600);
SearchServiceMetrics.get("/search", searchQueryService::pathSearch);
SearchServiceMetrics.get("/", frontPageService::render);
SearchServiceMetrics.get("/news.xml", frontPageService::renderNewsFeed);
SearchServiceMetrics.get("/:resource", this::serveStatic);
SearchServiceMetrics.post("/site/suggest/", addToCrawlQueueService::suggestCrawling);
SearchServiceMetrics.get("/site-search/:site/*", this::siteSearchRedir);
SearchServiceMetrics.get("/site", siteInfoService::handleOverview);
SearchServiceMetrics.get("/site/:site", siteInfoService::handle);
SearchServiceMetrics.post("/site/:site", siteInfoService::handlePost);
SearchServiceMetrics.get("/explore", searchBrowseService::handleBrowseRandom);
SearchServiceMetrics.get("/explore/:site", searchBrowseService::handleBrowseSite);
SearchServiceMetrics.get("/crosstalk/", crosstalkService::handle);
SearchServiceMetrics.get("/:resource", this::serveStatic);
Spark.exception(Exception.class, (e,p,q) -> {
logger.error("Error during processing", e);
wmsa_search_service_error_count.labels(p.pathInfo(), p.requestMethod()).inc();
errorPageService.serveError(p, q);
});
// Add compression
Spark.after((rq, rs) -> {
rs.header("Content-Encoding", "gzip");
});
Spark.awaitInitialization();
}

View File

@ -4,16 +4,19 @@ import nu.marginalia.WebsiteUrl;
import nu.marginalia.api.searchquery.model.results.ResultRankingParameters;
import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.search.model.SearchProfile;
import spark.Request;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import java.util.StringJoiner;
import static nu.marginalia.search.command.SearchRecentParameter.RECENT;
public record SearchParameters(String query,
public record SearchParameters(WebsiteUrl url,
String query,
SearchProfile profile,
SearchJsParameter js,
SearchRecentParameter recent,
@ -23,8 +26,21 @@ public record SearchParameters(String query,
int page
) {
public SearchParameters(String queryString, Request request) {
this(
/** Creates search parameters for the given query with every filter option left at its default.
 *
 * @param url   the website URL used as the base when rendering links
 * @param query the search query
 * @param page  the result page to request
 * @return parameters with no profile filter, default js/recent/title/adtech settings,
 *         and the new-filter flag unset
 */
public static SearchParameters defaultsForQuery(WebsiteUrl url, String query, int page) {
    return new SearchParameters(
            url,
            query, // was hard-coded to "test", which silently discarded the caller's query
            SearchProfile.NO_FILTER,
            SearchJsParameter.DEFAULT,
            SearchRecentParameter.DEFAULT,
            SearchTitleParameter.DEFAULT,
            SearchAdtechParameter.DEFAULT,
            false,
            page);
}
public static SearchParameters forRequest(String queryString, WebsiteUrl url, Request request) {
return new SearchParameters(
url,
queryString,
SearchProfile.getSearchProfile(request.queryParams("profile")),
SearchJsParameter.parse(request.queryParams("js")),
@ -41,29 +57,48 @@ public record SearchParameters(String query,
}
public SearchParameters withProfile(SearchProfile profile) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
}
public SearchParameters withJs(SearchJsParameter js) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
}
public SearchParameters withAdtech(SearchAdtechParameter adtech) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
}
public SearchParameters withRecent(SearchRecentParameter recent) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, true, page);
}
public SearchParameters withTitle(SearchTitleParameter title) {
return new SearchParameters(query, profile, js, recent, title, adtech, true, page);
return new SearchParameters(url, query, profile, js, recent, title, adtech, true, page);
}
public SearchParameters withPage(int page) {
return new SearchParameters(query, profile, js, recent, searchTitle, adtech, false, page);
return new SearchParameters(url, query, profile, js, recent, searchTitle, adtech, false, page);
}
public String renderUrl(WebsiteUrl baseUrl) {
/** Returns a copy of these parameters with the query replaced.
 * All other settings are carried over, and the new-filter flag is cleared.
 */
public SearchParameters withQuery(String query) {
    return new SearchParameters(
            url,
            query,
            profile,
            js,
            recent,
            searchTitle,
            adtech,
            false,
            page
    );
}
/** Renders the search URL for the current query with every whitespace-separated
 * term that starts with "site:" removed.
 */
public String renderUrlWithoutSiteFocus() {
    StringJoiner remainingTerms = new StringJoiner(" ");

    for (String term : query.split("\\s+")) {
        if (term.startsWith("site:")) {
            continue; // drop the site restriction
        }
        remainingTerms.add(term);
    }

    SearchParameters unfocused = withQuery(remainingTerms.toString());
    return unfocused.renderUrl();
}
/** Renders the search URL for the current query with a "site:" term for
 * the given domain appended at the end.
 */
public String renderUrlWithSiteFocus(EdgeDomain domain) {
    String focusedQuery = new StringBuilder()
            .append(query)
            .append(" site:")
            .append(domain.toString())
            .toString();

    return withQuery(focusedQuery).renderUrl();
}
public String renderUrl() {
String path = String.format("/search?query=%s&profile=%s&js=%s&adtech=%s&recent=%s&searchTitle=%s&newfilter=%s&page=%d",
URLEncoder.encode(query, StandardCharsets.UTF_8),
URLEncoder.encode(profile.filterId, StandardCharsets.UTF_8),
@ -75,7 +110,7 @@ public record SearchParameters(String query,
page
);
return baseUrl.withPath(path);
return url.withPath(path);
}
public ResultRankingParameters.TemporalBias temporalBias() {

View File

@ -1,36 +1,21 @@
package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import nu.marginalia.browse.model.BrowseResultSet;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.svc.SearchBrowseService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Response;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
import java.util.function.Predicate;
import java.util.regex.Pattern;
public class BrowseCommand implements SearchCommandInterface {
private final SearchBrowseService browseService;
private final MustacheRenderer<BrowseResultSet> browseResultsRenderer;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final Predicate<String> queryPatternPredicate = Pattern.compile("^browse:[.A-Za-z\\-0-9:]+$").asPredicate();
@Inject
public BrowseCommand(SearchBrowseService browseService,
RendererFactory rendererFactory)
throws IOException
public BrowseCommand()
{
this.browseService = browseService;
browseResultsRenderer = rendererFactory.renderer("search/browse-results");
}
@Override
@ -39,39 +24,24 @@ public class BrowseCommand implements SearchCommandInterface {
return Optional.empty();
}
var model = browseSite(parameters.query());
if (null == model)
return Optional.empty();
return Optional.of(browseResultsRenderer.render(model,
Map.of("query", parameters.query(),
"profile", parameters.profileStr(),
"focusDomain", model.focusDomain())
));
}
private BrowseResultSet browseSite(String humanQuery) {
String definePrefix = "browse:";
String word = humanQuery.substring(definePrefix.length()).toLowerCase();
String word = parameters.query().substring(definePrefix.length()).toLowerCase();
try {
if ("random".equals(word)) {
return browseService.getRandomEntries(0);
}
if (word.startsWith("random:")) {
int set = Integer.parseInt(word.split(":")[1]);
return browseService.getRandomEntries(set);
}
else {
return browseService.getRelatedEntries(word);
}
}
catch (Exception ex) {
logger.info("No Results");
return null;
String redirectPath;
if (word.equals("random")) {
redirectPath = "/explore";
} else {
redirectPath = "/explore/" + word;
}
return Optional.of("""
<!DOCTYPE html>
<html lang="en">
<meta charset="UTF-8">
<title>Redirecting...</title>
<meta http-equiv="refresh" content="0; %s">
""".formatted(redirectPath));
}

View File

@ -1,35 +1,37 @@
package nu.marginalia.search.command.commands;
import com.google.inject.Inject;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.search.JteRenderer;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.command.SearchCommandInterface;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.DecoratedSearchResults;
import nu.marginalia.search.model.NavbarModel;
import spark.Response;
import java.io.IOException;
import java.util.Map;
import java.util.Optional;
public class SearchCommand implements SearchCommandInterface {
private final SearchOperator searchOperator;
private final MustacheRenderer<DecoratedSearchResults> searchResultsRenderer;
private final JteRenderer jteRenderer;
@Inject
public SearchCommand(SearchOperator searchOperator,
RendererFactory rendererFactory) throws IOException {
JteRenderer jteRenderer) throws IOException {
this.searchOperator = searchOperator;
searchResultsRenderer = rendererFactory.renderer("search/search-results");
this.jteRenderer = jteRenderer;
}
@Override
public Optional<Object> process(Response response, SearchParameters parameters) {
try {
DecoratedSearchResults results = searchOperator.doSearch(parameters);
return Optional.of(searchResultsRenderer.render(results));
return Optional.of(jteRenderer.render("serp/main.jte",
Map.of("results", results, "navbar", NavbarModel.SEARCH)
));
}
catch (InterruptedException ex) {
Thread.currentThread().interrupt();

View File

@ -1,6 +1,7 @@
package nu.marginalia.search.model;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.idx.DocumentFlags;
import nu.marginalia.model.idx.WordFlags;
import org.jetbrains.annotations.NotNull;
@ -16,6 +17,9 @@ public class ClusteredUrlDetails implements Comparable<ClusteredUrlDetails> {
@NotNull
public final List<UrlDetails> rest;
/** Selects color scheme in the GUI for the result */
public final PostColorScheme colorScheme;
/** Create a new ClusteredUrlDetails from a collection of UrlDetails,
* with the best result as "first", and the others, in descending order
* of quality as the "rest"...
@ -32,6 +36,7 @@ public class ClusteredUrlDetails implements Comparable<ClusteredUrlDetails> {
this.first = items.removeFirst();
this.rest = items;
this.colorScheme = PostColorScheme.select(first);
double bestScore = first.termScore;
double scoreLimit = Math.min(4.0, bestScore * 1.25);
@ -64,6 +69,14 @@ public class ClusteredUrlDetails implements Comparable<ClusteredUrlDetails> {
public ClusteredUrlDetails(@NotNull UrlDetails onlyFirst) {
this.first = onlyFirst;
this.rest = Collections.emptyList();
this.colorScheme = PostColorScheme.select(first);
}
/** For tests */
public ClusteredUrlDetails(@NotNull UrlDetails onlyFirst, @NotNull List<UrlDetails> rest) {
this.first = onlyFirst;
this.rest = rest;
this.colorScheme = PostColorScheme.select(first);
}
// For renderer use, do not remove
@ -99,4 +112,39 @@ public class ClusteredUrlDetails implements Comparable<ClusteredUrlDetails> {
public int compareTo(@NotNull ClusteredUrlDetails o) {
return Objects.compare(first, o.first, UrlDetails::compareTo);
}
/** Color scheme used in the GUI for a search result.
 * <p>
 * Each scheme carries two background/text pairs of CSS class names
 * (they look like Tailwind utility classes -- confirm against the templates).
 */
public enum PostColorScheme {
    Slate("bg-slate-100", "text-slate-950", "bg-slate-200", "text-black"),
    Green("bg-green-50", "text-green-900", "bg-green-100", "text-black"),
    Purple("bg-purple-50", "text-purple-900", "bg-purple-100", "text-black"),
    White("bg-white", "text-blue-950", "bg-gray-100", "text-black");

    public final String backgroundColor;
    public final String textColor;
    public final String backgroundColor2;
    public final String textColor2;

    PostColorScheme(String backgroundColor, String textColor, String backgroundColor2, String textColor2) {
        this.backgroundColor = backgroundColor;
        this.textColor = textColor;
        this.backgroundColor2 = backgroundColor2;
        this.textColor2 = textColor2;
    }

    /** Picks the scheme from the document flags in the result's encoded metadata.
     * Plain text wins over wiki, which wins over forum; everything else is White.
     */
    public static PostColorScheme select(UrlDetails result) {
        final long docMetadata = result.resultItem.encodedDocMetadata;

        if (DocumentFlags.PlainText.isPresent(docMetadata)) {
            return Slate;
        }
        if (DocumentFlags.GeneratorWiki.isPresent(docMetadata)) {
            return Green;
        }
        if (DocumentFlags.GeneratorForum.isPresent(docMetadata)) {
            return Purple;
        }
        return White;
    }
}
}

View File

@ -1,5 +1,6 @@
package nu.marginalia.search.model;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.command.SearchParameters;
import java.util.List;
@ -21,7 +22,7 @@ public class DecoratedSearchResults {
String focusDomain,
int focusDomainId,
SearchFilters filters,
List<Page> resultPages) {
List<ResultsPage> resultPages) {
this.params = params;
this.problems = problems;
this.evalResult = evalResult;
@ -62,27 +63,29 @@ public class DecoratedSearchResults {
return focusDomainId;
}
/** Tells whether these results have a focus domain, which is the case
 * whenever the focus domain id is not negative.
 */
public boolean hasFocusDomain() {
    final boolean idAssigned = focusDomainId >= 0;
    return idAssigned;
}
public SearchFilters getFilters() {
return filters;
}
public List<Page> getResultPages() {
public List<ResultsPage> getResultPages() {
return resultPages;
}
private final String focusDomain;
private final int focusDomainId;
private final SearchFilters filters;
private final List<Page> resultPages;
private final List<ResultsPage> resultPages;
public boolean isMultipage() {
return resultPages.size() > 1;
}
public record Page(int number, boolean current, String href) {
}
// These are used by the search form, they look unused in the IDE but are used by the mustache template,
// DO NOT REMOVE THEM
public int getResultCount() {
@ -130,7 +133,8 @@ public class DecoratedSearchResults {
private String focusDomain;
private int focusDomainId;
private SearchFilters filters;
private List<Page> resultPages;
private List<ResultsPage> resultPages;
private WebsiteUrl websiteUrl;
DecoratedSearchResultsBuilder() {
}
@ -170,7 +174,7 @@ public class DecoratedSearchResults {
return this;
}
public DecoratedSearchResultsBuilder resultPages(List<Page> resultPages) {
public DecoratedSearchResultsBuilder resultPages(List<ResultsPage> resultPages) {
this.resultPages = resultPages;
return this;
}
@ -178,9 +182,5 @@ public class DecoratedSearchResults {
public DecoratedSearchResults build() {
return new DecoratedSearchResults(this.params, this.problems, this.evalResult, this.results, this.focusDomain, this.focusDomainId, this.filters, this.resultPages);
}
public String toString() {
return "DecoratedSearchResults.DecoratedSearchResultsBuilder(params=" + this.params + ", problems=" + this.problems + ", evalResult=" + this.evalResult + ", results=" + this.results + ", focusDomain=" + this.focusDomain + ", focusDomainId=" + this.focusDomainId + ", filters=" + this.filters + ", resultPages=" + this.resultPages + ")";
}
}
}

View File

@ -0,0 +1,35 @@
package nu.marginalia.search.model;
import nu.marginalia.model.EdgeDomain;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.stream.Collectors;
/** A non-empty list of url details that all belong to the same domain.
 * <p>
 * This is conceptually similar to ClusteredUrlDetails; the difference is that
 * ClusteredUrlDetails has additional logic for deciding whether to group at all,
 * whereas this class always groups by domain.
 *
 * @param urlDetails the grouped entries, must not be empty
 */
public record GroupedUrlDetails (List<UrlDetails> urlDetails) {

    /** @throws IllegalArgumentException if {@code urlDetails} is empty */
    public GroupedUrlDetails {
        if (urlDetails.isEmpty()) {
            throw new IllegalArgumentException("urlDetails must never be empty");
        }
    }

    /** The domain of the first entry in the group */
    public EdgeDomain domain() {
        return urlDetails.getFirst().getUrl().domain;
    }

    /** The first entry in the group */
    public UrlDetails first() {
        return urlDetails.getFirst();
    }

    /** Groups the provided details by domain.
     * <p>
     * The details are sorted by ascending termScore before grouping, so each group lists
     * its entries in that order.  The groups are collected into a LinkedHashMap so that the
     * groups themselves also come out in a stable order, that of their first (lowest
     * termScore) entry, rather than in arbitrary hash order.
     *
     * @param details the details to group, may be empty
     * @return one group per distinct domain; empty if {@code details} is empty
     */
    public static List<GroupedUrlDetails> groupResults(List<UrlDetails> details) {
        return details.stream()
                .sorted(Comparator.comparingDouble(d -> d.termScore))
                .collect(Collectors.groupingBy(d -> d.getUrl().domain, LinkedHashMap::new, Collectors.toList()))
                .values().stream().map(GroupedUrlDetails::new)
                .toList();
    }
}

View File

@ -0,0 +1,85 @@
package nu.marginalia.search.model;
import java.util.List;
/** Model for the navigation bar, consisting of two groups of links.
 * <p>
 * The first group holds the main sections (Search, Domains, Explore) and the second
 * the auxiliary links (About, API, Donate).  One pre-built, immutable instance exists
 * per main section, with that section's entry flagged as active, plus {@link #LIMBO}
 * where no entry is active.
 *
 * @param first  the main section links
 * @param second the auxiliary links
 */
public record NavbarModel(NavbarGroup first, NavbarGroup second) {
    /** A single link in the navigation bar; {@code active} marks the current section. */
    public record NavbarEntry(String name, String url, boolean active) { }

    /** A group of navigation bar links */
    public record NavbarGroup(List<NavbarEntry> entries) { }

    /** Navigation bar with no active entry */
    public static final NavbarModel LIMBO = withActiveSection("");

    /** Navigation bar with "Search" active */
    public static final NavbarModel SEARCH = withActiveSection("Search");

    /** Navigation bar with "Domains" active */
    public static final NavbarModel SITEINFO = withActiveSection("Domains");

    /** Navigation bar with "Explore" active */
    public static final NavbarModel EXPLORE = withActiveSection("Explore");

    /** Builds the navigation bar, flagging the main section named activeSection (if any) as active. */
    private static NavbarModel withActiveSection(String activeSection) {
        return new NavbarModel(
                new NavbarGroup(
                        List.of(
                                new NavbarEntry("Search", "/", "Search".equals(activeSection)),
                                new NavbarEntry("Domains", "/site", "Domains".equals(activeSection)),
                                new NavbarEntry("Explore", "/explore", "Explore".equals(activeSection))
                        )
                ),
                // The auxiliary links currently all point at "/" and are never active
                new NavbarGroup(
                        List.of(
                                new NavbarEntry("About", "/", false),
                                new NavbarEntry("API", "/", false),
                                new NavbarEntry("Donate", "/", false)
                        )
                )
        );
    }
}

View File

@ -0,0 +1,4 @@
package nu.marginalia.search.model;
/** An entry in the pagination controls of a result listing.
 *
 * @param number  the page number
 * @param current whether this is the page currently being shown
 * @param href    link to the page
 */
public record ResultsPage(
        int number,
        boolean current,
        String href
) { }

View File

@ -0,0 +1,6 @@
package nu.marginalia.search.model;
import nu.marginalia.search.command.SearchParameters;
/** Model for a search error page: an error title, the rest of the error text,
 * and the search parameters and filters that go along with it.
 */
public record SearchErrorMessageModel(
        String errorTitle,
        String errorRest,
        SearchParameters parameters,
        SearchFilters filters
) { }

View File

@ -7,8 +7,6 @@ import java.util.List;
/** Models the search filters displayed next to the search results */
public class SearchFilters {
private final WebsiteUrl url;
public final String currentFilter;
// These are necessary for the renderer to access the data
@ -45,9 +43,27 @@ public class SearchFilters {
public List<List<Filter>> getFilterGroups() {
return filterGroups;
}
public List<SearchOption> searchOptions() {
return List.of(
searchTitleOption,
showRecentOption,
removeJsOption,
reduceAdtechOption
);
}
public SearchFilters(WebsiteUrl url, SearchParameters parameters) {
this.url = url;
public SearchFilters(WebsiteUrl url) {
this(new SearchParameters(url, "",
SearchProfile.NO_FILTER,
SearchJsParameter.DEFAULT,
SearchRecentParameter.DEFAULT,
SearchTitleParameter.DEFAULT,
SearchAdtechParameter.DEFAULT,
false,
1));
}
public SearchFilters(SearchParameters parameters) {
removeJsOption = new RemoveJsOption(parameters);
reduceAdtechOption = new ReduceAdtechOption(parameters);
@ -59,40 +75,48 @@ public class SearchFilters {
filterGroups = List.of(
List.of(
new Filter("No Filter", SearchProfile.NO_FILTER, parameters),
// new Filter("Popular", SearchProfile.POPULAR, parameters),
new Filter("Small Web", SearchProfile.SMALLWEB, parameters),
new Filter("Blogosphere", SearchProfile.BLOGOSPHERE, parameters),
new Filter("Academia", SearchProfile.ACADEMIA, parameters)
new Filter("All", "fa-globe", SearchProfile.NO_FILTER, parameters),
new Filter("Blogs", "fa-blog", SearchProfile.BLOGOSPHERE, parameters),
new Filter("Academia", "fa-university", SearchProfile.ACADEMIA, parameters)
),
List.of(
new Filter("Vintage", SearchProfile.VINTAGE, parameters),
new Filter("Plain Text", SearchProfile.PLAIN_TEXT, parameters),
new Filter("~tilde", SearchProfile.TILDE, parameters)
new Filter("Vintage", "fa-clock-rotate-left", SearchProfile.VINTAGE, parameters),
new Filter("Plain Text", "fa-file", SearchProfile.PLAIN_TEXT, parameters),
new Filter("Tilde", "fa-house", SearchProfile.TILDE, parameters)
),
List.of(
new Filter("Wiki", SearchProfile.WIKI, parameters),
new Filter("Forum", SearchProfile.FORUM, parameters),
new Filter("Docs", SearchProfile.DOCS, parameters),
new Filter("Recipes", SearchProfile.FOOD, parameters)
new Filter("Wikis", "fa-pencil", SearchProfile.WIKI, parameters),
new Filter("Forums", "fa-comments", SearchProfile.FORUM, parameters),
new Filter("Recipes", "fa-utensils", SearchProfile.FOOD, parameters)
)
);
}
public class RemoveJsOption {
public class RemoveJsOption implements SearchOption {
private final SearchJsParameter value;
private final String icon = "fa-wrench";
public final String url;
public String value() {
return this.value.name();
}
public String getUrl() {
return url;
}
public String id() {
return getClass().getSimpleName();
}
public boolean isSet() {
return value.equals(SearchJsParameter.DENY_JS);
}
public String icon() {
return icon;
}
public String name() {
return "Remove Javascript";
}
@ -105,14 +129,26 @@ public class SearchFilters {
default -> SearchJsParameter.DENY_JS;
};
this.url = parameters.withJs(toggledValue).renderUrl(SearchFilters.this.url);
this.url = parameters.withJs(toggledValue).renderUrl();
}
}
public class ReduceAdtechOption {
public class ReduceAdtechOption implements SearchOption {
private final SearchAdtechParameter value;
private final String icon = "fa-dumpster-fire";
public final String url;
public String value() {
return this.value.name();
}
public String id() {
return getClass().getSimpleName();
}
public String icon() {
return icon;
}
public String getUrl() {
return url;
}
@ -133,17 +169,28 @@ public class SearchFilters {
default -> SearchAdtechParameter.REDUCE;
};
this.url = parameters.withAdtech(toggledValue).renderUrl(SearchFilters.this.url);
this.url = parameters.withAdtech(toggledValue).renderUrl();
}
}
public class ShowRecentOption {
public class ShowRecentOption implements SearchOption {
private final SearchRecentParameter value;
private final String icon = "fa-baby";
public String value() {
return this.value.name();
}
public final String url;
public String getUrl() {
return url;
}
public String id() {
return getClass().getSimpleName();
}
public String icon() {
return icon;
}
public boolean isSet() {
return value.equals(SearchRecentParameter.RECENT);
@ -161,14 +208,26 @@ public class SearchFilters {
default -> SearchRecentParameter.RECENT;
};
this.url = parameters.withRecent(toggledValue).renderUrl(SearchFilters.this.url);
this.url = parameters.withRecent(toggledValue).renderUrl();
}
}
public class SearchTitleOption {
public class SearchTitleOption implements SearchOption {
private final SearchTitleParameter value;
public String icon = "fa-angle-up";
public final String url;
public String value() {
return this.value.name();
}
public String id() {
return getClass().getSimpleName();
}
public String icon() {
return icon;
}
public String getUrl() {
return url;
}
@ -189,23 +248,34 @@ public class SearchFilters {
default -> SearchTitleParameter.TITLE;
};
this.url = parameters.withTitle(toggledValue).renderUrl(SearchFilters.this.url);
this.url = parameters.withTitle(toggledValue).renderUrl();
}
}
/** Common view of the toggleable search options, so they can be handled as a list. */
public interface SearchOption {
    /** Identifier for the option */
    String id();

    /** Human readable name of the option */
    String name();

    /** Name of the icon to show for the option */
    String icon();

    /** The option's current value, as a string */
    String value();

    /** Whether the option is currently switched on */
    boolean isSet();

    /** URL associated with the option */
    String getUrl();
}
public class Filter {
public final String icon;
public final SearchProfile profile;
public final String displayName;
public final boolean current;
public final String url;
public Filter(String displayName, SearchProfile profile, SearchParameters parameters) {
public Filter(String displayName, String icon, SearchProfile profile, SearchParameters parameters) {
this.displayName = displayName;
this.icon = icon;
this.profile = profile;
this.current = profile.equals(parameters.profile());
this.url = parameters.withProfile(profile).renderUrl(SearchFilters.this.url);
this.url = parameters.withProfile(profile).renderUrl();
}
public String getDisplayName() {

View File

@ -1,9 +1,9 @@
package nu.marginalia.search.model;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.model.crawl.HtmlFeature;
import nu.marginalia.api.searchquery.model.query.SearchQuery;
import nu.marginalia.api.searchquery.model.query.SearchSetIdentifier;
import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.model.crawl.HtmlFeature;
import java.util.Objects;

View File

@ -0,0 +1,13 @@
package nu.marginalia.search.model;
import java.util.List;
/**
 * Plain holder that bundles a list of search results with the list of
 * result pages that accompanies them.
 * <p>
 * Both lists are stored by reference exactly as passed in; no copy is made.
 */
public class SimpleSearchResults {

    /** The result entries. */
    public final List<UrlDetails> results;

    /** The result pages associated with {@link #results}. */
    public final List<ResultsPage> resultPages;

    /**
     * @param results     the result entries
     * @param resultPages the result pages associated with the results
     */
    public SimpleSearchResults(List<UrlDetails> results,
                               List<ResultsPage> resultPages)
    {
        this.resultPages = resultPages;
        this.results = results;
    }
}

View File

@ -5,6 +5,7 @@ import nu.marginalia.api.searchquery.model.results.SearchResultKeywordScore;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.model.crawl.HtmlFeature;
import nu.marginalia.model.idx.DocumentMetadata;
import java.util.ArrayList;
import java.util.List;
@ -29,12 +30,14 @@ public class UrlDetails implements Comparable<UrlDetails> {
public int resultsFromSameDomain;
public int topology;
public String positions;
public long positionsMask;
public int positionsCount;
public SearchResultItem resultItem;
public List<SearchResultKeywordScore> keywordScores;
public UrlDetails(long id, int domainId, EdgeUrl url, String title, String description, String format, int features, DomainIndexingState domainState, double termScore, int resultsFromSameDomain, String positions, int positionsCount, SearchResultItem resultItem, List<SearchResultKeywordScore> keywordScores) {
public UrlDetails(long id, int domainId, EdgeUrl url, String title, String description, String format, int features, DomainIndexingState domainState, double termScore, int resultsFromSameDomain, String positions, long positionsMask, int positionsCount, SearchResultItem resultItem, List<SearchResultKeywordScore> keywordScores) {
this.id = id;
this.domainId = domainId;
this.url = url;
@ -47,6 +50,8 @@ public class UrlDetails implements Comparable<UrlDetails> {
this.resultsFromSameDomain = resultsFromSameDomain;
this.positions = positions;
this.positionsCount = positionsCount;
this.positionsMask = positionsMask;
this.topology = DocumentMetadata.decodeTopology(resultItem.encodedDocMetadata);
this.resultItem = resultItem;
this.keywordScores = keywordScores;
}
@ -227,67 +232,11 @@ public class UrlDetails implements Comparable<UrlDetails> {
return this.keywordScores;
}
public UrlDetails withId(long id) {
return this.id == id ? this : new UrlDetails(id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withDomainId(int domainId) {
return this.domainId == domainId ? this : new UrlDetails(this.id, domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withUrl(EdgeUrl url) {
return this.url == url ? this : new UrlDetails(this.id, this.domainId, url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withTitle(String title) {
return this.title == title ? this : new UrlDetails(this.id, this.domainId, this.url, title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withDescription(String description) {
return this.description == description ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withFormat(String format) {
return this.format == format ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withFeatures(int features) {
return this.features == features ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withDomainState(DomainIndexingState domainState) {
return this.domainState == domainState ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withTermScore(double termScore) {
return this.termScore == termScore ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withResultsFromSameDomain(int resultsFromSameDomain) {
return this.resultsFromSameDomain == resultsFromSameDomain ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withPositions(String positions) {
return this.positions == positions ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, positions, this.positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withPositionsCount(int positionsCount) {
return this.positionsCount == positionsCount ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, positionsCount, this.resultItem, this.keywordScores);
}
public UrlDetails withResultItem(SearchResultItem resultItem) {
return this.resultItem == resultItem ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, resultItem, this.keywordScores);
}
public UrlDetails withKeywordScores(List<SearchResultKeywordScore> keywordScores) {
return this.keywordScores == keywordScores ? this : new UrlDetails(this.id, this.domainId, this.url, this.title, this.description, this.format, this.features, this.domainState, this.termScore, this.resultsFromSameDomain, this.positions, this.positionsCount, this.resultItem, keywordScores);
}
public String toString() {
return "UrlDetails(id=" + this.getId() + ", domainId=" + this.getDomainId() + ", url=" + this.getUrl() + ", title=" + this.getTitle() + ", description=" + this.getDescription() + ", format=" + this.getFormat() + ", features=" + this.getFeatures() + ", domainState=" + this.getDomainState() + ", termScore=" + this.getTermScore() + ", resultsFromSameDomain=" + this.getResultsFromSameDomain() + ", positions=" + this.getPositions() + ", positionsCount=" + this.getPositionsCount() + ", resultItem=" + this.getResultItem() + ", keywordScores=" + this.getKeywordScores() + ")";
}
public static record UrlProblem(String name, String description) {
public record UrlProblem(String name, String description) {
}
}

View File

@ -9,12 +9,14 @@ import nu.marginalia.browse.model.BrowseResultSet;
import nu.marginalia.db.DbDomainQueries;
import nu.marginalia.db.DomainBlacklist;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.search.JteRenderer;
import nu.marginalia.search.model.NavbarModel;
import nu.marginalia.search.results.BrowseResultCleaner;
import spark.Request;
import spark.Response;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
@ -26,6 +28,7 @@ public class SearchBrowseService {
private final DbDomainQueries domainQueries;
private final DomainBlacklist blacklist;
private final DomainInfoClient domainInfoClient;
private final JteRenderer jteRenderer;
private final BrowseResultCleaner browseResultCleaner;
@Inject
@ -33,15 +36,43 @@ public class SearchBrowseService {
DbDomainQueries domainQueries,
DomainBlacklist blacklist,
DomainInfoClient domainInfoClient,
JteRenderer jteRenderer,
BrowseResultCleaner browseResultCleaner)
{
this.randomDomains = randomDomains;
this.domainQueries = domainQueries;
this.blacklist = blacklist;
this.domainInfoClient = domainInfoClient;
this.jteRenderer = jteRenderer;
this.browseResultCleaner = browseResultCleaner;
}
/**
 * Renders the explore page with a random selection of websites.
 *
 * @param request  the incoming request (not inspected)
 * @param response the outgoing response (not modified)
 * @return the rendered {@code explore/main.jte} template
 */
public String handleBrowseRandom(Request request, Response response) throws IOException {
    // Always draws from set 1, like the original inline call
    final BrowseResultSet randomSelection = getRandomEntries(1);

    return jteRenderer.render(
            "explore/main.jte",
            Map.of(
                    "navbar", NavbarModel.EXPLORE,
                    "results", randomSelection));
}
/**
 * Renders the explore page with websites related to the domain given
 * in the {@code site} path parameter.
 *
 * @param request  the incoming request; its {@code site} parameter names the focus domain
 * @param response the outgoing response (not modified)
 * @return the rendered {@code explore/main.jte} template
 */
public String handleBrowseSite(Request request, Response response) throws Exception {
    final String site = request.params("site");

    return jteRenderer.render(
            "explore/main.jte",
            Map.of(
                    "navbar", NavbarModel.EXPLORE,
                    "results", relatedEntriesOrEmpty(site)));
}

/**
 * Looks up entries related to {@code site}; if the lookup fails for any reason,
 * falls back to an empty result set that still carries the focus domain.
 */
private BrowseResultSet relatedEntriesOrEmpty(String site) {
    try {
        return getRelatedEntries(site);
    }
    catch (Exception ignored) {
        // Best effort: show an empty listing rather than failing the request
        return new BrowseResultSet(List.of(), site);
    }
}
public BrowseResultSet getRandomEntries(int set) {
List<BrowseResult> results = randomDomains.getRandomDomains(25, blacklist, set);

View File

@ -4,6 +4,7 @@ import com.google.inject.Inject;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.model.SimpleSearchResults;
import nu.marginalia.search.model.UrlDetails;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
@ -42,10 +43,10 @@ public class SearchCrosstalkService {
parts[i] = parts[i].trim();
}
var resAtoB = searchOperator.doLinkSearch(parts[0], parts[1]);
var resBtoA = searchOperator.doLinkSearch(parts[1], parts[0]);
SimpleSearchResults resAtoB = searchOperator.doLinkSearch(parts[0], parts[1]);
SimpleSearchResults resBtoA = searchOperator.doLinkSearch(parts[1], parts[0]);
var model = new CrosstalkResult(parts[0], parts[1], resAtoB, resBtoA);
CrosstalkResult model = new CrosstalkResult(parts[0], parts[1], resAtoB.results, resBtoA.results);
return renderer.render(model);
}

View File

@ -1,9 +1,12 @@
package nu.marginalia.search.svc;
import com.google.inject.Inject;
import nu.marginalia.index.api.IndexMqClient;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.JteRenderer;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.NavbarModel;
import nu.marginalia.search.model.SearchErrorMessageModel;
import nu.marginalia.search.model.SearchFilters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@ -13,35 +16,39 @@ import java.io.IOException;
import java.util.Map;
public class SearchErrorPageService {
private final IndexMqClient indexMqClient;
private final WebsiteUrl websiteUrl;
private final JteRenderer jteRenderer;
private final Logger logger = LoggerFactory.getLogger(getClass());
private final MustacheRenderer<Object> renderer;
@Inject
public SearchErrorPageService(IndexMqClient indexMqClient,
RendererFactory rendererFactory) throws IOException {
renderer = rendererFactory.renderer("search/error-page-search");
this.indexMqClient = indexMqClient;
public SearchErrorPageService(WebsiteUrl websiteUrl,
JteRenderer jteRenderer) throws IOException {
this.websiteUrl = websiteUrl;
this.jteRenderer = jteRenderer;
}
public void serveError(Request request, Response rsp) {
rsp.body(renderError(request, "Internal error",
var params = SearchParameters.forRequest(
request.queryParamOrDefault("query", ""),
websiteUrl,
request);
rsp.body(jteRenderer.render("serp/error.jte",
Map.of("navbar", NavbarModel.LIMBO,
"model", new SearchErrorMessageModel(
"An error occurred when communicating with the search engine index.",
"""
An error occurred when communicating with the search engine index.
<p>
This is hopefully a temporary state of affairs. It may be due to
an upgrade. The index typically takes a about two or three minutes
to reload from a cold restart. Thanks for your patience.
"""));
}
private String renderError(Request request, String title, String message) {
return renderer.render(Map.of("title", title, "message", message,
"profile", request.queryParamOrDefault("profile", ""),
"js", request.queryParamOrDefault("js", ""),
"query", request.queryParamOrDefault("query", "")
""",
params,
new SearchFilters(params)
)
)
));
}
}

View File

@ -19,7 +19,7 @@ public class SearchFlagSiteService {
private final CategoryItem unknownCategory = new CategoryItem("unknown", "Unknown");
private final List<CategoryItem> categories =
public static final List<CategoryItem> categories =
List.of(
new CategoryItem("spam", "Spam"),
new CategoryItem("freebooting", "Reposting Stolen Content"),

View File

@ -3,9 +3,10 @@ package nu.marginalia.search.svc;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.search.svc.SearchQueryCountService;
import nu.marginalia.search.JteRenderer;
import nu.marginalia.search.model.NavbarModel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@ -19,34 +20,41 @@ import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/** Renders the front page (index) */
@Singleton
public class SearchFrontPageService {
private final MustacheRenderer<IndexModel> template;
private final HikariDataSource dataSource;
private final JteRenderer jteRenderer;
private final SearchQueryCountService searchVisitorCount;
private final WebsiteUrl websiteUrl;
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public SearchFrontPageService(RendererFactory rendererFactory,
HikariDataSource dataSource,
SearchQueryCountService searchVisitorCount
JteRenderer jteRenderer,
SearchQueryCountService searchVisitorCount, WebsiteUrl websiteUrl
) throws IOException {
this.template = rendererFactory.renderer("search/index/index");
this.dataSource = dataSource;
this.jteRenderer = jteRenderer;
this.searchVisitorCount = searchVisitorCount;
this.websiteUrl = websiteUrl;
}
public String render(Request request, Response response) {
response.header("Cache-control", "public,max-age=3600");
return template.render(new IndexModel(
getNewsItems(),
searchVisitorCount.getQueriesPerMinute()
));
return jteRenderer.render("serp/first.jte",
Map.of("navbar", NavbarModel.SEARCH, "websiteUrl", websiteUrl)
);
// return template.render(new IndexModel(
// getNewsItems(),
// searchVisitorCount.getQueriesPerMinute()
// ));
}

View File

@ -50,7 +50,7 @@ public class SearchQueryService {
throw new RedirectException(websiteUrl.url());
}
return new SearchParameters(queryParam.trim(), request);
return SearchParameters.forRequest(queryParam.trim(), websiteUrl, request);
}
catch (Exception ex) {
// Bots keep sending bad requests, suppress the error otherwise it will

View File

@ -1,6 +1,7 @@
package nu.marginalia.search.svc;
import com.google.inject.Inject;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.api.domains.DomainInfoClient;
import nu.marginalia.api.domains.model.DomainInformation;
import nu.marginalia.api.domains.model.SimilarDomain;
@ -13,7 +14,11 @@ import nu.marginalia.model.EdgeDomain;
import nu.marginalia.renderer.MustacheRenderer;
import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.screenshot.ScreenshotService;
import nu.marginalia.search.JteRenderer;
import nu.marginalia.search.SearchOperator;
import nu.marginalia.search.model.GroupedUrlDetails;
import nu.marginalia.search.model.NavbarModel;
import nu.marginalia.search.model.ResultsPage;
import nu.marginalia.search.model.UrlDetails;
import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
import org.slf4j.Logger;
@ -23,8 +28,7 @@ import spark.Response;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
@ -42,6 +46,9 @@ public class SearchSiteInfoService {
private final LiveCaptureClient liveCaptureClient;
private final ScreenshotService screenshotService;
private final HikariDataSource dataSource;
private final JteRenderer jteRenderer;
@Inject
public SearchSiteInfoService(SearchOperator searchOperator,
DomainInfoClient domainInfoClient,
@ -50,7 +57,9 @@ public class SearchSiteInfoService {
DbDomainQueries domainQueries,
FeedsClient feedsClient,
LiveCaptureClient liveCaptureClient,
ScreenshotService screenshotService) throws IOException
ScreenshotService screenshotService,
HikariDataSource dataSource,
JteRenderer jteRenderer) throws IOException
{
this.searchOperator = searchOperator;
this.domainInfoClient = domainInfoClient;
@ -62,6 +71,32 @@ public class SearchSiteInfoService {
this.feedsClient = feedsClient;
this.liveCaptureClient = liveCaptureClient;
this.screenshotService = screenshotService;
this.dataSource = dataSource;
this.jteRenderer = jteRenderer;
}
/**
 * Renders the site-info start page, listing up to ten domains from
 * {@code EC_DOMAIN} with {@code NODE_AFFINITY = 0}, highest ID first.
 * NOTE(review): presumably these are the most recently discovered, not yet
 * assigned domains — confirm against the schema.
 *
 * @param request  the incoming request (not inspected)
 * @param response the outgoing response (not modified)
 * @return the rendered {@code siteinfo/start.jte} template
 * @throws RuntimeException wrapping the {@link SQLException} if the query fails
 */
public Object handleOverview(Request request, Response response) {
    List<SiteOverviewModel.DiscoveredDomain> domains = new ArrayList<>();

    // The result set is declared as a resource too, so it is closed
    // explicitly rather than implicitly via the statement.
    try (var conn = dataSource.getConnection();
         var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, DISCOVER_DATE FROM EC_DOMAIN WHERE NODE_AFFINITY = 0 ORDER BY ID DESC LIMIT 10");
         var rs = stmt.executeQuery())
    {
        while (rs.next()) {
            domains.add(new SiteOverviewModel.DiscoveredDomain(
                    rs.getString("DOMAIN_NAME"),
                    rs.getString("DISCOVER_DATE")));
        }
    }
    catch (SQLException ex) {
        // Keep the original exception as the cause so the failure can be diagnosed
        throw new RuntimeException("Failed to fetch discovered domains for site overview", ex);
    }

    return jteRenderer.render("siteinfo/start.jte",
            Map.of("navbar", NavbarModel.SITEINFO,
                    "model", new SiteOverviewModel(domains)));
}
/**
 * Model passed to the {@code siteinfo/start.jte} template by {@code handleOverview}.
 *
 * @param domains the domains to list on the overview page
 */
public record SiteOverviewModel(List<DiscoveredDomain> domains) {
/**
 * A single listed domain.
 *
 * @param name      value of the {@code DOMAIN_NAME} column
 * @param timestamp value of the {@code DISCOVER_DATE} column, read as a string
 */
public record DiscoveredDomain(String name, String timestamp) {}
}
public Object handle(Request request, Response response) throws SQLException {
@ -72,15 +107,18 @@ public class SearchSiteInfoService {
return null;
}
var model = switch (view) {
case "links" -> listLinks(domainName);
case "docs" -> listDocs(domainName);
int page = Integer.parseInt(request.queryParamOrDefault("page", "1"));
SiteInfoModel model = switch (view) {
case "links" -> listLinks(domainName, page);
case "docs" -> listDocs(domainName, page);
case "info" -> listInfo(domainName);
case "report" -> reportSite(domainName);
default -> listInfo(domainName);
};
return renderer.render(model);
return jteRenderer.render("siteinfo/main.jte",
Map.of("model", model, "navbar", NavbarModel.SITEINFO));
}
public Object handlePost(Request request, Response response) throws SQLException {
@ -108,10 +146,11 @@ public class SearchSiteInfoService {
var model = new ReportDomain(domainName, domainId, complaints, List.of(), true);
return renderer.render(model);
return jteRenderer.render("siteinfo/main.jte",
Map.of("model", model, "navbar", NavbarModel.SITEINFO));
}
private Object reportSite(String domainName) throws SQLException {
private ReportDomain reportSite(String domainName) throws SQLException {
int domainId = domainQueries.getDomainId(new EdgeDomain(domainName));
var existingComplaints = flagSiteService.getExistingComplaints(domainId);
@ -123,15 +162,20 @@ public class SearchSiteInfoService {
}
private Backlinks listLinks(String domainName) {
private Backlinks listLinks(String domainName, int page) {
var results = searchOperator.doBacklinkSearch(domainName, page);
return new Backlinks(domainName,
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
searchOperator.doBacklinkSearch(domainName));
GroupedUrlDetails.groupResults(results.results),
results.resultPages
);
}
private SiteInfoWithContext listInfo(String domainName) {
final int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
var domain = new EdgeDomain(domainName);
final int domainId = domainQueries.tryGetDomainId(domain).orElse(-1);
boolean viableAliasDomain = domain.aliasDomain().map(alias -> domainQueries.tryGetDomainId(alias).isPresent()).orElse(false);
final Future<DomainInformation> domainInfoFuture;
final Future<List<SimilarDomain>> similarSetFuture;
@ -161,12 +205,13 @@ public class SearchSiteInfoService {
feedItemsFuture = feedsClient.getFeed(domainId);
}
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5);
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5, 1).results;
if (!sampleResults.isEmpty()) {
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
}
var result = new SiteInfoWithContext(domainName,
viableAliasDomain ? domain.aliasDomain().map(EdgeDomain::toString) : Optional.empty(),
domainId,
url,
hasScreenshot,
@ -240,20 +285,21 @@ public class SearchSiteInfoService {
.build();
}
private Docs listDocs(String domainName) {
private Docs listDocs(String domainName, int page) {
int domainId = domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1);
var results = searchOperator.doSiteSearch(domainName, domainId, 100, page);
return new Docs(domainName,
domainQueries.tryGetDomainId(new EdgeDomain(domainName)).orElse(-1),
searchOperator.doSiteSearch(domainName, domainId, 100));
results.results.stream().sorted(Comparator.comparing(deets -> -deets.topology)).toList(),
results.resultPages
);
}
public record Docs(Map<String, Boolean> view,
String domain,
public record Docs(String domain,
long domainId,
List<UrlDetails> results) {
public Docs(String domain, long domainId, List<UrlDetails> results) {
this(Map.of("docs", true), domain, domainId, results);
}
List<UrlDetails> results,
List<ResultsPage> pages) implements SiteInfoModel {
public String focusDomain() { return domain; }
@ -264,11 +310,12 @@ public class SearchSiteInfoService {
}
}
public record Backlinks(Map<String, Boolean> view, String domain, long domainId, List<UrlDetails> results) {
public Backlinks(String domain, long domainId, List<UrlDetails> results) {
this(Map.of("links", true), domain, domainId, results);
}
public record Backlinks(String domain,
long domainId,
List<GroupedUrlDetails> results,
List<ResultsPage> pages
) implements SiteInfoModel
{
public String query() { return "links:" + domain; }
public boolean isKnown() {
@ -276,9 +323,12 @@ public class SearchSiteInfoService {
}
}
public record SiteInfoWithContext(Map<String, Boolean> view,
Map<String, Boolean> domainState,
String domain,
public interface SiteInfoModel {
String domain();
}
public record SiteInfoWithContext(String domain,
Optional<String> aliasDomain,
int domainId,
String siteUrl,
boolean hasScreenshot,
@ -286,68 +336,12 @@ public class SearchSiteInfoService {
List<SimilarDomain> similar,
List<SimilarDomain> linking,
FeedItems feed,
List<UrlDetails> samples
) {
public SiteInfoWithContext(String domain,
int domainId,
String siteUrl,
boolean hasScreenshot,
DomainInformation domainInformation,
List<SimilarDomain> similar,
List<SimilarDomain> linking,
FeedItems feedInfo,
List<UrlDetails> samples
)
List<UrlDetails> samples)
implements SiteInfoModel
{
this(Map.of("info", true),
Map.of(domainInfoState(domainInformation), true),
domain,
domainId,
siteUrl,
hasScreenshot,
domainInformation,
similar,
linking,
feedInfo,
samples);
}
public String getLayout() {
// My CSS is too weak to handle this in CSS alone, so I guess we're doing layout in Java...
if (similar != null && similar.size() < 25) {
return "lopsided";
}
else if (feed != null && !feed.items().isEmpty()) {
return "lopsided";
}
else if (samples != null && !samples.isEmpty()) {
return "lopsided";
}
else {
return "balanced";
}
}
public String query() { return "site:" + domain; }
private static String domainInfoState(DomainInformation info) {
if (info.isBlacklisted()) {
return "blacklisted";
}
if (!info.isUnknownDomain() && info.isSuggestForCrawling()) {
return "suggestForCrawling";
}
if (info.isInCrawlQueue()) {
return "inCrawlQueue";
}
if (info.isUnknownDomain()) {
return "unknownDomain";
}
else {
return "indexed";
}
}
public boolean isKnown() {
return domainId > 0;
}
@ -391,21 +385,12 @@ public class SearchSiteInfoService {
}
public record ReportDomain(
Map<String, Boolean> view,
String domain,
int domainId,
List<SearchFlagSiteService.FlagSiteComplaintModel> complaints,
List<SearchFlagSiteService.CategoryItem> category,
boolean submitted)
boolean submitted) implements SiteInfoModel
{
public ReportDomain(String domain,
int domainId,
List<SearchFlagSiteService.FlagSiteComplaintModel> complaints,
List<SearchFlagSiteService.CategoryItem> category,
boolean submitted) {
this(Map.of("report", true), domain, domainId, complaints, category, submitted);
}
public String query() { return "site:" + domain; }
public boolean isKnown() {

View File

@ -0,0 +1,58 @@
@import nu.marginalia.browse.model.BrowseResult
@import nu.marginalia.search.model.NavbarModel
@import nu.marginalia.browse.model.BrowseResultSet
@param NavbarModel navbar
@param BrowseResultSet results
<!DOCTYPE html>
<html lang="en">
@template.part.head(title = "Marginalia Search - Explore")
<body class="min-h-screen bg-slate-100 font-sans" >
@template.part.navbar(navbar = navbar)
<header class="border-gray-300 bg-white shadow-md">
<div class="max-w-[1400px] mx-auto px-4 py-4">
<div class="flex items-center space-x-2">
<h1 class="text-base md:text-xl mr-2 md:mr-8 font-serif">Explore Websites</h1>
</div>
</div>
</header>
<div class="max-w-[1400px] mx-auto flex flex-col gap-1 place-items-center">
<div class="border bg-white my-4 p-3 rounded overflow-hidden flex flex-col space-y-4">
@if (results.hasFocusDomain())
<div class="flex space-x-1">
<span>Showing websites similar to <a class="font-mono text-liteblue" href="/site/${results.focusDomain()}"><i class="fas fa-globe"></i> <span class="underline">${results.focusDomain()}</span></a></span>
<span class="grow w-32"></span>
<a href="/explore/random" title="Return to random results"><i class="fas fa-close"></i></a>
</div>
@else
Showing a random selection of websites. Refresh the website to get a new selection.
@endif
</div>
<div class="grid-cols-1 gap-4 sm:grid sm:grid-cols-2 md:gap-8 md:grid-cols-3 xl:grid-cols-4 mx-auto p-4">
@for (BrowseResult result : results.results())
<div class="bg-white border rounded overflow-hidden m-4 sm:m-0">
<div class="bg-margeblue text-white p-2 flex space-x-4 text-sm">
<span class="break-words">${result.displayDomain()}</span>
<div class="grow"></div>
<a href="/site/${result.displayDomain()}" title="Show website information"><i class="fas fa-info"></i></a>
<a href="/explore/${result.displayDomain()}" title="Show results similar to this website"><i class="fas fa-shuffle"></i></a>
</div>
<a href="${result.url().toString()}">
<img class="p-2" width="800" height="600" src="/screenshot/${result.domainId()}">
</a>
</div>
@endfor
</div>
</div>
@template.part.footerLegal()
</body>
</html>

View File

@ -0,0 +1,36 @@
<footer class="max-w-[1400px] mx-auto bg-white p-5 text-sm border-gray-300 border rounded flex my-5 md:space-x-8 space-y-4 md:flex-row flex-col">
<div class="flex space-y-4 flex-col">
<p class="text-base"><i class="fas fa-briefcase mr-2 text-margeblue"></i>Policies</p>
<span class="text-sm text-slate-900">
This website complies with the GDPR by not collecting any personal information,
and with the EU Cookie Directive by not using cookies for any purpose other than
to provide service functionality.
</span>
<span>
Access logs containing IP-addresses are retained for up to 24 hours,
anonymized logs with source addresses removed are sometimes kept longer
to help diagnose bugs.
</span>
</div>
<div class="flex space-y-4 flex-col">
<p class="text-base"><i class="fas fa-envelope mr-2 text-margeblue"></i>Contact</p>
<span class="text-sm text-slate-900">
You can reach the webmaster of the search engine at <a class="underline text-liteblue" href="mailto:kontakt@marginalia.nu">kontakt@marginalia.nu</a>.
</span>
<p class="text-base"><i class="fas fa-gear mr-2 text-margeblue"></i>Sources</p>
<span class="text-sm text-slate-900">
The search engine is open source with an AGPL license. The sources can be perused at
<a class="underline text-liteblue" rel="external noopener nofollow" href="https://git.marginalia.nu/">https://git.marginalia.nu/</a>.
</span>
</div>
<div class="flex space-y-4 flex-col">
<p class="text-base"><i class="fas fa-database mr-2 text-margeblue"></i>Data</p>
<span class="text-sm text-slate-900">
IP geolocation is sourced from the IP2Location LITE data available from
<a rel="external noopener nofollow" class="underline text-liteblue" href="https://lite.ip2location.com/">https://lite.ip2location.com/</a>
under
<a rel="external noopener nofollow" class="underline text-liteblue" href="https://creativecommons.org/licenses/by-sa/4.0/">CC-BY-SA&nbsp;4.0</a>.
</span>
</div>
</footer>

View File

@ -0,0 +1,30 @@
@param String title
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Marginalia Search Engine - ${title}</title>
<script src="https://cdn.tailwindcss.com"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.5.1/css/all.min.css">
<script>
tailwind.config = {
theme: {
extend: {
colors: {
nicotine: '#f8f8ee',
margeblue: '#3e5f6f',
liteblue: '#0066cc',
}
},
screens: {
'xs': '440px',
'sm': '640px',
'md': '768px',
'lg': '1024px',
'xl': '1280px',
'2xl': '1536px',
},
}
}
</script>
</head>

View File

@ -0,0 +1,33 @@
@import nu.marginalia.search.model.NavbarModel
@param NavbarModel navbar
<header class="bg-margeblue text-white">
<div class="max-w-[1400px] mx-auto">
<nav>
<div class="px-4">
<div class="flex flex-row xs:text-sm text-xs gap-1 xs:gap-2">
@for (var item : navbar.first().entries())
<a href="${item.url()}" class="has-[:checked]:underline py-2 px-2 hover:text-slate-200">
@if (item.active())
<input type="checkbox" checked readonly class="sr-only" />
@endif
${item.name()}
</a>
@endfor
<div class="grow"></div>
@for (var item : navbar.second().entries())
<a href="${item.url()}" class="has-[:checked]:underline py-2 px-2 hover:text-slate-200">
@if (item.active())
<input type="checkbox" checked readonly class="sr-only" />
@endif
${item.name()}
</a>
@endfor
</div>
</div>
</nav>
</div>
</header>

View File

@ -0,0 +1,48 @@
@import nu.marginalia.search.model.NavbarModel
@import nu.marginalia.search.model.SearchErrorMessageModel
@param SearchErrorMessageModel model
@param NavbarModel navbar
<%--
  Search error page.
  Shows the navbar, a search form pre-filled from model.parameters() and
  model.filters(), and a box with model.errorTitle(); model.errorRest() is
  rendered as an extra paragraph only when it is not blank.
--%>
<!DOCTYPE html>
<html lang="en">
@template.part.head(title = "Marginalia Search - Error")
<body class="min-h-screen bg-gray-50 font-sans" >
@template.part.navbar(navbar = navbar)
<header class="border-b border-gray-300 bg-white">
<div class="max-w-[1400px] mx-auto px-4 py-4">
<div class="flex items-center">
<h1 class="text-xl mr-8 font-serif">Marginalia Search</h1>
@template.serp.part.searchform(query = model.parameters().query(), profile = model.parameters().profileStr(), filters = model.filters())
</div>
</div>
</header>
<div class="max-w-[1400px] mx-auto flex gap-6">
<!-- Main content -->
<main class="flex-1 p-4 max-w-2xl space-y-4">
<%-- NOTE(review): "text-m" does not look like a standard Tailwind utility (text-sm / text-base are) - confirm it is defined somewhere --%>
<div class="border rounded bg-white text-black text-m p-4">
<div class="flex space-x-3 place-items-baseline">
<i class="fa fa-circle-exclamation text-red-800"></i>
<div class="grow">${model.errorTitle()}</div>
</div>
<%-- Optional detail text below the title --%>
@if (!model.errorRest().isBlank())
<p class="pt-5 text-gray-800 text-sm">
${model.errorRest()}
</p>
@endif
</div>
</main>
</div>
@template.part.footerLegal()
</body>
</html>

View File

@ -0,0 +1,69 @@
@import nu.marginalia.WebsiteUrl
@import nu.marginalia.search.model.NavbarModel
@import nu.marginalia.search.model.SearchFilters
@import nu.marginalia.search.model.SearchProfile
@param NavbarModel navbar
@param WebsiteUrl websiteUrl
<%--
  Start page.
  Shows the navbar, an empty search form (no profile filter, default
  SearchFilters built from websiteUrl) and three static info cards.
--%>
<!DOCTYPE html>
<html lang="en">
@template.part.head(title = "Marginalia Search")
<body class="min-h-screen bg-slate-100 font-sans" >
@template.part.navbar(navbar = navbar)
<header class="border-b border-gray-300 bg-white">
<div class="max-w-[1400px] mx-auto px-4 py-4">
<div class="flex items-center">
<h1 class="hidden sm:block text-xl mr-8 font-serif">Marginalia Search</h1>
@template.serp.part.searchform(query = "", profile = SearchProfile.NO_FILTER.filterId, filters = new SearchFilters(websiteUrl))
</div>
</div>
</header>
<%-- Width cap uses the arbitrary-value form max-w-[1000px], matching the other pages --%>
<%-- NOTE(review): "fill-w" does not look like a standard Tailwind utility - confirm it is defined somewhere --%>
<div class="max-w-[1000px] mx-auto flex flex-row space-y-4 fill-w">
<div class="mx-auto flex flex-col sm:flex-row my-4 sm:space-x-2 space-y-2 sm:space-y-0 w-full md:w-auto px-2">
<div class="flex flex-col border rounded overflow-hidden bg-white p-6 space-y-3">
<div><i class="fas fa-sailboat mx-2 text-margeblue"></i>Explore the Web</div>
<ul class="list-disc ml-6 text-slate-700 text-xs leading-5">
<li>Prioritizes non-commercial content</li>
<li>Tools for both search and discovery</li>
<li>Find lost old websites</li>
</ul>
</div>
<div class="flex flex-col border rounded overflow-hidden bg-white p-6 space-y-3 ">
<div><i class="fas fa-hand-holding-hand mx-2 text-margeblue"></i>Open Source</div>
<ul class="list-disc ml-6 text-slate-700 text-xs leading-5">
<li>AGPL license</li>
<li>Custom index software</li>
<li>Custom crawler software</li>
</ul>
<div class="text-xs text-liteblue pt-4">
<i class="fas fa-link"></i>
<a href="https://git.marginalia.nu/" class="underline">Git Repository</a>
</div>
</div>
<div class="flex flex-col border rounded overflow-hidden bg-white p-6 space-y-3 ">
<div><i class="fas fa-lock mx-2 text-margeblue"></i> Privacy by default</div>
<ul class="list-disc ml-6 text-slate-700 text-xs leading-5">
<li>Filter out tracking and adtech</li>
<li>No user or search data shared with 3rd parties</li>
<li>No long-term retention of queries or IP addresses</li>
</ul>
<div class="text-xs text-liteblue pt-4">
<i class="fas fa-link"></i>
<a href="#" class="underline">Privacy Statement</a> <!-- TODO -->
</div>
</div>
</div>
</div>
@template.part.footerLegal()
</body>
</html>

View File

@ -0,0 +1,91 @@
@import nu.marginalia.search.model.DecoratedSearchResults
@import nu.marginalia.search.model.ClusteredUrlDetails
@import nu.marginalia.search.model.NavbarModel
@import nu.marginalia.search.model.ResultsPage
@param DecoratedSearchResults results
@param NavbarModel navbar
<%--
  Search results page.
  Renders the navbar, a header with the search form, the filter sidebar,
  an optional "showing results from <domain>" banner when the results have
  a focus domain, one result card per ClusteredUrlDetails, and a pager when
  there is more than one result page. The mobile filter menu is included
  last and toggled by the script at the end of the body.
--%>
<!DOCTYPE html>
<html lang="en">
@template.part.head(title = "Marginalia Search - " + results.getQuery())
<body class="min-h-screen bg-slate-100 font-sans" >
@template.part.navbar(navbar = navbar)
<div>
<header class="border-b border-gray-300 bg-white sm:static sticky top-0 shadow-md">
<div class="max-w-[1400px] mx-auto px-4 py-2 sm:py-4">
<div class="flex items-center">
<h1 class="text-md sm:text-xl mr-8 font-serif whitespace-nowrap">Marginalia Search</h1>
<%-- Below the sm breakpoint the search form is pinned to the bottom of the viewport --%>
<div class="fixed sm:static bottom-0 left-0 w-full p-5 sm:p-0 border sm:border-none bg-gray-50/90 backdrop-blur-sm sm:bg-white">
@template.serp.part.searchform(query = results.getParams().query(), profile = results.getProfile(), filters = results.getFilters())
</div>
<div class="grow"></div>
<div class="sticky top-0">
<%-- Only shown below the sm breakpoint; opens the mobile filter menu --%>
<button class="sm:hidden text-sm bg-margeblue text-white p-2 sm:p-3 rounded" id="filter-button">
<i class="fas fa-filter mr-3"></i>
Filters
</button>
</div>
</div>
</div>
</header>
<div class="max-w-[1400px] mx-auto flex gap-6">
<!-- Sidebar -->
@template.serp.part.sidebar(filters = results.getFilters())
<!-- Main content -->
<main class="flex-1 p-4 max-w-3xl space-y-4">
@if (results.hasFocusDomain())
<div class="border rounded flex space-x-4 bg-white text-gray-600 text-sm p-4 items-center">
<div>Showing results from <a class="underline text-liteblue" href="/site/${results.getFocusDomain()}">${results.getFocusDomain()}</a></div>
<div class="grow"></div>
<%-- Link back to the same search without the site focus --%>
<a href="${results.getParams().renderUrlWithoutSiteFocus()}" class="fa fa-remove"></a>
</div>
@endif
<div class="space-y-6">
<!-- Result items -->
@for(ClusteredUrlDetails details : results.results)
@template.serp.part.result(result = details, parameters = results.getParams(), domainSearch = results.hasFocusDomain())
@endfor
</div>
<!-- Pagination -->
@if (results.getResultPages().size() > 1)
<div class="mt-8 flex justify-center space-x-2 font-mono text-sm">
@for(ResultsPage page : results.getResultPages())
@if (page.current())
<a href="${results.getParams().withPage(page.number()).renderUrl()}" class="px-3 py-1 border border-gray-300 bg-gray-100">${page.number()}</a>
@else
<a href="${results.getParams().withPage(page.number()).renderUrl()}" class="px-3 py-1 bg-white border border-gray-300 hover:bg-gray-100">${page.number()}</a>
@endif
@endfor
</div>
@endif
</main>
</div>
</div>
@template.serp.part.footerHowto()
@template.part.footerLegal()
<%-- Put this last to not bother SR users with double menus --%>
@template.serp.part.mobile-menu(filters = results.getFilters())
<%-- Kept inside <body> and after the mobile menu so both buttons exist when the script runs --%>
<script>
document.getElementById("filter-button").addEventListener("click", function() {
document.getElementById("mobile-menu").classList.toggle("hidden");
});
document.getElementById("hide-filter-button").addEventListener("click", function() {
document.getElementById("mobile-menu").classList.toggle("hidden");
});
</script>
</body>
</html>

View File

@ -0,0 +1,91 @@
<footer class="max-w-[1400px] mx-auto text-slate-900 bg-white p-5 text-sm border-gray-300 border rounded flex my-5 md:space-x-8 space-y-4 md:flex-row flex-col leading-6">
<div class="flex space-y-4 flex-col flex-1">
<p class="text-base"><i class="fas fa-book mr-2 text-margeblue"></i>Syntax</p>
<p>This is a keyword-based search engine. When entering multiple search terms, the search engine will attempt to match them against documents where the terms occur in close proximity.</p>
<p>Search terms can be excluded with a hyphen.</p>
<p>While the search engine at present does not allow full text search, quotes can be used to specifically search for names or terms in the title. Using quotes will also cause the search engine to be as literal as possible in interpreting the query.</p>
<p>Parentheses can be used to add terms to the query without giving weight to the terms when ranking the search results.</p>
<p>Samples</p>
<dl>
<dt class="font-mono text-red-800">soup -chicken</dt>
<dd class="ml-4">Look for keywords that contain <span class="font-mono text-red-800">soup</span>, but not
<span class="font-mono text-red-800">chicken</span>.</dd>
<dt class="font-mono text-red-800">"keyboard"</dt>
<dd class="ml-4">Look for pages containing the exact word
<span class="font-mono text-red-800">keyboard</span>, not <span class="font-mono text-red-800">keyboards</span> or the like.</dd>
<dt class="font-mono text-red-800">"steve mcqueen"</dt>
<dd class="ml-4">Look for pages containing the exact words <span class="font-mono text-red-800">steve mcqueen</span>
in that order, with no words in between.</dd>
<dt class="font-mono text-red-800">apology (plato)</dt>
<dd class="ml-4">Look for pages containing <span class="font-mono text-red-800">apology</span> and <span class="font-mono text-slate-900">plato</span>, but only rank them
based on their relevance to <span class="font-mono text-red-800">apology</span></dd>
</dl>
<p class="text-base"><i class="fas fa-flag mr-2 text-margeblue"></i>Language Limitations</p>
<p>The search engine currently does not support any languages other than English. </p>
<p> Support for other languages is planned,
but not available right now. Adding support for additional languages and making it work well is somewhat time-consuming,
meanwhile having bad support for a language won't make anyone happy.
</p>
<p class="text-base"><i class="fas fa-server mr-2 text-margeblue"></i>Webmaster Information</p>
<p>If you wish to add your website to the index, follow the instructions in this <a class="underline text-liteblue" href="https://github.com/MarginaliaSearch/submit-site-to-marginalia-search">git repository</a>,
if you do not want to mess with git, you can also email <a class="underline text-liteblue" href="mailto:kontakt@marginalia.nu">kontakt@marginalia.nu</a> with the domain name.</p>
<p>The search engine's crawler uses the user-agent string <span class="font-mono text-red-800">search.marginalia.nu</span>, and requests come from the IPs indicated in
<a class="underline text-liteblue" href="https://search.marginalia.nu/crawler-ips.txt">https://search.marginalia.nu/crawler-ips.txt</a>.</p>
<p>If you do not want your website to be crawled, the search engine respects robots.txt. In case of questions, bug reports or concerns, email <a class="underline text-liteblue" href="mailto:kontakt@marginalia.nu">kontakt@marginalia.nu</a>.
</p>
</div>
<div class="flex space-y-4 flex-col flex-1">
<p class="text-base"><i class="fas fa-code mr-2 text-margeblue"></i>Special Keywords</p>
<table>
<thead>
<tr><th>Keyword</th><th>Meaning</th></tr>
</thead>
<tbody>
<tr><td>site:<em>example.com</em></td><td>Display site information about <em>example.com</em></td></tr>
<tr><td>site:<em>example.com</em> <em>keyword</em></td><td>Search <em>example.com</em> for <em>keyword</em></td></tr>
<tr><td>browse:<em>example.com</em></td><td>Show similar websites to <em>example.com</em></td></tr>
<tr><td>ip:<em>127.0.0.1</em></td><td>Search documents hosted at <em>127.0.0.1</em></td></tr>
<tr><td>links:<em>example.com</em></td><td>Search documents linking to <em>example.com</em></td></tr>
<tr><td>tld:<em>edu</em> <em>keyword</em></td><td>Search documents with the top level domain <em>edu</em>.</td></tr>
<tr><td>?tld:<em>edu</em> <em>keyword</em></td><td>Prefer but do not require results with the top level domain <em>edu</em>.
This syntax is also possible for links:..., ip:... and site:...</td></tr>
<tr><td>q&gt;5</td><td>The amount of javascript and modern features is at least 5 (on a scale 0 to 25)</td></tr>
<tr><td>q&lt;5</td><td>The amount of javascript and modern features is at most 5 (on a scale 0 to 25)</td></tr>
<tr><td>year&gt;2005</td><td>(beta) The document was ostensibly published in or after 2005</td></tr>
<tr><td>year=2005</td><td>(beta) The document was ostensibly published in 2005</td></tr>
<tr><td>year&lt;2005</td><td>(beta) The document was ostensibly published in or before 2005</td></tr>
<tr><td>rank&gt;50</td><td>The ranking of the website is at least 50 in a span of 1 - 255</td></tr>
<tr><td>rank&lt;50</td><td>The ranking of the website is at most 50 in a span of 1 - 255</td></tr>
<tr><td>count&gt;10</td><td> The search term must appear in at least 10 results from the domain</td></tr>
<tr><td>count&lt;10</td><td> The search term must appear in at most 10 results from the domain</td></tr>
<tr><td>format:html5</td><td>Filter documents using the HTML5 standard. This is typically modern websites.</td></tr>
<tr><td>format:xhtml</td><td>Filter documents using the XHTML standard</td></tr>
<tr><td>format:html123</td><td>Filter documents using the HTML standards 1, 2, and 3. This is typically very old websites. </td></tr>
<tr><td>generator:wordpress</td><td>Filter documents with the specified generator, in this case wordpress</td></tr>
<tr><td>file:zip</td><td>Filter documents containing a link to a zip file (most file-endings work)</td></tr>
<tr><td>file:audio</td><td>Filter documents containing a link to an audio file</td></tr>
<tr><td>file:video</td><td>Filter documents containing a link to a video file</td></tr>
<tr><td>file:archive</td><td>Filter documents containing a link to a compressed archive</td></tr>
<tr><td>file:document</td><td>Filter documents containing a link to a document</td></tr>
<tr><td>-special:media</td><td>Filter out documents with audio or video tags</td></tr>
<tr><td>-special:scripts</td><td>Filter out documents with javascript</td></tr>
<tr><td>-special:affiliate</td><td>Filter out documents with likely Amazon affiliate links</td></tr>
<tr><td>-special:tracking</td><td>Filter out documents with analytics or tracking code</td></tr>
<tr><td>-special:cookies</td><td>Filter out documents with cookies</td></tr>
</tbody>
</table>
</div>
</footer>

View File

@ -0,0 +1,24 @@
@import java.util.stream.IntStream
@param long mask
<%--
  Small SVG dial visualising a bit mask.
  Draws a grey ring (centre 18,18, radius 16) and, for every bit set among
  the low 56 bits of mask, a short blue tick from radius 15 to radius 17.
  Bit n is placed at angle 2*PI*n/56, starting at the top of the ring and
  advancing clockwise.
--%>
<svg width="40" height="40">
<circle
cx="18"
cy="18"
r="16"
fill="none"
stroke="#E5E7EB"
stroke-width="2"
/>
<%-- Iterate only over the indices of the bits that are set in mask --%>
@for (int bit : IntStream.range(0, 56).filter(bit -> (mask & (1L << bit)) != 0).toArray())
<line
x1="${18 + 15*Math.sin(2 * Math.PI * bit / 56.)}"
y1="${18 - 15*Math.cos(2 * Math.PI * bit / 56.)}"
x2="${18 + 17*Math.sin(2 * Math.PI * bit / 56.)}"
y2="${18 - 17*Math.cos(2 * Math.PI * bit / 56.)}"
stroke="#3B82F6"
stroke-width="2"
/>
@endfor
</svg>

View File

@ -0,0 +1,50 @@
@import nu.marginalia.search.model.SearchFilters
@import java.util.List
@param SearchFilters filters
<%--
  Full-screen filter menu for small screens.
  Starts hidden; the page that includes it toggles the "hidden" class on
  #mobile-menu (via #filter-button elsewhere and #hide-filter-button here).
  Lists every filter of every filter group, then every search option, each
  as a button navigating to the entry's URL.
--%>
<aside id="mobile-menu" class="fixed inset-0 z-50 flex justify-center bg-white/90 backdrop-blur-sm hidden">
<button id="hide-filter-button" class="fixed top-5 right-5 bg-margeblue text-white p-3 rounded text-sm">
<i class="fas fa-close"></i>
</button>
<div class="flex mx-auto flex-col my-10 px-4">
<div class="text-xl my-4"><i class="fas fa-filter mr-2 text-margeblue"></i>Filters</div>
<div class="grid grid-cols-3 gap-4">
@for (List<SearchFilters.Filter> filterGroup : filters.getFilterGroups())
@for (SearchFilters.Filter filter : filterGroup)
<label class="flex items-center">
<button onclick="document.location='$unsafe{filter.url}'" class="flex-1 py-2 rounded flex flex-col place-items-center has-[:checked]:bg-gray-100 has-[:checked]:text-slate-900 hover:bg-gray-50 text-margeblue">
<%-- Visually hidden checkbox; its checked state drives the button's has-[:checked]:* highlight classes --%>
@if (filter.current)
<input type="checkbox" checked class="sr-only" aria-checked="true" />
@else
<input type="checkbox" class="sr-only" aria-checked="false" />
@endif
<i class="fas ${filter.icon} text-xl"></i>
<%-- tabindex=-1 keeps the link out of the tab order; the enclosing button is the tab stop --%>
<a tabindex="-1" href="$unsafe{filter.url}" class="text-sm">${filter.displayName}</a>
<div class="grow"></div>
</button>
</label>
@endfor
@endfor
</div>
<div class="text-xl my-4"><i class="fas fa-cog mr-2 text-margeblue"></i>Advanced Options</div>
<div class="grid grid-cols-3 gap-4">
@for (SearchFilters.SearchOption option : filters.searchOptions())
<label class="flex items-center">
<button onclick="document.location='$unsafe{option.getUrl()}'" class="flex-1 py-2 rounded flex flex-col place-items-center has-[:checked]:bg-gray-100 has-[:checked]:text-slate-900 hover:bg-gray-50 text-margeblue">
@if (option.isSet())
<input type="checkbox" checked class="sr-only" aria-checked="true" />
@else
<input type="checkbox" class="sr-only" aria-checked="false" />
@endif
<i class="fas ${option.icon()} text-xl"></i>
<a tabindex="-1" href="$unsafe{option.getUrl()}" class="text-sm">${option.name()}</a>
<div class="grow"></div>
</button>
</label>
@endfor
</div>
</div>
</aside>

View File

@ -0,0 +1,108 @@
@import nu.marginalia.model.idx.DocumentFlags
@import nu.marginalia.search.command.SearchParameters
@import nu.marginalia.search.model.ClusteredUrlDetails
@import nu.marginalia.search.model.UrlDetails
@param ClusteredUrlDetails result
@param SearchParameters parameters
@param boolean domainSearch
<%--
  One search result card.
  Shows the position dial, title, URL and description of result.first.
  Unless domainSearch is true it also shows the per-domain action column,
  the "Also from <domain>" list of result.rest, and an "N more" link that
  re-runs the search with site focus on the result's domain.
  The bottom row holds badges derived from the first result's flags.
--%>
<div class="${result.colorScheme.backgroundColor} p-4 border border-gray-300 flex rounded">
<div class="flex flex-col grow">
<div class="flex">
<div class="flex flex-col grow" >
<div class="flex flex-row space-x-2 place-items-center">
<div class="flex-0">
@template.serp.part.matchogram(mask = result.first.positionsMask)
</div>
<div class="flex grow justify-between items-start">
<div class="flex-1">
<h2 class="text-xl ${result.colorScheme.textColor} font-serif mr-4 break-words">
<a href="${result.first.url.toString()}" rel="noopener noreferrer">${result.first.title}</a>
</h2>
<div class="text-sm mt-1 text-gray-600">
<%-- Duplicate of the title link, so it is kept out of the tab order --%>
<a class="text-liteblue underline break-all" href="${result.first.url.toString()}"
rel="noopener noreferrer" tabindex="-1">${result.first.url.toString()}</a>
</div>
</div>
</div>
</div>
<p class="mt-2 text-sm text-slate-900 leading-relaxed">
${result.first.description}
</p>
</div>
@if (!domainSearch)
<div class="flex flex-col ml-5">
<a href="/site/${result.getDomain().toString()}" class="p-1.5 text-gray-600 hover:text-gray-900 rounded" title="About this domain">
<i class="fas fa-info text-sm"></i>
</a>
<div class="grow"></div>
<%-- NOTE(review): the promote/demote buttons have no handler or form in this template --%>
<button class="p-1.5 text-gray-600 hover:bg-gray-100 rounded" title="Promote this domain">
<i class="fas fa-thumbs-up text-sm"></i>
</button>
<button class="p-1.5 text-gray-600 hover:bg-gray-100 rounded" title="Demote this domain">
<i class="fas fa-thumbs-down text-sm"></i>
</button>
</div>
@endif
</div>
@if (result.hasMultiple() && !domainSearch)
<div class="flex mt-2 text-sm flex-col space-y-2">
<p class="${result.colorScheme.textColor2} ${result.colorScheme.backgroundColor2} p-1 rounded">Also from ${result.getDomain().toString()}:</p>
<ul class="pl-2 mt-2 underline text-liteblue">
@for(UrlDetails item : result.rest)
<li class="-indent-4 pl-4 mb-1">
<a href="${item.url.toString()}" rel="noopener noreferrer">${item.title}</a>
</li>
@endfor
</ul>
</div>
@endif
<%-- Badge row: a div rather than a span, because it contains a div spacer --%>
<div class="flex space-x-1 flex-row text-xs mt-4">
@if (!domainSearch && result.remainingCount() > 0)
<span>
<a class="text-liteblue underline" href="${parameters.renderUrlWithSiteFocus(result.getDomain())}">${result.remainingCount()} more</a>
</span>
@endif
<div class="grow"></div>
@if (DocumentFlags.PlainText.isPresent(result.getFirst().resultItem.encodedDocMetadata))
<span class="px-1 bg-blue-100 text-blue-700 rounded">Plain text</span>
@endif
@if (DocumentFlags.GeneratorForum.isPresent(result.getFirst().resultItem.encodedDocMetadata))
<span class="px-1 bg-blue-100 text-blue-700 rounded">Forum</span>
@endif
@if (DocumentFlags.GeneratorWiki.isPresent(result.getFirst().resultItem.encodedDocMetadata))
<span class="px-1 bg-blue-100 text-blue-700 rounded">Wiki</span>
@endif
@if(result.getFirst().isCookies())
<span class="px-1 bg-yellow-100 text-yellow-700 rounded" title="Contains Cookies">Cookies</span>
@endif
@if(result.getFirst().isTracking())
<span class="px-1 bg-yellow-100 text-yellow-700 rounded" title="Uses tracking scripts">Track</span>
@endif
@if(result.getFirst().isScripts())
<span class="px-1 bg-yellow-100 text-yellow-700 rounded" title="Contains JavaScript">JS</span>
@endif
@if(result.getFirst().isAds())
<span class="px-1 bg-red-100 text-red-700 rounded" title="Contains adtech">Ads</span>
@endif
@if(result.getFirst().isAffiliate())
<span class="px-1 bg-red-100 text-red-700 rounded" title="Contains Affiliate Link">Affiliate</span>
@endif
</div>
</div>
</div>

View File

@ -0,0 +1,41 @@
@import nu.marginalia.search.model.SearchFilters
@param String query
@param String profile
@param SearchFilters filters
<%--
  Search form submitting to /search.
  Renders the query text box (autofocused only when query is blank), the
  Search button, and hidden inputs carrying the current profile and the
  values of the js / adtech / searchTitle / recent filter options so they
  are resubmitted with the query.
--%>
<form class="flex-1 max-w-2xl" action="/search">
<div class="flex">
<%-- The two inputs below are identical except for the autofocus attribute --%>
@if (query.isBlank())
<%-- Add autofocus if the query is blank --%>
<input type="text"
class="shadow-inner flex-1 bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-sm block w-full p-2.5"
value="${query}"
autofocus
placeholder="Search..."
name="query"
id="query" />
@else
<input type="text"
class="shadow-inner flex-1 bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-sm block w-full p-2.5"
value="${query}"
placeholder="Search..."
name="query"
id="query" />
@endif
<button class="px-4 py-2 bg-margeblue text-white ml-2 rounded whitespace-nowrap ">
<i class="fas fa-search text-sm sm:mr-3"></i>
Search
</button>
<%-- NOTE(review): presumably a mount point for a suggestions dropdown; nothing in this template uses it --%>
<div id="suggestions-anchor" class="hidden"></div>
</div>
<input type="hidden" name="js" value="${filters.removeJsOption.value()}">
<input type="hidden" name="adtech" value="${filters.reduceAdtechOption.value()}">
<input type="hidden" name="searchTitle" value="${filters.searchTitleOption.value()}">
<input type="hidden" name="profile" value="${profile}">
<input type="hidden" name="recent" value="${filters.showRecentOption.value()}">
</form>

View File

@ -0,0 +1,59 @@
@import nu.marginalia.search.model.SearchFilters
@import java.util.List
@param SearchFilters filters
<%--
  Filter sidebar for the search results page (hidden below the sm breakpoint).
  First panel: every filter of every filter group, with a divider after
  each group. Second panel: every search option. Each entry is a button
  that navigates to the entry's URL; labels are shown from md and up,
  below that only the icon is visible.
--%>
<aside class="md:w-64 py-4 shrink-0 hidden sm:block">
<div class="space-y-6 sticky top-4">
<div class="bg-white p-4 border border-gray-300">
<h2 class="font-medium mb-3 flex items-center font-serif hidden md:block">
<i class="fas fa-filter text-sm mr-2"></i> Filter
</h2>
<div class="space-y-2">
@for (List<SearchFilters.Filter> filterGroup : filters.getFilterGroups())
@for (SearchFilters.Filter filter : filterGroup)
<label class="flex items-center">
<button title="${filter.displayName}" onclick="document.location='$unsafe{filter.url}'" class="flex-1 py-2 pl-2 rounded flex space-x-2 has-[:checked]:bg-gray-100 has-[:checked]:text-slate-900 hover:bg-gray-50 bg-white text-margeblue">
<%-- Visually hidden checkbox; its checked state drives the button's has-[:checked]:* highlight classes --%>
@if (filter.current)
<input type="checkbox" checked class="sr-only" aria-checked="true" />
@else
<input type="checkbox" class="sr-only" aria-checked="false" />
@endif
<%-- tabindex=-1 keeps the link out of the tab order; the enclosing button is the tab stop --%>
<a tabindex="-1" href="$unsafe{filter.url}" class="text-sm">
<i class="fas ${filter.icon} text-sm mr-3"></i>
<span class="hidden md:inline">${filter.displayName}</span></a>
</button>
</label>
@endfor
<%-- Divider between filter groups; the border is suppressed after the last one --%>
<div class="[&:not(:last-child)]:border-b hidden md:block"></div>
@endfor
</div>
</div>
<div class="bg-white p-4 border border-gray-300">
<h2 class="font-medium mb-3 flex items-center font-serif hidden md:block">
<i class="fas fa-cog text-sm mr-2"></i> Advanced Settings
</h2>
<div class="space-y-2">
@for (SearchFilters.SearchOption option : filters.searchOptions())
<label class="flex items-center">
<button title="${option.name()}" onclick="document.location='$unsafe{option.getUrl()}'" class="flex-1 py-2 pl-2 rounded flex space-x-2 has-[:checked]:bg-gray-100 has-[:checked]:text-slate-900 hover:bg-gray-50 bg-white text-margeblue">
@if (option.isSet())
<input type="checkbox" checked class="sr-only" aria-checked="true" />
@else
<input type="checkbox" class="sr-only" aria-checked="false" />
@endif
<%-- Same tab-order treatment as the filter links above --%>
<a tabindex="-1" href="$unsafe{option.getUrl()}" class="text-sm">
<i class="fas ${option.icon()} text-sm mr-3"></i>
<span class="hidden md:inline">${option.name()}</span>
</a>
</button>
</label>
@endfor
</div>
</div>
</div>
</aside>

View File

@ -0,0 +1,108 @@
@import nu.marginalia.search.model.NavbarModel
@import nu.marginalia.search.svc.*
@param SearchSiteInfoService.SiteInfoModel model
@param NavbarModel navbar
<%--
  Site information page for a single domain.
  Renders the navbar, a header with the domain in a text box, a tab strip
  (Summary / Documents / Backlinks / Report) and then the view template
  matching the concrete type of model. The tab whose type matches model
  gets a hidden checked checkbox, which triggers the tab container's
  has-[:checked]:bg-slate-200 highlight.
--%>
<!DOCTYPE html>
<html lang="en">
@template.part.head(title = "Marginalia Search - " + model.domain())
<body class="min-h-screen bg-slate-100 font-sans" >
@template.part.navbar(navbar = navbar)
<header class="border-gray-300 bg-white shadow-md">
<div class="max-w-[1400px] mx-auto px-4 py-4">
<div class="flex items-center space-x-2">
<h1 class="text-base md:text-xl mr-2 md:mr-8 font-serif">Site Information</h1>
<div id="suggestions-anchor" class="hidden"></div>
<%-- NOTE(review): the input and the Inspect button are not wrapped in a <form> in this template --%>
<input type="text" class="shadow-inner max-w-64 flex-1 bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-sm block w-full p-2.5" value="${model.domain()}" placeholder="www.example.com" name="query" id="query" >
<button class="px-4 py-2 bg-margeblue text-white ml-2 rounded">
<i class="fas fa-search text-sm mr-3"></i> Inspect
</button>
</div>
</div>
<div class="mx-auto md:px-4 border bg-slate-50">
<div class="flex md:space-x-2 max-w-[1000px] mx-auto">
<div class="has-[:checked]:bg-slate-200 py-1 px-2">
<a href="?view=info" class="text-sm whitespace-nowrap place-items-baseline space-x-1 text-gray-700 text-xs hover:text-gray-900">
@if (model instanceof SearchSiteInfoService.SiteInfoWithContext)
<input type="checkbox" class="sr-only hidden absolute" checked readonly />
@else
<span></span>
@endif
<i class="fas fa-circle-info"></i>
<span>Summary</span>
</a>
</div>
<div class="has-[:checked]:bg-slate-200 py-1 px-2">
<a href="?view=docs" class="text-sm whitespace-nowrap place-items-baseline space-x-1 text-gray-700 text-xs hover:text-gray-900">
@if (model instanceof SearchSiteInfoService.Docs)
<input type="checkbox" class="sr-only hidden absolute" checked readonly />
@else
<span></span>
@endif
<i class="fa-regular fa-file"></i>
<span>Documents</span>
</a>
</div>
<div class="has-[:checked]:bg-slate-200 py-1 px-2">
<a href="?view=links" class="text-sm whitespace-nowrap place-items-baseline space-x-1 text-gray-700 text-xs hover:text-gray-900">
@if (model instanceof SearchSiteInfoService.Backlinks)
<input type="checkbox" class="sr-only hidden absolute" checked readonly />
@else
<span></span>
@endif
<i class="fas fa-link"></i>
<span>Backlinks</span>
</a>
</div>
<div class="grow"></div>
<div class="has-[:checked]:bg-slate-200 py-1 px-2">
<a href="?view=report" class="text-sm whitespace-nowrap place-items-baseline space-x-1 text-red-800 text-xs hover:text-red-600">
@if (model instanceof SearchSiteInfoService.ReportDomain)
<input type="checkbox" class="sr-only hidden absolute" checked readonly />
@else
<span></span>
@endif
<i class="fa fa-ban"></i>
<span>Report</span>
</a>
</div>
</div>
</div>
</header>
<div class="max-w-[1000px] mx-auto flex gap-1 flex-col md:flex-row place-items-center md:place-items-start">
<%-- Dispatch on the concrete model type; each type is handled exactly once --%>
@if (model instanceof SearchSiteInfoService.SiteInfoWithContext siteInfo)
@template.siteinfo.view.overview(siteInfo = siteInfo)
@elseif (model instanceof SearchSiteInfoService.ReportDomain reportDomain)
@template.siteinfo.view.reportDomain(reportDomain = reportDomain)
@elseif (model instanceof SearchSiteInfoService.Backlinks backlinks)
@template.siteinfo.view.backlinks(backlinks = backlinks)
@elseif (model instanceof SearchSiteInfoService.Docs docs)
@template.siteinfo.view.docs(docs = docs)
@endif
</div>
@template.part.footerLegal()
</body>
</html>

View File

@ -0,0 +1,67 @@
@import nu.marginalia.api.domains.model.SimilarDomain
@import java.util.List
@param String title
@param String domainName
@param List<SimilarDomain> list
<%--
  Table of domains related to domainName, under the heading title.
  Renders nothing when list is empty. Each row shows: a crosstalk link
  (only when the link type is linked), rank symbols, a link to the other
  domain's site page, and a bar whose width is item.relatedness() percent.
--%>
@if (!list.isEmpty())
<div class="bg-white shadow-sm rounded overflow-hidden border">
<div class="px-4 py-2 bg-margeblue text-white border-b border-gray-200">
<h2 class="text-xs font-semibold">${title}</h2>
</div>
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200">
<thead>
<tr class="bg-gray-50">
<th scope="col" class="px-2 py-1 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Link</th>
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Rank</th>
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Domain</th>
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Similarity</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200 text-xs">
@for(SimilarDomain item : list)
<tr class="hover:bg-gray-50">
<td class="px-3 py-3 whitespace-nowrap">
@if(item.linkType().isLinked())
<%-- Icon-only link: the icon comes from the link type's faIcon() classes, the tooltip from its description --%>
<a href="/crosstalk/?domains=${domainName},${item.url().getDomain().toString()}"
class="text-liteblue ${item.linkType().faIcon()}"
title="${item.linkType().getDescription()}">
</a>
@endif
</td>
<td class="px-3 py-3 whitespace-nowrap">
<div class="flex items-center">
<span title="${item.rank()}%" class="text-sm text-gray-600">
<%-- Emitted unescaped; presumably getRankSymbols() returns markup/entities - verify it never carries user-controlled text --%>
$unsafe{item.getRankSymbols()}
</span>
</div>
</td>
<td class="px-3 py-3 whitespace-nowrap">
<a href="/site/${item.url().getDomain().toString()}?view=similar"
rel="external noopener nofollow"
class="text-liteblue hover:text-liteblue">
${item.url().getDomain().toString()}
</a>
</td>
<td class="px-3 py-3">
<div class="w-16">
<div class="bg-gray-200 rounded-full h-2 overflow-hidden">
<div class="bg-margeblue h-2 rounded-full"
style="width: ${item.relatedness()}%">
</div>
</div>
</div>
</td>
</tr>
@endfor
</tbody>
</table>
</div>
</div>
@endif

View File

@ -0,0 +1,91 @@
@import nu.marginalia.search.model.NavbarModel
@import nu.marginalia.search.svc.*
@import nu.marginalia.search.svc.SearchSiteInfoService.SiteOverviewModel
@import nu.marginalia.search.svc.SearchSiteInfoService.SiteOverviewModel.DiscoveredDomain
@param NavbarModel navbar
@param SiteOverviewModel model
<%--
  Site viewer landing page.
  Renders the navbar, a header with an empty domain text box, and tables
  listing model.domains() (name linked to the domain's site page, plus its
  timestamp).
--%>
<!DOCTYPE html>
<html lang="en">
@template.part.head(title = "Marginalia Search - Site Viewer")
<body class="min-h-screen bg-slate-100 font-sans" >
@template.part.navbar(navbar = navbar)
<header class="border-gray-300 bg-white shadow-md">
<div class="max-w-[1400px] mx-auto px-4 py-4">
<div class="flex items-center space-x-2">
<h1 class="text-base md:text-xl mr-2 md:mr-8 font-serif">Site Information</h1>
<div id="suggestions-anchor" class="hidden"></div>
<%-- NOTE(review): the input and the Inspect button are not wrapped in a <form> in this template --%>
<input type="text" class="shadow-inner max-w-64 flex-1 bg-gray-50 border border-gray-300 text-gray-900 text-sm rounded-sm block w-full p-2.5" value="" placeholder="www.example.com" name="query" id="query" >
<button class="px-4 py-2 bg-margeblue text-white ml-2 rounded">
<i class="fas fa-search text-sm mr-3"></i> Inspect
</button>
</div>
</div>
</header>
<div class="max-w-[1000px] mx-auto flex gap-1 flex-col md:flex-row place-items-center md:place-items-start">
<div class="border rounded m-4 overflow-hidden">
<div class="bg-margeblue text-white p-2 text-sm">Recently Discovered Domains</div>
<div class="bg-white">
<table class="w-full divide-y divide-gray-200">
<thead>
<tr class="bg-gray-50">
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Domain Name</th>
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Discover Time</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200 text-xs">
@for (DiscoveredDomain domain : model.domains())
<tr class="hover:bg-gray-50">
<td class="px-3 py-3 whitespace-nowrap">
<%-- Root-relative link, so it resolves the same with or without a trailing slash on this page's URL --%>
<a class="text-liteblue underline" href="/site/${domain.name()}">${domain.name()}</a>
</td>
<td class="px-3 py-3 whitespace-nowrap">
${domain.timestamp()}
</td>
</tr>
@endfor
</tbody>
</table>
</div>
</div>
<%-- NOTE(review): this panel repeats the one above verbatim (same heading, same model.domains() loop); presumably a placeholder for a second list - confirm --%>
<div class="border rounded m-4 overflow-hidden">
<div class="bg-margeblue text-white p-2 text-sm">Recently Discovered Domains</div>
<div class="bg-white">
<table class="w-full divide-y divide-gray-200">
<thead>
<tr class="bg-gray-50">
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Domain Name</th>
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Discover Time</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200 text-xs">
@for (DiscoveredDomain domain : model.domains())
<tr class="hover:bg-gray-50">
<td class="px-3 py-3 whitespace-nowrap">
<a class="text-liteblue underline" href="/site/${domain.name()}">${domain.name()}</a>
</td>
<td class="px-3 py-3 whitespace-nowrap">
${domain.timestamp()}
</td>
</tr>
@endfor
</tbody>
</table>
</div>
</div>
</div>
@template.part.footerLegal()
</body>
</html>

View File

@ -0,0 +1,60 @@
@import nu.marginalia.search.model.GroupedUrlDetails
@import nu.marginalia.search.model.UrlDetails
@import nu.marginalia.search.model.ResultsPage
@import nu.marginalia.search.svc.SearchSiteInfoService.*
@param Backlinks backlinks
<%--
  Backlinks view of the site information page.
  Shows a notice (either "no backlinks known" or "showing documents linking
  to <domain>"), then one card per GroupedUrlDetails in backlinks.results()
  listing the title, description and URL of each of its documents, and a
  pager when backlinks.pages() has more than one entry.
--%>
<div class="flex flex-col space-y-4 my-4 w-full">
@if (backlinks.results().isEmpty())
<div class="border rounded bg-white flex flex-col overflow-hidden p-4 mx-4 text-gray-800 text-sm ">
The search engine isn't aware of any backlinks to ${backlinks.domain()}!
</div>
@else
<div class="border rounded bg-white flex flex-col overflow-hidden p-4 mx-4 text-gray-800 text-sm">
Showing documents linking to ${backlinks.domain()}
</div>
@endif
@for (GroupedUrlDetails group : backlinks.results())
<div class="border rounded bg-white flex flex-col overflow-hidden mx-4">
<%-- Card header: link to the linking domain's site page, plus an icon-only link to the domain's HTTPS root --%>
<div class="flex space-x-2 flex-row place-items-baseline bg-margeblue text-white p-2 text-sm">
<span class="fas fa-globe"></span>
<a href="/site/${group.domain().toString()}">${group.domain().toString()}</a>
<span class="grow"></span>
<a rel="nofollow noopener external" href="${group.domain().toRootUrlHttps().toString()}" class="fa-solid fa-arrow-up-right-from-square" ></a>
</div>
@for (UrlDetails details : group.urlDetails())
<div class="p-2 font-medium text-sm text-gray-800 mx-2 mt-2">${details.title}</div>
<div class="p-2 mx-2 text-gray-700 text-sm">
${details.description}
</div>
<div class="p-2 text-sm border-b pb-6">
<a rel="external noopener nofollow" href="${details.url.toString()}" class="mx-3 text-liteblue flex space-x-2 place-items-baseline">
<i class="fa fa-link"></i>
<span>${details.url.toString()}</span>
</a>
</div>
@endfor
</div>
@endfor
<!-- Pagination -->
@if (backlinks.pages().size() > 1)
<div class="mt-8 flex justify-center space-x-2 font-mono text-sm">
@for(ResultsPage page : backlinks.pages())
<%-- The two branches differ only in styling; the current page gets the grey background --%>
@if (page.current())
<a href="?view=links&page=${page.number()}" class="px-3 py-1 border border-gray-300 bg-gray-100">${page.number()}</a>
@else
<a href="?view=links&page=${page.number()}" class="px-3 py-1 bg-white border border-gray-300 hover:bg-gray-100">${page.number()}</a>
@endif
@endfor
</div>
@endif
</div>
<!-- -->

View File

@ -0,0 +1,89 @@
@import nu.marginalia.search.svc.SearchSiteInfoService
@import nu.marginalia.search.svc.SearchSiteInfoService.*
@import nu.marginalia.search.model.UrlDetails
@import nu.marginalia.search.model.ResultsPage
@import nu.marginalia.model.idx.DocumentFlags
@param Docs docs
<%--
Documents view: shows a status line, then one card per indexed document
(title, URL, description and a row of badges), and finally a pager when
there is more than one page of results.
--%>
<!-- -->
<div class="flex flex-col space-y-4 my-4">
@if (docs.results().isEmpty())
<div class="border rounded bg-white flex flex-col overflow-hidden p-4 mx-4 text-gray-800 text-sm">
The search engine doesn't index any documents from ${docs.domain()}
</div>
@else
<div class="border rounded bg-white flex flex-col overflow-hidden p-4 mx-4 text-gray-800 text-sm">
Showing documents from ${docs.domain()}
</div>
@endif
@for (UrlDetails details : docs.results())
<div class="border rounded bg-white flex flex-col overflow-hidden mx-4">
<div class="flex grow justify-between items-start p-4">
<div class="flex-1">
<h2 class="text-xl text-gray-800 font-serif mr-4">
<a href="${details.url.toString()}" rel="noopener noreferrer">${details.title}</a>
</h2>
<div class="text-sm mt-1 text-slate-800">
<%-- Same target as the title link above; tabindex="-1" presumably keeps the duplicate out of the tab order (confirm intent) --%>
<a class="text-liteblue underline" href="${details.url.toString()}"
rel="noopener noreferrer" tabindex="-1">${details.url.toString()}</a>
</div>
</div>
</div>
<p class="mt-2 text-sm text-slate-900 leading-relaxed mx-4 mb-4">
${details.description}
</p>
<div class="flex text-xs space-x-2 p-2">
<div class="grow"></div>
<%-- Blue badges: document flags tested against the result item's encoded document metadata --%>
@if (DocumentFlags.PlainText.isPresent(details.resultItem.encodedDocMetadata))
<span class="px-1 bg-blue-100 text-blue-700 rounded">Plain text</span>
@endif
@if (DocumentFlags.GeneratorForum.isPresent(details.resultItem.encodedDocMetadata))
<span class="px-1 bg-blue-100 text-blue-700 rounded">Forum</span>
@endif
@if (DocumentFlags.GeneratorWiki.isPresent(details.resultItem.encodedDocMetadata))
<span class="px-1 bg-blue-100 text-blue-700 rounded">Wiki</span>
@endif
<%-- Yellow and red badges: driven by boolean accessors on UrlDetails --%>
@if(details.isCookies())
<span class="px-1 bg-yellow-100 text-yellow-700 rounded" title="Contains Cookies">Cookies</span>
@endif
@if(details.isTracking())
<span class="px-1 bg-yellow-100 text-yellow-700 rounded" title="Uses tracking scripts">Track</span>
@endif
@if(details.isScripts())
<span class="px-1 bg-yellow-100 text-yellow-700 rounded" title="Contains JavaScript">JS</span>
@endif
@if(details.isAds())
<span class="px-1 bg-red-100 text-red-700 rounded" title="Contains adtech">Ads</span>
@endif
@if(details.isAffiliate())
<span class="px-1 bg-red-100 text-red-700 rounded" title="Contains Affiliate Link">Affiliate</span>
@endif
</div>
</div>
@endfor
<!-- Pagination -->
<%-- The pager is only rendered when there is more than one page; the current page gets the shaded background --%>
@if (docs.pages().size() > 1)
<div class="mt-8 flex justify-center space-x-2 font-mono text-sm">
@for(ResultsPage page : docs.pages())
@if (page.current())
<a href="?view=docs&page=${page.number()}" class="px-3 py-1 border border-gray-300 bg-gray-100">${page.number()}</a>
@else
<a href="?view=docs&page=${page.number()}" class="px-3 py-1 bg-white border border-gray-300 hover:bg-gray-100">${page.number()}</a>
@endif
@endfor
</div>
@endif
</div>

View File

@ -0,0 +1,195 @@
@import nu.marginalia.search.svc.SearchSiteInfoService
@import nu.marginalia.search.svc.SearchSiteInfoService.*
@import nu.marginalia.search.model.UrlDetails
@param SiteInfoWithContext siteInfo
<%--
Site overview: a main card with the domain header, optional screenshot or
alias hint, feed items, sample documents, status notices (unknown /
blacklisted / suggest-for-crawling) and, for known sites, crawl statistics
and network details. A second column lists similar and linked domains when
either list is non-empty.
--%>
<!-- Main content -->
<div class="flex-1 p-4 space-y-4 mx-auto w-full md:w-auto">
<div class="flex border rounded bg-white flex-col space-y-4 pb-4 overflow-hidden md:max-w-lg" >
<div class="flex place-items-baseline space-x-2 p-2 text-sm border-b mb-2 bg-margeblue text-white">
<i class="fa fa-globe"></i>
<span>${siteInfo.domain()}</span>
<div class="grow">
</div>
<%-- Icon-only link: the label/title give it an accessible name, since the anchor has no text content --%>
<a rel="nofollow noopener external" href="${siteInfo.siteUrl()}" class="fa-solid fa-arrow-up-right-from-square" aria-label="Visit ${siteInfo.domain()}" title="Visit ${siteInfo.domain()}"></a>
</div>
<%-- The alias hint is only considered when there is no screenshot to show --%>
@if (siteInfo.hasScreenshot())
<a class="mx-3 " tabindex="-1" rel="nofollow noopener external" href="${siteInfo.siteUrl()}"><img src="/screenshot/${siteInfo.domainId()}" alt="Screenshot of ${siteInfo.domain()}"></a>
@elseif (siteInfo.aliasDomain().isPresent() && siteInfo.domainInformation().getNodeAffinity() < 1)
<div class="mx-3 my-3 text-xs text-slate-800">
The search engine is also aware of links to <a class="underline text-liteblue" href="/site/${siteInfo.aliasDomain().get()}">${siteInfo.aliasDomain().get()}</a>,
this may be the canonical address.
</div>
@endif
@if (siteInfo.feed() != null && !siteInfo.feed().items().isEmpty())
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 rounded">
<i class="fas fa-rss text-orange-500"></i>
<span class="grow">Feed</span>
<i class="fa-regular fa-bookmark mr-2 text-gray-600 hover:text-blue-800 cursor-pointer" title="Add content from this feed to the front page"></i>
</div>
<dl class="mx-3 text-gray-800">
@for (SearchSiteInfoService.FeedItem item : siteInfo.feed().items())
<dt class="ml-2 flex space-x-4">
<a class="grow underline text-liteblue text-sm" rel="noopener nofollow external ugc" href="${item.url()}">${item.title()}</a>
<span>${item.pubDay()}</span>
</dt>
<dd class="ml-6 text-sm mb-4">${item.description()}</dd>
@endfor
</dl>
@endif
@if (siteInfo.samples() != null && !siteInfo.samples().isEmpty())
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 rounded">
<i class="fas fa-magnifying-glass"></i>
<span>Sample</span>
</div>
<dl class="mx-3 text-gray-800">
@for (UrlDetails item : siteInfo.samples())
<dt class="ml-2">
<a class="underline text-liteblue text-sm" rel="noopener nofollow external ugc" href="${item.url.toString()}">${item.title}</a>
</dt>
<dd class="ml-6 text-sm mb-4">${item.description}</dd>
@endfor
</dl>
@endif
@if (siteInfo.domainInformation().isUnknownDomain())
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 rounded">
<i class="fa-regular fa-circle-question"></i>
<span>Unknown Domain</span>
</div>
<div class="mx-5 flex flex-col space-y-2">
<p>This website is not known to the search engine.</p>
<p>To submit the website for crawling, follow <a class="text-liteblue underline"
rel="noopener noreferrer"
target="_blank"
href="https://github.com/MarginaliaSearch/submit-site-to-marginalia-search">these instructions</a>.</p>
</div>
@endif
@if (siteInfo.domainInformation().isBlacklisted())
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 rounded">
<i class="fas fa-ban"></i>
<span>Blacklisted</span>
</div>
<div class="mx-5 flex flex-col space-y-2">
<p>This website is <em>blacklisted</em>. This excludes it from crawling and indexing.</p>
<p>This is usually because of some form of misbehavior on the webmaster's end,
either annoying search engine spam, or tasteless or bad-faith content.</p>
</div>
@endif
@if (siteInfo.domainInformation().isSuggestForCrawling())
<%-- Posts the domain id together with the "nomisclick" checkbox to /site/suggest/ --%>
<form method="POST"
action="/site/suggest/">
<div class="max-w-md mx-auto p-6 bg-white shadow border border-gray-200 mx-1 flex space-y-4 flex-col">
<div class="text-sm text-gray-600 mb-4">
This website is not queued for crawling. If you would like it to be crawled,
use the checkbox and button below.
</div>
<input type="hidden" name="id" value="${siteInfo.domainId()}" />
<div class="flex items-center space-x-2">
<input type="checkbox"
id="nomisclick"
name="nomisclick"
class="h-4 w-4 rounded border-gray-300 text-margeblue focus:ring-margeblue" />
<label for="nomisclick" class="text-sm text-gray-700">
This is not a mis-click
</label>
</div>
<button type="submit"
class="place-items-baseline space-x-4 w-full flex py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-margeblue focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-margeblue">
<i class="fas fa-plus"></i>
<span>Add ${siteInfo.domain()} to queue</span>
</button>
</div>
</form>
@endif
@if (siteInfo.isKnown())
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 rounded">
<i class="fas fa-chart-simple"></i>
<span>Crawl Statistics</span>
</div>
<div class="mx-3 grid grid-cols-2 md:grid-cols-3 gap-4 mx-8">
<div>
<h3 class="text-sm text-gray-500">Pages Known</h3>
<p class="text-xl font-medium">${siteInfo.domainInformation().getPagesKnown()}</p>
</div>
<div>
<h3 class="text-sm text-gray-500">Pages Fetched</h3>
<p class="text-xl font-medium">${siteInfo.domainInformation().getPagesFetched()}</p>
</div>
<div>
<h3 class="text-sm text-gray-500">Pages Indexed</h3>
<p class="text-xl font-medium">${siteInfo.domainInformation().getPagesIndexed()}</p>
</div>
<div>
<h3 class="text-sm text-gray-500">Incoming Links</h3>
<p class="text-xl font-medium">${siteInfo.domainInformation().getIncomingLinks()}</p>
</div>
<div>
<h3 class="text-sm text-gray-500">Outbound Links</h3>
<p class="text-xl font-medium ">${siteInfo.domainInformation().getOutboundLinks()}</p>
</div>
<div title="Which index partition the domain is indexed by">
<h3 class="text-sm text-gray-500">Node Affinity</h3>
<p class="text-xl font-medium">${siteInfo.domainInformation().getNodeAffinity()}</p>
</div>
</div>
@if (siteInfo.domainInformation().getPagesKnown() >= 5_000_000)
<div class="mx-5 text-xs text-gray-600">
This website is very large, and the system can not accurately report the number of crawled
and indexed documents without affecting performance.
</div>
@endif
<div class="mx-3 flex place-items-baseline space-x-2 p-2 bg-gray-100 rounded">
<i class="fas fa-network-wired"></i>
<span>Network Details</span>
</div>
<div class="mx-3 grid grid-cols-2 gap-2 mx-8">
<div>
<h3 class="text-sm font-medium text-gray-500">IP Address</h3>
<p class="text-lg font-mono">${siteInfo.domainInformation().getIp()}</p>
<p class="text-sm text-gray-600">${siteInfo.domainInformation().getIpCountry()} ${siteInfo.domainInformation().getIpFlag()}</p>
</div>
<div title="Autonomous system">
<h3 class="text-sm font-medium text-gray-500">ASN Details</h3>
<p class="text-lg font-mono">AS${siteInfo.domainInformation().getAsn()} - ${siteInfo.domainInformation().getAsnOrg()}</p>
<p class="text-sm text-gray-600 ">${siteInfo.domainInformation().getAsnCountry()} ${siteInfo.domainInformation().getAsnFlag()}</p>
</div>
</div>
@endif
</div>
</div>
@if (!siteInfo.similar().isEmpty() || !siteInfo.linking().isEmpty())
<div class="mx-auto md:py-4 px-4 flex-1 space-y-4 content md:block w-full md:w-auto">
@template.siteinfo.part.linkedDomains("Similar Domains", siteInfo.domain(), siteInfo.similar())
@template.siteinfo.part.linkedDomains("Linked Domains", siteInfo.domain(), siteInfo.linking())
</div>
@endif

View File

@ -0,0 +1,111 @@
@import nu.marginalia.search.svc.SearchSiteInfoService.*
@param ReportDomain reportDomain
<%--
Report view: shows either a confirmation (after submission) or the complaint
form, followed by a table of existing complaints when there are any.
The form has no action attribute, so it posts back to the current URL.
--%>
<div class="flex-col mx-auto">
<div class="max-w-2xl mx-auto bg-white border rounded overflow-auto shadow-sm my-4 space-y-4 w-full">
<div class="px-4 py-2 bg-margeblue text-white border-b border-gray-200">
<h2 class="text-sm font-semibold">Report Domain Issue</h2>
</div>
@if (reportDomain.submitted())
<div class="mx-4 pb-4">Your complaint has been submitted and will be reviewed in a few weeks.
For urgent issues, email <a href="mailto:kontakt@marginalia.nu" class="text-blue-600 hover:underline">kontakt@marginalia.nu</a>
instead of using this form.
</div>
@else
<form class="space-y-6 p-4" method="post">
<div>
<label for="report-category" class="block text-sm font-medium text-slate-700 mb-1">
Category
</label>
<%-- NOTE(review): these option values are hard-coded in the template; confirm they match the
     categories the form handler accepts, or render the options from the model instead. --%>
<select required id="report-category" name="category" class="w-full px-3 py-2 bg-white border border-slate-300 rounded-md shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500">
<option value="">Select issue type...</option>
<option value="spam">Spam</option>
<option value="dead-link">Dead Link</option>
<option value="inappropriate">Inappropriate Content</option>
<option value="other">Other</option>
</select>
</div>
<div>
<label for="report-description" class="block text-sm font-medium text-slate-700 mb-1">
Description
</label>
<textarea id="report-description" name="description" class="w-full h-32 w-64 px-3 py-2 bg-white border border-slate-300 rounded-md shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500" placeholder="Describe the issue..."></textarea>
</div>
<div>
<label for="report-samplequery" class="block text-sm font-medium text-slate-700 mb-1">
Search Query (Optional)
</label>
<input id="report-samplequery"
name="samplequery"
type="text"
class="w-full px-3 py-2 bg-white border border-slate-300 rounded-md shadow-sm focus:outline-none focus:ring-2 focus:ring-blue-500"
placeholder="Enter search query..."
/>
</div>
<div class="space-y-4">
<button
type="submit"
class="px-4 py-2 bg-blue-600 text-white font-medium rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-blue-500"
>
Submit Report
</button>
<p class="text-sm text-slate-600">
For urgent issues, email <a href="mailto:kontakt@marginalia.nu" class="text-blue-600 hover:underline">kontakt@marginalia.nu</a>
instead of using this form.
</p>
</div>
</form>
@endif
</div>
@if (!reportDomain.complaints().isEmpty())
<div class="max-w-2xl mx-auto bg-slate-50 rounded overflow-auto shadow-sm my-4 space-y-2 w-full">
<div class="px-4 py-2 bg-margeblue text-white border-b border-gray-200">
<h2 class="text-sm font-semibold">Existing Complaints</h2>
</div>
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200">
<thead>
<tr class="bg-gray-50">
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Category</th>
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Timestamp</th>
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Reviewed</th>
<th scope="col" class="px-2 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider">Decision</th>
</tr>
</thead>
<tbody class="bg-white divide-y divide-gray-200 text-xs">
@for (var complaint : reportDomain.complaints())
<tr>
<td class="px-3 py-3 whitespace-nowrap">
${complaint.category()}
</td>
<td class="px-3 py-3 whitespace-nowrap">
${complaint.submitTime()}
</td>
<td class="px-3 py-3 whitespace-nowrap">
@if(complaint.isReviewed())
<i class="fas fa-check"></i>
@endif
</td>
<td class="px-3 py-3 whitespace-nowrap">
${complaint.decision()}
</td>
</tr>
@endfor
</tbody>
</table>
</div>
</div>
@endif
</div>
<!-- -->

View File

@ -15,7 +15,7 @@ class BangCommandTest {
public void testG() {
try {
bangCommand.process(null,
new SearchParameters(" !g test",
new SearchParameters(null, " !g test",
null, null, null, null, null, false, 1)
);
Assertions.fail("Should have thrown RedirectException");

View File

@ -0,0 +1,122 @@
package nu.marginalia.search.paperdoll;
import gg.jte.CodeResolver;
import gg.jte.ContentType;
import gg.jte.TemplateEngine;
import gg.jte.output.StringOutput;
import gg.jte.resolve.DirectoryCodeResolver;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.search.model.NavbarModel;
import nu.marginalia.search.rendering.MockedSearchResults;
import org.junit.jupiter.api.Test;
import spark.Spark;
import java.nio.file.Path;
import java.util.Map;
/**
 * Manual "paper doll" harness for the JTE templates: starts a Spark server on
 * port 9999 that renders each template with canned data from
 * {@link MockedSearchResults}, so the pages can be inspected in a browser.
 *
 * <p>The single test method never returns on its own; it is meant to be run by
 * hand and stopped by the developer, not as part of an automated test run.
 */
public class JtePaperDoll {
    final CodeResolver codeResolver = new DirectoryCodeResolver(Path.of(".").toAbsolutePath().resolve("resources/jte"));
    final TemplateEngine templateEngine = TemplateEngine.create(codeResolver, ContentType.Html);

    /** Renders a template that takes a single model object. */
    private String render(String template, Object obj) {
        var str = new StringOutput();
        templateEngine.render(template, obj, str);
        return str.toString();
    }

    /** Renders a template whose parameters are supplied by name through {@code map}. */
    private String render(String template, Map<String, Object> map) {
        var str = new StringOutput();
        templateEngine.render(template, map, str);
        return str.toString();
    }

    /**
     * Registers one route per mocked view and then blocks until the thread is interrupted.
     *
     * @throws InterruptedException when the blocked test thread is interrupted
     */
    @Test
    public void searchResults() throws InterruptedException {
        // Printed to make it obvious which working directory the template path resolves against
        System.out.println(Path.of(".").toAbsolutePath());

        Spark.port(9999);

        Spark.after((rq, rs) -> {
            rs.header("Content-Encoding", "gzip");
        });

        Spark.get("/",
                (rq, rs) -> MockedSearchResults.mockRegularSearchResults(),
                ret -> this.render("serp/main.jte", Map.of("results", ret, "navbar", NavbarModel.SEARCH))
        );
        Spark.get("/site-focus",
                (rq, rs) -> MockedSearchResults.mockSiteFocusResults(),
                ret -> this.render("serp/main.jte", Map.of("results", ret, "navbar", NavbarModel.SEARCH))
        );
        Spark.get("/errors",
                (rq, rs) -> MockedSearchResults.mockErrorData(),
                ret -> this.render("serp/error.jte", Map.of("model", ret, "navbar", NavbarModel.LIMBO))
        );
        Spark.get("/first",
                (rq, rs) -> new Object(),
                ret -> this.render("serp/first.jte", Map.of( "navbar", NavbarModel.SEARCH,
                        "websiteUrl", new WebsiteUrl("https://localhost:9999/")
                ))
        );
        Spark.get("/explore",
                (rq, rs) -> MockedSearchResults.mockBrowseResults(32),
                ret -> this.render("explore/main.jte", Map.of( "navbar", NavbarModel.EXPLORE,
                        "results", ret)
                )
        );
        // The "view" query parameter selects which site-info model is rendered
        Spark.get("/site-info",
                (rq, rs) -> {
                    if ("links".equals(rq.queryParams("view"))) {
                        return MockedSearchResults.mockBacklinkData();
                    }
                    else if ("docs".equals(rq.queryParams("view"))) {
                        return MockedSearchResults.mockDocsData();
                    }
                    else if ("report".equals(rq.queryParams("view"))) {
                        return MockedSearchResults.mockReportDomain();
                    }
                    else return MockedSearchResults.mockSiteInfoData();
                },
                ret -> this.render("siteinfo/main.jte", Map.of("model", ret, "navbar", NavbarModel.EXPLORE))
        );
        // Placeholder screenshot: a static SVG sketch of a browser window
        Spark.get("/screenshot/*", (rq, rsp) -> {
            rsp.type("image/svg+xml");
            return """
                    <svg viewBox="0 0 800 600" xmlns="http://www.w3.org/2000/svg">
                    <!-- Browser Window Frame -->
                    <rect x="0" y="0" width="800" height="600" rx="8" fill="#f1f5f9"/>
                    <rect x="0" y="0" width="800" height="40" rx="8" fill="#e2e8f0"/>
                    <!-- Browser Controls -->
                    <circle cx="20" cy="20" r="6" fill="#ef4444"/>
                    <circle cx="40" cy="20" r="6" fill="#fbbf24"/>
                    <circle cx="60" cy="20" r="6" fill="#22c55e"/>
                    <!-- Address Bar -->
                    <rect x="120" y="10" width="560" height="20" rx="4" fill="#ffffff"/>
                    <!-- Content Area -->
                    <rect x="20" y="60" width="760" height="80" rx="4" fill="#ffffff"/>
                    <rect x="40" y="80" width="400" height="16" rx="2" fill="#cbd5e1"/>
                    <rect x="40" y="104" width="300" height="16" rx="2" fill="#cbd5e1"/>
                    <!-- Navigation -->
                    <rect x="20" y="160" width="180" height="420" rx="4" fill="#ffffff"/>
                    <rect x="40" y="180" width="140" height="12" rx="2" fill="#cbd5e1"/>
                    <rect x="40" y="200" width="120" height="12" rx="2" fill="#cbd5e1"/>
                    <rect x="40" y="220" width="130" height="12" rx="2" fill="#cbd5e1"/>
                    <!-- Main Content -->
                    <rect x="220" y="160" width="560" height="420" rx="4" fill="#ffffff"/>
                    <rect x="240" y="180" width="520" height="180" rx="2" fill="#cbd5e1"/>
                    <rect x="240" y="380" width="520" height="12" rx="2" fill="#cbd5e1"/>
                    <rect x="240" y="400" width="480" height="12" rx="2" fill="#cbd5e1"/>
                    <rect x="240" y="420" width="500" height="12" rx="2" fill="#cbd5e1"/>
                    <rect x="240" y="440" width="460" height="12" rx="2" fill="#cbd5e1"/>
                    </svg>
                    """;
        });

        Spark.init();

        // Keep the test (and with it the server) alive without spinning a CPU core:
        // joining the current thread blocks until it is interrupted.
        Thread.currentThread().join();
    }
}

View File

@ -0,0 +1,252 @@
package nu.marginalia.search.rendering;
import nu.marginalia.WebsiteUrl;
import nu.marginalia.api.domains.model.DomainInformation;
import nu.marginalia.api.domains.model.SimilarDomain;
import nu.marginalia.api.searchquery.model.results.SearchResultItem;
import nu.marginalia.browse.model.BrowseResult;
import nu.marginalia.browse.model.BrowseResultSet;
import nu.marginalia.model.EdgeDomain;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.model.crawl.DomainIndexingState;
import nu.marginalia.search.command.SearchParameters;
import nu.marginalia.search.model.*;
import nu.marginalia.search.svc.SearchFlagSiteService;
import nu.marginalia.search.svc.SearchSiteInfoService;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.ThreadLocalRandom;
/**
 * Factory methods producing canned model objects for rendering the search
 * front-end templates without a live backend.
 */
public class MockedSearchResults {
    /** Placeholder body text shared by mocked result descriptions and feed items. */
    private static final String ILIAD_EXCERPT =
            "Sing, Goddess, sing the rage of Achilles, son of Peleus—\n" +
            "that murderous anger which condemned Achaeans\n" +
            "to countless agonies and threw many warrior souls\n" +
            "deep into Hades, leaving their dead bodies\n" +
            "carrion food for dogs and birds—\n" +
            "all in fulfilment of the will of Zeus.";

    /** Base URL handed to {@link WebsiteUrl} in the mocked search parameters. */
    private static final String MOCK_WEBSITE_URL = "https://localhost:9999/";

    /** Mocks a result with the default placeholder description. */
    private static UrlDetails mockUrlDetails(String url, String title) throws URISyntaxException {
        return mockUrlDetails(url, title, ILIAD_EXCERPT);
    }

    /** Mocks a result with the given URL, title and description; the remaining fields are fixed or random filler. */
    private static UrlDetails mockUrlDetails(String url, String title, String desc) throws URISyntaxException {
        return new UrlDetails(
                1,
                1,
                new EdgeUrl(url),
                title,
                desc,
                "HTML5",
                ThreadLocalRandom.current().nextInt(),
                DomainIndexingState.ACTIVE,
                0.5,
                8,
                "",
                mockPositionsMask(),
                2,
                new SearchResultItem(0, 0, 0, 0, 0),
                null);
    }

    /** Builds a 64-bit mask by setting between 1 and 23 randomly chosen bits (collisions may yield fewer distinct bits). */
    private static long mockPositionsMask() {
        int hits = ThreadLocalRandom.current().nextInt(1, 24);
        long mask = 0;
        for (int i = 0; i < hits; i++) {
            mask |= 1L << ThreadLocalRandom.current().nextInt(0, 64);
        }
        return mask;
    }

    /** Two-page pager with the first page marked as current. */
    private static List<ResultsPage> mockPages() {
        return List.of(
                new ResultsPage(1, true, "#"),
                new ResultsPage(2, false, "#"));
    }

    /** A stand-alone result, a result with a short description, and a clustered result with two additional entries. */
    private static List<ClusteredUrlDetails> mockSearchResultsList() throws URISyntaxException {
        return List.of(
                // Non-clustered result
                new ClusteredUrlDetails(
                        mockUrlDetails("https://clustered.marginalia.nu", "Non-clustered-result")
                ),
                new ClusteredUrlDetails(
                        mockUrlDetails("https://clustered.marginalia.nu", "Short Result", "Short")
                ),
                new ClusteredUrlDetails(
                        mockUrlDetails("https://clustered.marginalia.nu", "Clustered-result"),
                        List.of(
                                mockUrlDetails("https://clustered.marginalia.nu", "Additional result"),
                                mockUrlDetails("https://clustered.marginalia.nu", "One more result")
                        )
                )
        );
    }

    /** Shared construction for the two search-result mocks, which differ only in query and focus-domain values. */
    private static DecoratedSearchResults mockSearchResults(String query, String focusDomain, int focusDomainId)
            throws URISyntaxException {
        SearchParameters params = SearchParameters.defaultsForQuery(new WebsiteUrl(MOCK_WEBSITE_URL), query, 1);
        return new DecoratedSearchResults(
                params,
                List.of("Not enough search engine oil"),
                null,
                mockSearchResultsList(),
                focusDomain,
                focusDomainId,
                new SearchFilters(params),
                mockPages());
    }

    /** Search results for the query "test" with no focus domain. */
    public static DecoratedSearchResults mockRegularSearchResults() throws URISyntaxException {
        return mockSearchResults("test", "", -1);
    }

    /** Search results for a {@code site:} query, with example.marginalia.nu as the focus domain. */
    public static DecoratedSearchResults mockSiteFocusResults() throws URISyntaxException {
        return mockSearchResults("test site:example.marginalia.nu", "example.marginalia.nu", 1);
    }

    /** Error page model with a title, an explanatory message and the search parameters of a sample query. */
    public static SearchErrorMessageModel mockErrorData() {
        var params = SearchParameters.defaultsForQuery(new WebsiteUrl(MOCK_WEBSITE_URL), "test site:example.marginalia.nu", 1);
        return new SearchErrorMessageModel(
                "An error occurred when communicating with the search engine index.",
                """
                This is hopefully a temporary state of affairs. It may be due to
                an upgrade. The index typically takes about two or three minutes
                to reload from a cold restart. Thanks for your patience.
                """,
                params,
                new SearchFilters(params)
        );
    }

    /** Site overview model for www.example.com with an alias domain, similar/linked domains and two feed items. */
    public static SearchSiteInfoService.SiteInfoWithContext mockSiteInfoData() throws URISyntaxException {
        return new SearchSiteInfoService.SiteInfoWithContext(
                "www.example.com",
                Optional.of("other.example.com"),
                14,
                "https://www.example.com",
                true,
                new DomainInformation(
                        new EdgeDomain("www.example.com"),
                        false,
                        14,
                        23,
                        55,
                        10,
                        20,
                        1,
                        0.5,
                        false,
                        true,
                        false,
                        "127.0.0.1",
                        4041,
                        "ACME INC",
                        "SE",
                        "SE",
                        "INDEXED"
                ),
                List.of(
                        new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,65, 20, true, true, true, SimilarDomain.LinkType.BIDIRECTIONAL)
                ),
                List.of(
                        new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,65, 80, true, true, true, SimilarDomain.LinkType.BIDIRECTIONAL),
                        new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,35, 40, true, true, false, SimilarDomain.LinkType.BACKWARD),
                        new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,25, 20, true, true, false, SimilarDomain.LinkType.FOWARD),
                        new SimilarDomain(new EdgeUrl("https://www.other.com"), 4,25, 20, true, true, false, SimilarDomain.LinkType.FOWARD)
                ),
                new SearchSiteInfoService.FeedItems("www.example.com",
                        "https://www.example.com/rss.xml",
                        "2024-01-01",
                        List.of(
                                new SearchSiteInfoService.FeedItem("Test Post", "2024-01-01", "Lorem ipsum dolor sit amet", "https://www.example.com/1"),
                                // Distinct URL per item so the two mocked posts do not point at the same page
                                new SearchSiteInfoService.FeedItem("Other Post", "2024-01-04", ILIAD_EXCERPT, "https://www.example.com/2")
                        )),
                List.of());
    }

    /** Backlinks model with one three-document group, one single-document group and a two-page pager. */
    public static SearchSiteInfoService.Backlinks mockBacklinkData() throws URISyntaxException {
        return new SearchSiteInfoService.Backlinks(
                "www.example.com",
                4,
                List.of(
                        new GroupedUrlDetails(
                                List.of(
                                        mockUrlDetails("https://www.example.com/", "lorem ipsum"),
                                        mockUrlDetails("https://www.example.com/", "dolor sit"),
                                        mockUrlDetails("https://www.example.com/", "amet quia")
                                )
                        ),
                        new GroupedUrlDetails(
                                List.of(
                                        mockUrlDetails("https://other.example.com", "single link result")
                                )
                        )
                ),
                mockPages()
        );
    }

    /** Documents model with three documents and a two-page pager. */
    public static SearchSiteInfoService.Docs mockDocsData() throws URISyntaxException {
        return new SearchSiteInfoService.Docs(
                "www.example.com",
                1,
                List.of(
                        mockUrlDetails("https://www.example.com/", "lorem ipsum"),
                        mockUrlDetails("https://www.example.com/", "dolor sit"),
                        mockUrlDetails("https://www.example.com/", "amet quia")
                ),
                mockPages()
        );
    }

    /** Report model with one existing complaint and the form not yet submitted. */
    public static SearchSiteInfoService.ReportDomain mockReportDomain() {
        return new SearchSiteInfoService.ReportDomain(
                "www.example.com",
                1,
                List.of(new SearchFlagSiteService.FlagSiteComplaintModel(
                        "BAD",
                        "2024-10-01",
                        true,
                        "Appealed"
                )),
                SearchFlagSiteService.categories,
                false
        );
    }

    /**
     * Browse results for {@code n} generated domains named {@code 0.example.com}, {@code 1.example.com}, and so on.
     *
     * @param n number of results to generate
     */
    public static BrowseResultSet mockBrowseResults(int n) {
        List<BrowseResult> results = new ArrayList<>();
        for (int i = 0; i < n; i++) {
            results.add(new BrowseResult(
                    new EdgeUrl("https", new EdgeDomain(i+".example.com"), null, "/", null),
                    i,
                    0.5,
                    true
            ));
        }
        return new BrowseResultSet(results);
    }
}

View File

@ -0,0 +1,30 @@
package nu.marginalia.search.rendering;
import gg.jte.CodeResolver;
import gg.jte.ContentType;
import gg.jte.TemplateEngine;
import gg.jte.output.StringOutput;
import gg.jte.resolve.DirectoryCodeResolver;
import nu.marginalia.search.model.NavbarModel;
import org.junit.jupiter.api.Test;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.util.Map;
/** This test class verifies that the templates render successfully.
 * It does not perform checks that the output is correct */
public class RenderingTest {
    final CodeResolver codeResolver = new DirectoryCodeResolver(Path.of(".").toAbsolutePath().resolve("resources/jte"));
    final TemplateEngine templateEngine = TemplateEngine.create(codeResolver, ContentType.Html);

    @Test
    public void testSerp_Main() throws URISyntaxException {
        renderSerp(MockedSearchResults.mockRegularSearchResults());
    }

    @Test
    public void testSerp_SiteFocus() throws URISyntaxException {
        renderSerp(MockedSearchResults.mockSiteFocusResults());
    }

    /**
     * Renders serp/main.jte with named parameters, the same way JtePaperDoll does:
     * the results under "results" and the navbar state under "navbar".
     */
    private void renderSerp(Object results) {
        templateEngine.render("serp/main.jte",
                Map.of("results", results, "navbar", NavbarModel.SEARCH),
                new StringOutput());
    }
}

View File

@ -230,6 +230,8 @@ dependencyResolutionManagement {
library('jetty-util','org.eclipse.jetty','jetty-util').version('9.4.54.v20240208')
library('jetty-servlet','org.eclipse.jetty','jetty-servlet').version('9.4.54.v20240208')
library('jte','gg.jte','jte').version('3.1.15')
library('slop', 'nu.marginalia', 'slop').version('0.0.8-SNAPSHOT')
bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet'])
@ -250,6 +252,8 @@ dependencyResolutionManagement {
bundle('flyway', ['flyway.core', 'flyway.mysql'])
bundle('curator', ['curator-framework', 'curator-x-discovery'])
}
}
}