diff --git a/code/features-search/feedlot-client/build.gradle b/code/features-search/feedlot-client/build.gradle new file mode 100644 index 00000000..808c9ca6 --- /dev/null +++ b/code/features-search/feedlot-client/build.gradle @@ -0,0 +1,22 @@ +plugins { + id 'java' + id 'jvm-test-suite' +} + +java { + toolchain { + languageVersion.set(JavaLanguageVersion.of(21)) + } +} + +dependencies { + implementation libs.bundles.slf4j + + implementation libs.notnull + implementation libs.gson + + testImplementation libs.bundles.slf4j.test + testImplementation libs.bundles.junit + testImplementation libs.mockito + +} \ No newline at end of file diff --git a/code/features-search/feedlot-client/readme.md b/code/features-search/feedlot-client/readme.md new file mode 100644 index 00000000..76fafff8 --- /dev/null +++ b/code/features-search/feedlot-client/readme.md @@ -0,0 +1,20 @@ +Client for [FeedlotTheFeedBot](https://github.com/MarginaliaSearch/FeedLotTheFeedBot), +the RSS/Atom feed fetcher and cache for Marginalia Search. + +This service is external to the Marginalia Search codebase, +as it is not a core part of the search engine and has other +utilities. 
+
+## Example
+
+```java
+
+import java.time.Duration;
+
+var client = new FeedlotClient("localhost", 8080,
+        gson,
+        Duration.ofMillis(100),  // connect timeout
+        Duration.ofMillis(100)); // request timeout
+
+CompletableFuture<FeedItems> items = client.getFeedItems("www.marginalia.nu");
+```
\ No newline at end of file
diff --git a/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/FeedlotClient.java b/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/FeedlotClient.java
new file mode 100644
index 00000000..3392a8d2
--- /dev/null
+++ b/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/FeedlotClient.java
@@ -0,0 +1,105 @@
+package nu.marginalia.feedlot;
+
+import com.google.gson.Gson;
+import nu.marginalia.feedlot.model.FeedItems;
+
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpResponse;
+import java.time.Duration;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+
+/**
+ * Asynchronous HTTP client for the external Feedlot feed cache.
+ * <p>
+ * Feed items are requested from {@code http://<host>:<port>/feed/<domain>}
+ * and the response body is deserialized into {@link FeedItems} with the
+ * supplied {@link Gson} instance.
+ */
+public class FeedlotClient {
+    private final String feedlotHost;
+    private final int feedlotPort;
+    private final Gson gson;
+    private final ExecutorService executor;
+    private final HttpClient httpClient;
+    private final Duration requestTimeout;
+
+    /**
+     * @param feedlotHost    host name of the Feedlot service
+     * @param feedlotPort    port of the Feedlot service
+     * @param gson           deserializer for the response body
+     * @param connectTimeout connect timeout of the underlying HTTP client
+     * @param requestTimeout timeout set on every request
+     */
+    public FeedlotClient(String feedlotHost,
+                         int feedlotPort,
+                         Gson gson,
+                         Duration connectTimeout,
+                         Duration requestTimeout
+                         )
+    {
+        this.feedlotHost = feedlotHost;
+        this.feedlotPort = feedlotPort;
+        this.gson = gson;
+        this.requestTimeout = requestTimeout;
+
+        // The executor is created here, so this class owns it and closes it
+        // in stop() together with the client.
+        executor = Executors.newVirtualThreadPerTaskExecutor();
+        httpClient = HttpClient.newBuilder()
+                .executor(executor)
+                .connectTimeout(connectTimeout)
+                .build();
+    }
+
+    /**
+     * Fetches the feed items Feedlot holds for a domain.
+     *
+     * @param domainName domain to look up
+     * @return a future with the parsed feed; it completes exceptionally when
+     *         the request URI cannot be built, the request fails or times
+     *         out, the response status is not 200, or the body cannot be
+     *         deserialized
+     */
+    public CompletableFuture<FeedItems> getFeedItems(String domainName) {
+        final HttpRequest request;
+        try {
+            request = HttpRequest.newBuilder()
+                    .uri(URI.create("http://%s:%d/feed/%s".formatted(feedlotHost, feedlotPort, domainName)))
+                    .GET()
+                    .timeout(requestTimeout)
+                    .build();
+        } catch (IllegalArgumentException e) {
+            // URI.create() rejects malformed input synchronously; report it
+            // through the future so callers have a single failure path.
+            return CompletableFuture.failedFuture(e);
+        }
+
+        return httpClient.sendAsync(request, HttpResponse.BodyHandlers.ofString())
+                .thenApply(this::requireOk)
+                .thenApply(this::parseFeedItems);
+    }
+
+    /** Returns the body of a 200 response; any other status is an error and is never parsed as a feed. */
+    private String requireOk(HttpResponse<String> response) {
+        if (response.statusCode() != 200) {
+            throw new IllegalStateException(
+                    "Feedlot returned HTTP %d for %s".formatted(response.statusCode(), response.uri()));
+        }
+        return response.body();
+    }
+
+    /** Deserializes a response body into {@link FeedItems}. */
+    private FeedItems parseFeedItems(String s) {
+        return gson.fromJson(s, FeedItems.class);
+    }
+
+    /** Closes the HTTP client and then the executor that backs it. */
+    public void stop() {
+        httpClient.close();
+        executor.close();
+    }
+}
diff --git a/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItem.java b/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItem.java
new file mode 100644
index 00000000..95ea8fe3
--- /dev/null
+++ b/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItem.java
@@ -0,0 +1,41 @@
+package nu.marginalia.feedlot.model;
+
+/**
+ * A single entry of a feed returned by Feedlot.
+ *
+ * @param title       title of the entry
+ * @param date        date of the entry, read by {@link #pubDay()} as an ISO style date string
+ * @param description description of the entry; {@link #descriptionSafe()} returns it with angle brackets escaped
+ * @param url         URL of the entry
+ */
+public record FeedItem(String title, String date, String description, String url) {
+
+    /**
+     * Extracts the date from an ISO style date string by keeping its first
+     * ten characters. Shorter values are returned as they are, and a missing
+     * date yields an empty string.
+     */
+    public String pubDay() {
+        if (date == null) {
+            return "";
+        }
+        if (date.length() > 10) {
+            return date.substring(0, 10);
+        }
+        return date;
+    }
+
+    /**
+     * Returns the description with angle brackets replaced by HTML entities,
+     * so that tags embedded in it are shown as text instead of being
+     * interpreted. A missing description yields an empty string.
+     */
+    public String descriptionSafe() {
+        if (description == null) {
+            return "";
+        }
+        return description
+                .replace("<", "&lt;")
+                .replace(">", "&gt;");
+    }
+}
diff --git a/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItems.java b/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItems.java
new file mode 100644
index 00000000..fcf06345
--- /dev/null
+++ b/code/features-search/feedlot-client/src/main/java/nu/marginalia/feedlot/model/FeedItems.java
@@ -0,0 +1,17 @@
+package nu.marginalia.feedlot.model;
+
+import java.util.List;
+
+/**
+ * The feed Feedlot returned for a domain.
+ *
+ * @param items entries of the feed; a {@code null} list is replaced by an empty one
+ */
+public record FeedItems(String domain, String feedUrl, String updated, List<FeedItem> items) {
+    public FeedItems {
+        // items() is queried without a null check by callers, e.g. items().isEmpty().
+        if (items == null) {
+            items = List.of();
+        }
+    }
+}
diff --git a/code/services-application/search-service/build.gradle b/code/services-application/search-service/build.gradle
index 805a7b34..ee504bcb 100644
--- a/code/services-application/search-service/build.gradle
+++ b/code/services-application/search-service/build.gradle
@@ -47,6 +47,7 @@ dependencies {
     implementation project(':code:features-search:screenshots')
     implementation project(':code:features-search:random-websites')
+    implementation project(':code:features-search:feedlot-client')
implementation libs.bundles.slf4j
diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchModule.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchModule.java
index 090884ba..d832503c 100644
--- a/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchModule.java
+++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/SearchModule.java
@@ -1,10 +1,15 @@
 package nu.marginalia.search;
 import com.google.inject.AbstractModule;
+import com.google.inject.Provides;
 import nu.marginalia.LanguageModels;
 import nu.marginalia.WebsiteUrl;
 import nu.marginalia.WmsaHome;
+import nu.marginalia.model.gson.GsonFactory;
 import nu.marginalia.renderer.config.HandlebarsConfigurator;
+import nu.marginalia.feedlot.FeedlotClient;
+
+import java.time.Duration;
 public class SearchModule extends AbstractModule {
@@ -17,4 +22,20 @@ public class SearchModule extends AbstractModule {
                 System.getProperty("website-url", "https://search.marginalia.nu/")));
     }
+    /**
+     * Provides the client for the external Feedlot service. Host and port are
+     * read from the system properties {@code ext-svc-feedlot-host} (default
+     * {@code feedlot}) and {@code ext-svc-feedlot-port} (default {@code 80}).
+     */
+    @Provides
+    public FeedlotClient provideFeedlotClient() {
+        return new FeedlotClient(
+                System.getProperty("ext-svc-feedlot-host", "feedlot"),
+                Integer.getInteger("ext-svc-feedlot-port", 80),
+                GsonFactory.get(),
+                Duration.ofMillis(250), // connect timeout
+                // NOTE(review): the request timeout below is shorter than the connect timeout above - confirm this is intended
+                Duration.ofMillis(100)
+        );
+    }
 }
diff --git a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java
index badaaeed..290bef50 100644
--- a/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java
+++ b/code/services-application/search-service/src/main/java/nu/marginalia/search/svc/SearchSiteInfoService.java
@@ -5,13 +5,17 @@ import nu.marginalia.assistant.client.AssistantClient;
 import nu.marginalia.assistant.client.model.SimilarDomain;
 import nu.marginalia.client.Context;
 import nu.marginalia.db.DbDomainQueries;
+import nu.marginalia.feedlot.model.FeedItems;
 import nu.marginalia.model.EdgeDomain;
 import nu.marginalia.renderer.MustacheRenderer;
 import nu.marginalia.renderer.RendererFactory;
 import nu.marginalia.search.SearchOperator;
 import nu.marginalia.assistant.client.model.DomainInformation;
+import nu.marginalia.feedlot.FeedlotClient;
 import nu.marginalia.search.model.UrlDetails;
 import nu.marginalia.search.svc.SearchFlagSiteService.FlagSiteFormData;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import spark.Request;
 import spark.Response;
@@ -21,19 +25,23 @@ import java.util.List;
 import java.util.Map;
 public class SearchSiteInfoService {
+    private static final Logger logger = LoggerFactory.getLogger(SearchSiteInfoService.class);
     private final SearchOperator searchOperator;
     private final AssistantClient assistantClient;
     private final SearchFlagSiteService flagSiteService;
     private final DbDomainQueries domainQueries;
     private final MustacheRenderer renderer;
+    private final FeedlotClient feedlotClient;
     @Inject
     public SearchSiteInfoService(SearchOperator searchOperator,
                                  AssistantClient assistantClient,
                                  RendererFactory rendererFactory,
                                  SearchFlagSiteService flagSiteService,
-                                 DbDomainQueries domainQueries) throws IOException {
+                                 DbDomainQueries domainQueries,
+                                 FeedlotClient feedlotClient) throws IOException
+    {
         this.searchOperator = searchOperator;
         this.assistantClient = assistantClient;
         this.flagSiteService = flagSiteService;
@@ -41,6 +49,7 @@ public class SearchSiteInfoService {
         this.renderer = rendererFactory.renderer("search/site-info/site-info");
+        this.feedlotClient = feedlotClient;
     }
     public Object handle(Request request, Response response) throws SQLException {
@@ -121,6 +130,7 @@ public class SearchSiteInfoService {
         final List linkingDomains;
         String url = "https://" + domainName + "/";;
+        var feedItemsFuture = feedlotClient.getFeedItems(domainName);
         if (domainId < 0 || !assistantClient.isAccepting()) {
             domainInfo = createDummySiteInfo(domainName);
             similarSet = List.of();
@@ -134,11 +144,18 @@ public class SearchSiteInfoService {
             linkingDomains = assistantClient
                     .linkedDomains(ctx, domainId, 100)
                     .blockingFirst();
+        }
-            List sampleResults = searchOperator.doSiteSearch(ctx, domainName, 1);
-            if (!sampleResults.isEmpty()) {
-                url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
-            }
+        List<UrlDetails> sampleResults = searchOperator.doSiteSearch(ctx, domainName, 5);
+        if (!sampleResults.isEmpty()) {
+            url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
+        }
+
+        FeedItems feedItems = null;
+        try {
+            feedItems = feedItemsFuture.get();
+        } catch (Exception e) {
+            logger.debug("Failed to get feed items for {}: {}", domainName, e.getMessage());
         }
         return new SiteInfoWithContext(domainName,
@@ -146,7 +163,9 @@ public class SearchSiteInfoService {
                 url,
                 domainInfo,
                 similarSet,
-                linkingDomains
+                linkingDomains,
+                feedItems,
+                sampleResults
         );
     }
@@ -200,13 +219,18 @@ public class SearchSiteInfoService {
                                       String siteUrl,
                                       DomainInformation domainInformation,
                                       List similar,
-                                      List linking) {
+                                      List linking,
+                                      FeedItems feed,
+                                      List<UrlDetails> samples
+    ) {
         public SiteInfoWithContext(String domain,
                                    long domainId,
                                    String siteUrl,
                                    DomainInformation domainInformation,
                                    List similar,
-                                   List linking
+                                   List linking,
+                                   FeedItems feedInfo,
+                                   List<UrlDetails> samples
         )
         {
             this(Map.of("info", true),
@@ -216,7 +240,9 @@ public class SearchSiteInfoService {
                     siteUrl,
                     domainInformation,
                     similar,
-                    linking);
+                    linking,
+                    feedInfo,
+                    samples);
         }
         public String getLayout() {
@@ -224,6 +250,12 @@ public class SearchSiteInfoService {
             if (similar.size() < 25) {
                 return "lopsided";
             }
+            else if (feed != null && feed.items() != null && !feed.items().isEmpty()) { // feed is null when the Feedlot lookup failed
+                return "lopsided";
+            }
+            else if (!samples.isEmpty()) {
+                return "lopsided";
+            }
             else {
                 return "balanced";
             }
diff --git a/code/services-application/search-service/src/main/resources/static/search/rss.svg 
b/code/services-application/search-service/src/main/resources/static/search/rss.svg new file mode 100644 index 00000000..2c01c8b3 --- /dev/null +++ b/code/services-application/search-service/src/main/resources/static/search/rss.svg @@ -0,0 +1,17 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-feed.hdb b/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-feed.hdb new file mode 100644 index 00000000..f458e380 --- /dev/null +++ b/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-feed.hdb @@ -0,0 +1,20 @@ +{{#with feed}} +

Feed (Experimental)

+ +
+ {{#each items}} +
{{title}}
+
{{pubDay}}
{{{descriptionSafe}}}
+ {{/each}} +
+{{/with}} + +{{#unless feed}}{{#if samples}} +

Sample

+
+{{#each samples}} +
{{title}}
+
{{{description}}}
+{{/each}} +
+{{/if}}{{/unless}} \ No newline at end of file diff --git a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-summary.hdb b/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-summary.hdb index fd1c7590..fba7adad 100644 --- a/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-summary.hdb +++ b/code/services-application/search-service/src/main/resources/templates/search/site-info/site-info-summary.hdb @@ -12,11 +12,58 @@ Screenshot of {{domain}} {{#with domainInformation}} + {{> search/site-info/site-info-feed}} {{> search/site-info/site-info-index}} {{> search/site-info/site-info-links}} {{/with}} + {{#if linking}} + + {{/if}} + + {{#if similar}}

Similar Domains

@@ -67,48 +114,4 @@
{{/if}} - {{#if linking}} - - {{/if}} \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index 42ae0f47..dbc0c855 100644 --- a/settings.gradle +++ b/settings.gradle @@ -28,6 +28,7 @@ include 'code:libraries:message-queue' include 'code:features-search:screenshots' include 'code:features-search:random-websites' +include 'code:features-search:feedlot-client' include 'code:features-qs:query-parser' include 'code:features-index:result-ranking'