From b5469bd8a1994af929d1063c685fff4279ee5fd2 Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Mon, 6 Jan 2025 16:56:24 +0100
Subject: [PATCH] (search) Turn relative feed URLs absolute when dealing with RSS/Atom item URLs

---
Review notes (free text placed after the "---" marker, i.e. outside the
commit message; nothing below up to the first "diff" line is patch payload).

What the patch does
  * SearchFrontPageService: the item URL is copied into a local `url` before
    the NewsItem is built. If it starts with "/", it becomes
    "https://" + feed.getDomain() + url. Otherwise, if it contains no ":",
    it becomes "https://" + feed.getDomain() + "/" + url. Any other value is
    passed through unchanged.
  * SearchSiteInfoService: the FeedItem convenience constructor now takes the
    feed domain as a first argument and sends the item URL through the new
    private static helper absoluteFeedUrl(domain, url), which applies the
    same two rules. The caller switches from the FeedItem::new method
    reference to a lambda that passes rpcFeedItems.getDomain().

Points to confirm
  * NOTE(review): a URL beginning with "//" also satisfies startsWith("/"),
    so it gets "https://" + domain prepended like a path. Confirm whether
    feeds can carry such links and whether that result is intended.
  * NOTE(review): a relative URL that happens to contain ":" (for instance in
    a query string) matches neither branch and is left as it is.
  * NOTE(review): an empty URL matches the second branch and turns into
    "https://" + domain + "/".
  * NOTE(review): the scheme is hard-coded to "https://" in both places.
  * NOTE(review): the rule is written out twice (inline in
    SearchFrontPageService, as a helper in SearchSiteInfoService); the two
    copies have to be kept in sync by hand.
  * NOTE(review): the inline comments say "schema"; "scheme" is presumably
    what is meant.

Formatting caveat
  * This copy was restored from a version whose line breaks and indentation
    had been collapsed. Line order, hunk headers and line counts are
    consistent with the diffstat, but leading whitespace and the position of
    blank context lines are reconstructed. Verify against commit b5469bd8
    before applying.

 .../search/svc/SearchFrontPageService.java    | 10 +++++++++-
 .../search/svc/SearchSiteInfoService.java     | 19 ++++++++++++++++---
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java
index c974e624..3721d278 100644
--- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java
+++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchFrontPageService.java
@@ -85,8 +85,16 @@ public class SearchFrontPageService {
                         title = "[Missing Title]";
                     }
 
+
+                    String url = item.getUrl();
+                    if (url.startsWith("/")) { // relative URL
+                        url = "https://" + feed.getDomain() + url;
+                    } else if (!url.contains(":")) { // no schema, assume relative URL
+                        url = "https://" + feed.getDomain() + "/" + url;
+                    }
+
                     itemsAll.add(
-                            new NewsItem(title, item.getUrl(), feed.getDomain(), item.getDescription(), item.getDate())
+                            new NewsItem(title, url, feed.getDomain(), item.getDescription(), item.getDate())
                     );
                 }
             }
diff --git a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java
index 3aeb9946..8831aaaa 100644
--- a/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java
+++ b/code/services-application/search-service/java/nu/marginalia/search/svc/SearchSiteInfoService.java
@@ -381,11 +381,23 @@ public class SearchSiteInfoService {
 
     public record FeedItem(String title, String date, String description, String url) {
 
-        public FeedItem(RpcFeedItem rpcFeedItem) {
+        public FeedItem(String domain, RpcFeedItem rpcFeedItem) {
             this(rpcFeedItem.getTitle(),
                     rpcFeedItem.getDate(),
                     rpcFeedItem.getDescription(),
-                    rpcFeedItem.getUrl());
+                    absoluteFeedUrl(domain, rpcFeedItem.getUrl())
+            );
+        }
+
+
+        private static String absoluteFeedUrl(String domain, String url) {
+            if (url.startsWith("/")) { // relative URL
+                url = "https://" + domain + url;
+            } else if (!url.contains(":")) { // no schema, assume relative URL
+                url = "https://" + domain + "/" + url;
+            }
+
+            return url;
         }
 
         public String pubDay() { // Extract the date from an ISO style date string
@@ -412,8 +424,9 @@ public class SearchSiteInfoService {
             this(rpcFeedItems.getDomain(),
                     rpcFeedItems.getFeedUrl(),
                     rpcFeedItems.getUpdated(),
-                    rpcFeedItems.getItemsList().stream().map(FeedItem::new).toList());
+                    rpcFeedItems.getItemsList().stream().map(item -> new FeedItem(rpcFeedItems.getDomain(), item)).toList());
         }
+
     }
 
     public record ReportDomain(