diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java index 35f5bcd0..18035d52 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java @@ -312,17 +312,6 @@ public class CrawlerRetreiver implements AutoCloseable { || proto.equalsIgnoreCase("https"); } - // FIXME this does not belong in the crawler - private Optional findCanonicalUrl(EdgeUrl baseUrl, Document parsed) { - baseUrl = baseUrl.domain.toRootUrl(); - - for (var link : parsed.select("link[rel=canonical]")) { - return linkParser.parseLink(baseUrl, link); - } - - return Optional.empty(); - } - private String findIp(String domain) { try { return InetAddress.getByName(domain).getHostAddress();