mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-22 20:48:59 +00:00
(crawler) Fix urlencoding in sitemap fetcher
This commit is contained in:
parent
5b347e17ac
commit
4c74e280d3
@ -339,14 +339,14 @@ public class HttpFetcherImpl implements HttpFetcher {
|
||||
case "sitemapindex" -> {
|
||||
List<String> references = new ArrayList<>();
|
||||
for (var locTag : parsedSitemap.getElementsByTag("loc")) {
|
||||
references.add(URLDecoder.decode(locTag.text().trim(), StandardCharsets.UTF_8));
|
||||
references.add(locTag.text().trim());
|
||||
}
|
||||
yield new SitemapResult.SitemapReferences(Collections.unmodifiableList(references));
|
||||
}
|
||||
case "urlset" -> {
|
||||
List<String> urls = new ArrayList<>();
|
||||
for (var locTag : parsedSitemap.select("url > loc")) {
|
||||
urls.add(URLDecoder.decode(locTag.text().trim(), StandardCharsets.UTF_8));
|
||||
urls.add(locTag.text().trim());
|
||||
}
|
||||
yield new SitemapResult.SitemapUrls(Collections.unmodifiableList(urls));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user