mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(link-parser) Filter out URLs with binary file suffixes in LinkParser
Added an additional filter step to ensure URLs with binary suffixes are excluded during crawling. This prevents unnecessary processing of non-HTML content, improving the efficiency of the link parsing process.
This commit is contained in:
parent
a97c05107e
commit
3b99cffb3d
@@ -42,7 +42,8 @@ public class LinkParser {
|
||||
.flatMap(this::createURI)
|
||||
.map(URI::normalize)
|
||||
.map(this::renormalize)
|
||||
.flatMap(this::createEdgeUrl);
|
||||
.flatMap(this::createEdgeUrl)
|
||||
.filter(url -> !hasBinarySuffix(url.path));
|
||||
}
|
||||
|
||||
@Contract(pure=true)
|
||||
|
Loading…
Reference in New Issue
Block a user