(link-parser) Make mailing list blocking optional

This commit is contained in:
Viktor Lofgren 2024-10-15 13:48:32 +02:00
parent ab486323f2
commit 89dd201a7b

View File

@@ -1,7 +1,7 @@
package nu.marginalia.ip_blocklist; package nu.marginalia.ip_blocklist;
import nu.marginalia.model.EdgeUrl;
import nu.marginalia.gregex.GuardedRegexFactory; import nu.marginalia.gregex.GuardedRegexFactory;
import nu.marginalia.model.EdgeUrl;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
@@ -16,6 +16,8 @@ public class UrlBlocklist {
"instagram.com", "youtube.com", "instagram.com", "youtube.com",
"youtu.be", "amzn.to"); "youtu.be", "amzn.to");
/**
 * Feature flag consulted by {@code isMailingListLink} before it inspects a URL path.
 * Evaluated once, when the class is initialized, from the JVM system property
 * {@code links.block_mailing_lists}. {@link Boolean#getBoolean(String)} yields
 * {@code true} only when that property is set to "true" (case-insensitive), so the
 * flag is {@code false} when the property is absent.
 */
private static final boolean BLOCK_MAILING_LISTS = Boolean.getBoolean("links.block_mailing_lists");
public UrlBlocklist() { public UrlBlocklist() {
// Don't deep-crawl git repos // Don't deep-crawl git repos
patterns.add(s -> s.contains(".git/")); patterns.add(s -> s.contains(".git/"));
@@ -92,6 +94,10 @@ public class UrlBlocklist {
} }
public boolean isMailingListLink(EdgeUrl linkUrl) { public boolean isMailingListLink(EdgeUrl linkUrl) {
if (!BLOCK_MAILING_LISTS) {
return false;
}
var path = linkUrl.path; var path = linkUrl.path;
if (path.startsWith("/lists/")) { if (path.startsWith("/lists/")) {
return true; return true;