mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Merge pull request 'master' (#92) from master into release
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/92
This commit is contained in:
commit
f708fa643b
@ -19,6 +19,7 @@ public class UrlBlocklist {
|
|||||||
public UrlBlocklist() {
|
public UrlBlocklist() {
|
||||||
// Don't deep-crawl git repos
|
// Don't deep-crawl git repos
|
||||||
patterns.add(Pattern.compile("\\.git/.+").asPredicate());
|
patterns.add(Pattern.compile("\\.git/.+").asPredicate());
|
||||||
|
patterns.add(Pattern.compile("wp-content/upload").asPredicate());
|
||||||
|
|
||||||
// long hex strings in URLs are typically git hashes or the like, rarely worth crawling
|
// long hex strings in URLs are typically git hashes or the like, rarely worth crawling
|
||||||
patterns.add(Pattern.compile(".*/[^/]*[a-f0-9]{32,}(/|$)").asPredicate());
|
patterns.add(Pattern.compile(".*/[^/]*[a-f0-9]{32,}(/|$)").asPredicate());
|
||||||
|
@ -9,21 +9,22 @@ import java.util.List;
|
|||||||
import java.util.concurrent.ConcurrentHashMap;
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
public class Cookies {
|
public class Cookies {
|
||||||
final ThreadLocal<ConcurrentHashMap<HttpUrl, List<Cookie>>> cookieJar = ThreadLocal.withInitial(ConcurrentHashMap::new);
|
final ThreadLocal<ConcurrentHashMap<String, List<Cookie>>> cookieJar = ThreadLocal.withInitial(ConcurrentHashMap::new);
|
||||||
|
|
||||||
public CookieJar getJar() {
|
public CookieJar getJar() {
|
||||||
return new CookieJar() {
|
return new CookieJar() {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void saveFromResponse(HttpUrl url, List<Cookie> cookies) {
|
public void saveFromResponse(HttpUrl url, List<Cookie> cookies) {
|
||||||
|
|
||||||
if (!cookies.isEmpty()) {
|
if (!cookies.isEmpty()) {
|
||||||
cookieJar.get().put(url, cookies);
|
cookieJar.get().put(url.host(), cookies);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<Cookie> loadForRequest(HttpUrl url) {
|
public List<Cookie> loadForRequest(HttpUrl url) {
|
||||||
return cookieJar.get().getOrDefault(url, Collections.emptyList());
|
return cookieJar.get().getOrDefault(url.host(), Collections.emptyList());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@ class UrlBlocklistTest {
|
|||||||
UrlBlocklist blocklist = new UrlBlocklist();
|
UrlBlocklist blocklist = new UrlBlocklist();
|
||||||
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("https://memex.marginalia.nu/ghc/ghc/blob/1b1067d14b656bbbfa7c47f156ec2700c9751549/compiler/main/UpdateCafInfos.hs")));
|
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("https://memex.marginalia.nu/ghc/ghc/blob/1b1067d14b656bbbfa7c47f156ec2700c9751549/compiler/main/UpdateCafInfos.hs")));
|
||||||
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("https://memex.marginalia.nu//gn/+/d62642c920e6a0d1756316d225a90fd6faa9e21e")));
|
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("https://memex.marginalia.nu//gn/+/d62642c920e6a0d1756316d225a90fd6faa9e21e")));
|
||||||
|
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("http://www.marginalia.nu/wp-content/uploads/test.jpg")));
|
||||||
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/pdf/download-a-course-in-algebra.html")));
|
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/pdf/download-a-course-in-algebra.html")));
|
||||||
assertFalse(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/nope/x-a-course-in-algebra.html")));
|
assertFalse(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/nope/x-a-course-in-algebra.html")));
|
||||||
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/_module/slide/pqPan/library/american-sour-beer-innovative-techniques-for-mixed-fermentations/")));
|
assertTrue(blocklist.isUrlBlocked(new EdgeUrl("http://yelenasimone.com/_module/slide/pqPan/library/american-sour-beer-innovative-techniques-for-mixed-fermentations/")));
|
||||||
|
Loading…
Reference in New Issue
Block a user