mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(crawler) Correct content type probing to only run on URLs that are suspected to be binary
This commit is contained in:
parent
69ad6287b1
commit
e4a41f7dd1
@ -139,7 +139,7 @@ public class HttpFetcherImpl implements HttpFetcher {
|
||||
public ContentTypeProbeResult probeContentType(EdgeUrl url,
|
||||
WarcRecorder warcRecorder,
|
||||
ContentTags tags) throws RateLimitException {
|
||||
if (tags.isEmpty()) {
|
||||
if (tags.isEmpty() && contentTypeLogic.isUrlLikeBinary(url)) {
|
||||
var headBuilder = new Request.Builder().head()
|
||||
.addHeader("User-agent", userAgentString)
|
||||
.addHeader("Accept-Encoding", "gzip")
|
||||
|
Loading…
Reference in New Issue
Block a user