mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-22 20:48:59 +00:00
(crawler) Be more lenient when performing a domain probe
This commit is contained in:
parent
5354e034bf
commit
eee73ab16c
@ -45,6 +45,7 @@ public class HttpFetcherImpl implements HttpFetcher {
|
||||
private static final ContentTypeLogic contentTypeLogic = new ContentTypeLogic();
|
||||
|
||||
private final Duration requestTimeout = Duration.ofSeconds(10);
|
||||
private final Duration probeTimeout = Duration.ofSeconds(30);
|
||||
|
||||
@Override
|
||||
public void setAllowAllContentTypes(boolean allowAllContentTypes) {
|
||||
@ -107,23 +108,27 @@ public class HttpFetcherImpl implements HttpFetcher {
|
||||
.HEAD()
|
||||
.uri(url.asURI())
|
||||
.header("User-agent", userAgentString)
|
||||
.timeout(requestTimeout)
|
||||
.timeout(probeTimeout)
|
||||
.build();
|
||||
} catch (URISyntaxException e) {
|
||||
return new DomainProbeResult.Error(CrawlerDomainStatus.ERROR, "Invalid URL");
|
||||
}
|
||||
|
||||
try {
|
||||
var rsp = client.send(head, HttpResponse.BodyHandlers.discarding());
|
||||
EdgeUrl rspUri = new EdgeUrl(rsp.uri());
|
||||
for (int tries = 0;; tries++) {
|
||||
try {
|
||||
var rsp = client.send(head, HttpResponse.BodyHandlers.discarding());
|
||||
EdgeUrl rspUri = new EdgeUrl(rsp.uri());
|
||||
|
||||
if (!Objects.equals(rspUri.domain, url.domain)) {
|
||||
return new DomainProbeResult.Redirect(rspUri.domain);
|
||||
if (!Objects.equals(rspUri.domain, url.domain)) {
|
||||
return new DomainProbeResult.Redirect(rspUri.domain);
|
||||
}
|
||||
return new DomainProbeResult.Ok(rspUri);
|
||||
} catch (Exception ex) {
|
||||
if (tries > 3) {
|
||||
return new DomainProbeResult.Error(CrawlerDomainStatus.ERROR, ex.getMessage());
|
||||
}
|
||||
// else try again ...
|
||||
}
|
||||
return new DomainProbeResult.Ok(rspUri);
|
||||
}
|
||||
catch (Exception ex) {
|
||||
return new DomainProbeResult.Error(CrawlerDomainStatus.ERROR, ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user