mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(crawler) Content type prober should not swallow exceptions
This commit is contained in:
parent
4d71c776fc
commit
70e2e41955
@ -291,7 +291,7 @@ public class CrawlerRetreiver implements AutoCloseable {
|
||||
crawlFrontier.addVisited(top);
|
||||
}
|
||||
}
|
||||
else if (fetchedDoc instanceof HttpFetchResult.ResultException ex) {
|
||||
else if (fetchedDoc instanceof HttpFetchResult.ResultException) {
|
||||
errorCount ++;
|
||||
}
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ public class ContentTypeProber {
|
||||
return new ContentTypeProbeResult.Ok(ret);
|
||||
|
||||
} catch (SocketTimeoutException ex) {
|
||||
return new ContentTypeProbeResult.Timeout();
|
||||
return new ContentTypeProbeResult.Timeout(ex);
|
||||
} catch (Exception ex) {
|
||||
logger.error("Error during fetching {}[{}]", ex.getClass().getSimpleName(), ex.getMessage());
|
||||
|
||||
@ -80,7 +80,7 @@ public class ContentTypeProber {
|
||||
public sealed interface ContentTypeProbeResult {
|
||||
record Ok(EdgeUrl resolvedUrl) implements ContentTypeProbeResult { }
|
||||
record BadContentType(String contentType, int statusCode) implements ContentTypeProbeResult { }
|
||||
record Timeout() implements ContentTypeProbeResult { }
|
||||
record Timeout(java.lang.Exception ex) implements ContentTypeProbeResult { }
|
||||
record Exception(java.lang.Exception ex) implements ContentTypeProbeResult { }
|
||||
}
|
||||
}
|
||||
|
@ -162,11 +162,13 @@ public class HttpFetcherImpl implements HttpFetcher {
|
||||
}
|
||||
else if (probeResult instanceof ContentTypeProbeResult.BadContentType.Timeout timeout) {
|
||||
warcRecorder.flagAsTimeout(url);
|
||||
return new HttpFetchResult.ResultNone();
|
||||
|
||||
return new HttpFetchResult.ResultException(timeout.ex());
|
||||
}
|
||||
else if (probeResult instanceof ContentTypeProbeResult.Exception exception) {
|
||||
warcRecorder.flagAsError(url, exception.ex());
|
||||
return new HttpFetchResult.ResultNone();
|
||||
|
||||
return new HttpFetchResult.ResultException(exception.ex());
|
||||
}
|
||||
}
|
||||
else {
|
||||
@ -200,7 +202,7 @@ public class HttpFetcherImpl implements HttpFetcher {
|
||||
}
|
||||
}
|
||||
|
||||
return new HttpFetchResult.ResultNone();
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
Loading…
Reference in New Issue
Block a user