MarginaliaSearch/code/process-models/crawling-model/build.gradle
Viktor Lofgren 2e7db61808 (warc) More accurate filering of advisory records
We want to mute some of these records so that they don't produce documents, but in some cases we want a document to be produced for accounting purposes.

Added improved tests that reach for known resources on www.marginalia.nu to test the behavior when encountering bad content type and 404s.

The commit also adds some safety try-catch:es around the charset handling, as it may sometimes explode when fed incorrect data, and we do be guessing...
2023-12-15 21:31:16 +01:00

45 lines
1.2 KiB
Groovy

plugins {
id 'java'
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(21))
}
}
dependencies {
implementation project(':code:common:model')
implementation project(':code:common:db')
implementation project(':code:common:process')
implementation project(':code:libraries:big-string')
implementation project(':code:api:index-api')
implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client')
implementation project(':code:features-crawl:content-type')
implementation project(':code:libraries:language-processing')
implementation project(':third-party:parquet-floor')
implementation project(':third-party:commons-codec')
implementation libs.bundles.slf4j
implementation libs.notnull
implementation libs.bundles.parquet
implementation libs.jwarc
implementation libs.gson
implementation libs.commons.io
implementation libs.okhttp3
implementation libs.jsoup
implementation libs.snakeyaml
implementation libs.zstd
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}