MarginaliaSearch/code/processes/crawling-process/build.gradle
Viktor Lofgren 895cee7004 (crawler) Improved feed discovery, new domain state db per crawlset
Feed discover is improved with by probing a few likely endpoints when no feed link tag is provided.  To store the feed URLs, a sqlite database is added to each crawlset that stores a simple summary of the crawl job, including any feed URLs that have been discovered.

Solves issue #135
2024-12-26 15:05:52 +01:00

72 lines
2.1 KiB
Groovy

plugins {
id 'java'
id 'application'
id 'jvm-test-suite'
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
}
}
application {
mainClass = 'nu.marginalia.crawl.CrawlerMain'
applicationName = 'crawler-process'
}
tasks.distZip.enabled = false
apply from: "$rootProject.projectDir/srcsets.gradle"
dependencies {
implementation project(':code:common:db')
implementation project(':code:common:model')
implementation project(':code:common:config')
implementation project(':code:common:service')
implementation project(':code:libraries:blocking-thread-pool')
implementation project(':code:index:api')
implementation project(':code:processes:process-mq-api')
implementation project(':code:libraries:message-queue')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:easy-lsh')
implementation project(':code:processes:crawling-process:model')
implementation project(':code:processes:crawling-process:model')
implementation project(':code:processes:converting-process:ft-anchor-keywords')
implementation project(':code:processes:crawling-process:ft-crawl-blocklist')
implementation project(':code:processes:crawling-process:ft-link-parser')
implementation project(':code:processes:crawling-process:ft-content-type')
implementation project(':third-party:commons-codec')
implementation libs.bundles.slf4j
implementation libs.notnull
implementation libs.guava
implementation libs.sqlite
implementation dependencies.create(libs.guice.get()) {
exclude group: 'com.google.guava'
}
implementation libs.gson
implementation libs.zstd
implementation libs.jwarc
implementation libs.crawlercommons
implementation libs.okhttp3
implementation libs.jsoup
implementation libs.opencsv
implementation libs.fastutil
implementation libs.bundles.mariadb
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
testImplementation project(':code:processes:test-data')
}