mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 13:19:02 +00:00

Refactoring keyword extraction to extract span information. Modifying the intermediate storage of converted data to use the new slop library, which allows for easier storage of ad-hoc binary data like spans and positions. This is a bit of a Katamari Damacy commit that ended up dragging along a bunch of other fairly tangentially related changes that are hard to break out into separate commits after the fact. Will push as-is to get back to being able to do more isolated work.
71 lines
2.1 KiB
Groovy
71 lines
2.1 KiB
Groovy
// Gradle plugins applied to this module.
plugins {
    id('java')
    // Adds the `application { ... }` extension configured further down.
    id('application')
    id('jvm-test-suite')
}
|
|
|
|
// Java toolchain: the language version is taken from the `jvmVersion`
// extra property defined on the root project.
java {
    toolchain {
        languageVersion = JavaLanguageVersion.of(rootProject.ext.jvmVersion)
    }
}
|
|
|
|
// Application plugin settings: entry-point class and the name used for
// the generated distribution.
application {
    mainClass.set('nu.marginalia.crawl.CrawlerMain')
    applicationName = 'crawler-process'
}
|
|
|
|
// The zip distribution task is switched off for this module.
tasks.distZip.setEnabled(false)

// Pull in the shared script located in the root project directory.
// NOTE(review): presumably defines the source-set layout — confirm in srcsets.gradle.
apply(from: "$rootProject.projectDir/srcsets.gradle")
|
|
|
|
// Dependencies of the crawler process.
// Note: ':code:processes:crawling-process:model' was previously declared twice;
// the redundant declaration has been removed.
dependencies {
    // --- Modules from this build ---
    implementation project(':code:common:process')

    implementation project(':code:common:db')
    implementation project(':code:common:model')
    implementation project(':code:common:config')
    implementation project(':code:common:service')
    implementation project(':code:libraries:blocking-thread-pool')
    implementation project(':code:index:api')
    implementation project(':code:processes:process-mq-api')
    implementation project(':code:libraries:message-queue')
    implementation project(':code:libraries:language-processing')
    implementation project(':code:libraries:easy-lsh')
    implementation project(':code:processes:crawling-process:model')

    implementation project(':code:features-convert:anchor-keywords')
    implementation project(':code:features-crawl:crawl-blocklist')
    implementation project(':code:features-crawl:link-parser')
    implementation project(':code:features-crawl:content-type')
    implementation project(':third-party:commons-codec')

    // --- External libraries referenced through the `libs` accessors ---
    implementation libs.bundles.slf4j

    implementation libs.notnull
    implementation libs.guava
    // Guice is added with its com.google.guava group excluded; guava itself is
    // declared explicitly above.
    // NOTE(review): presumably to avoid a conflicting transitive guava version — confirm.
    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation libs.gson
    implementation libs.zstd
    implementation libs.jwarc
    implementation libs.crawlercommons
    implementation libs.okhttp3
    implementation libs.jsoup
    implementation libs.opencsv
    implementation libs.fastutil

    implementation libs.bundles.mariadb

    // --- Test-only dependencies ---
    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito

    testImplementation project(':code:processes:test-data')
}
|
|
|