diff --git a/build.gradle b/build.gradle index dad52fa3..a560016b 100644 --- a/build.gradle +++ b/build.gradle @@ -44,8 +44,8 @@ subprojects.forEach {it -> } ext { - jvmVersion=21 - dockerImageBase='container-registry.oracle.com/graalvm/jdk:21@sha256:1fd33d4d4eba3a9e1a41a728e39ea217178d257694eea1214fec68d2ed4d3d9b' + jvmVersion=22 + dockerImageBase='container-registry.oracle.com/graalvm/jdk:22' dockerImageTag='latest' dockerImageRegistry='marginalia' } diff --git a/code/execution/build.gradle b/code/execution/build.gradle index 354334f3..8e17bfec 100644 --- a/code/execution/build.gradle +++ b/code/execution/build.gradle @@ -40,10 +40,8 @@ dependencies { implementation project(':code:processes:crawling-process:model') implementation project(':code:processes:crawling-process:model') - implementation project(':code:features-crawl:link-parser') - implementation project(':code:features-convert:data-extractors') - implementation project(':code:features-convert:stackexchange-xml') - implementation project(':code:features-convert:reddit-json') + implementation project(':code:processes:crawling-process:ft-link-parser') + implementation project(':code:execution:data-extractors') implementation project(':code:index:index-journal') implementation project(':code:index:api') implementation project(':code:processes:process-mq-api') diff --git a/code/features-convert/data-extractors/build.gradle b/code/execution/data-extractors/build.gradle similarity index 88% rename from code/features-convert/data-extractors/build.gradle rename to code/execution/data-extractors/build.gradle index 82bf536a..2a0c08c6 100644 --- a/code/features-convert/data-extractors/build.gradle +++ b/code/execution/data-extractors/build.gradle @@ -22,8 +22,8 @@ dependencies { implementation project(':code:libraries:language-processing') implementation project(':code:libraries:term-frequency-dict') implementation project(':code:libraries:blocking-thread-pool') - implementation 
project(':code:features-crawl:link-parser') - implementation project(':code:features-convert:anchor-keywords') + implementation project(':code:processes:crawling-process:ft-link-parser') + implementation project(':code:processes:converting-process:ft-anchor-keywords') implementation project(':code:processes:crawling-process:model') implementation project(':code:processes:converting-process') implementation project(':third-party:commons-codec') diff --git a/code/features-convert/data-extractors/java/nu/marginalia/extractor/AtagExporter.java b/code/execution/data-extractors/java/nu/marginalia/extractor/AtagExporter.java similarity index 100% rename from code/features-convert/data-extractors/java/nu/marginalia/extractor/AtagExporter.java rename to code/execution/data-extractors/java/nu/marginalia/extractor/AtagExporter.java diff --git a/code/features-convert/data-extractors/java/nu/marginalia/extractor/ExporterIf.java b/code/execution/data-extractors/java/nu/marginalia/extractor/ExporterIf.java similarity index 100% rename from code/features-convert/data-extractors/java/nu/marginalia/extractor/ExporterIf.java rename to code/execution/data-extractors/java/nu/marginalia/extractor/ExporterIf.java diff --git a/code/features-convert/data-extractors/java/nu/marginalia/extractor/FeedExporter.java b/code/execution/data-extractors/java/nu/marginalia/extractor/FeedExporter.java similarity index 100% rename from code/features-convert/data-extractors/java/nu/marginalia/extractor/FeedExporter.java rename to code/execution/data-extractors/java/nu/marginalia/extractor/FeedExporter.java diff --git a/code/features-convert/data-extractors/java/nu/marginalia/extractor/SampleDataExporter.java b/code/execution/data-extractors/java/nu/marginalia/extractor/SampleDataExporter.java similarity index 100% rename from code/features-convert/data-extractors/java/nu/marginalia/extractor/SampleDataExporter.java rename to 
code/execution/data-extractors/java/nu/marginalia/extractor/SampleDataExporter.java diff --git a/code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java b/code/execution/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java similarity index 100% rename from code/features-convert/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java rename to code/execution/data-extractors/java/nu/marginalia/extractor/TermFrequencyExporter.java diff --git a/code/features-convert/data-extractors/readme.md b/code/execution/data-extractors/readme.md similarity index 100% rename from code/features-convert/data-extractors/readme.md rename to code/execution/data-extractors/readme.md diff --git a/code/features-convert/adblock/build.gradle b/code/features-convert/adblock/build.gradle deleted file mode 100644 index d88d86d3..00000000 --- a/code/features-convert/adblock/build.gradle +++ /dev/null @@ -1,33 +0,0 @@ -plugins { - id 'java' - - - id "de.undercouch.download" version "5.1.0" - - id 'jvm-test-suite' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) - } -} - -apply from: "$rootProject.projectDir/srcsets.gradle" - -dependencies { - implementation project(':code:common:config') - - implementation libs.bundles.slf4j - implementation libs.guava - implementation dependencies.create(libs.guice.get()) { - exclude group: 'com.google.guava' - } - implementation libs.notnull - implementation libs.jsoup - - testImplementation libs.bundles.slf4j.test - testImplementation libs.bundles.junit - testImplementation libs.mockito -} - diff --git a/code/features-convert/adblock/readme.md b/code/features-convert/adblock/readme.md deleted file mode 100644 index 32919300..00000000 --- a/code/features-convert/adblock/readme.md +++ /dev/null @@ -1,8 +0,0 @@ -# Adblock - -Contains an adblock simulator that reads an adblock specifications file and -uses it to identify if a document has 
ads. - -## Central Classes - -* [AdblockSimulator](java/nu/marginalia/adblock/AdblockSimulator.java) \ No newline at end of file diff --git a/code/features-convert/pubdate/build.gradle b/code/features-convert/pubdate/build.gradle deleted file mode 100644 index aeafcd99..00000000 --- a/code/features-convert/pubdate/build.gradle +++ /dev/null @@ -1,34 +0,0 @@ -plugins { - id 'java' - - - id "de.undercouch.download" version "5.1.0" - - id 'jvm-test-suite' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) - } -} - -apply from: "$rootProject.projectDir/srcsets.gradle" - -dependencies { - implementation project(':code:common:model') - - implementation libs.bundles.slf4j - implementation libs.guava - implementation dependencies.create(libs.guice.get()) { - exclude group: 'com.google.guava' - } - implementation libs.notnull - implementation libs.bundles.gson - implementation libs.jsoup - - testImplementation libs.bundles.slf4j.test - testImplementation libs.bundles.junit - testImplementation libs.mockito - testImplementation project(':code:common:config') -} diff --git a/code/features-convert/pubdate/readme.md b/code/features-convert/pubdate/readme.md deleted file mode 100644 index add657ee..00000000 --- a/code/features-convert/pubdate/readme.md +++ /dev/null @@ -1,7 +0,0 @@ -# Pubdate - -Contains advanced haruspicy for figuring out when a document was published. 
- -## Central Classes - -* [PubDateSniffer](java/nu/marginalia/pubdate/PubDateSniffer.java) \ No newline at end of file diff --git a/code/features-convert/readme.md b/code/features-convert/readme.md deleted file mode 100644 index 2979fdab..00000000 --- a/code/features-convert/readme.md +++ /dev/null @@ -1,13 +0,0 @@ -# Converter Features - -## Major features - -* [keyword-extraction](keyword-extraction/) - Identifies keywords to index in a document -* [summary-extraction](summary-extraction/) - Generate an excerpt/quote from a website to display on the search results page. - - -## Smaller features: - -* [adblock](adblock/) - Simulates Adblock -* [pubdate](pubdate/) - Determines when a document was published -* [topic-detection](topic-detection/) - Tries to identify the topic of a website diff --git a/code/features-convert/reddit-json/build.gradle b/code/features-convert/reddit-json/build.gradle deleted file mode 100644 index fed33f4f..00000000 --- a/code/features-convert/reddit-json/build.gradle +++ /dev/null @@ -1,44 +0,0 @@ -plugins { - id 'java' - - id 'jvm-test-suite' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) - } -} - -apply from: "$rootProject.projectDir/srcsets.gradle" - -dependencies { - implementation libs.bundles.slf4j - - implementation project(':code:libraries:blocking-thread-pool') - implementation project(':code:common:model') - implementation libs.notnull - - implementation libs.jsoup - implementation libs.sqlite - - implementation libs.guava - implementation dependencies.create(libs.guice.get()) { - exclude group: 'com.google.guava' - } - implementation libs.guava - implementation libs.gson - implementation libs.zstd - implementation libs.trove - implementation libs.commons.compress - implementation libs.xz - - testImplementation libs.bundles.slf4j.test - testImplementation libs.bundles.junit - testImplementation libs.mockito -} - -test { - maxHeapSize = "8G" - useJUnitPlatform() -} diff 
--git a/code/features-convert/stackexchange-xml/build.gradle b/code/features-convert/stackexchange-xml/build.gradle deleted file mode 100644 index 62e289b0..00000000 --- a/code/features-convert/stackexchange-xml/build.gradle +++ /dev/null @@ -1,43 +0,0 @@ -plugins { - id 'java' - - id 'jvm-test-suite' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) - } -} - -apply from: "$rootProject.projectDir/srcsets.gradle" - -dependencies { - implementation libs.bundles.slf4j - - implementation project(':code:libraries:blocking-thread-pool') - implementation project(':code:common:model') - implementation libs.notnull - - implementation libs.jsoup - implementation libs.sqlite - - implementation libs.guava - implementation dependencies.create(libs.guice.get()) { - exclude group: 'com.google.guava' - } - implementation libs.guava - implementation libs.zstd - implementation libs.trove - implementation libs.commons.compress - implementation libs.xz - - testImplementation libs.bundles.slf4j.test - testImplementation libs.bundles.junit - testImplementation libs.mockito -} - -test { - maxHeapSize = "8G" - useJUnitPlatform() -} diff --git a/code/features-convert/stackexchange-xml/readme.md b/code/features-convert/stackexchange-xml/readme.md deleted file mode 100644 index 1701ad7f..00000000 --- a/code/features-convert/stackexchange-xml/readme.md +++ /dev/null @@ -1,18 +0,0 @@ -Stackexchange's data is a jumble of questions and answers, -where the answers refer to the questions with a parentId field. - -e.g. -```xml - - - - - - -``` - -Since the search engine wants to extract keywords for each thread -holistically, not by question or answer, it is necessary to re-arrange -the data (which is very large). SQLite does a decent job of enabling -this task. 
- diff --git a/code/features-convert/summary-extraction/build.gradle b/code/features-convert/summary-extraction/build.gradle deleted file mode 100644 index 24eec1ca..00000000 --- a/code/features-convert/summary-extraction/build.gradle +++ /dev/null @@ -1,42 +0,0 @@ -plugins { - id 'java' - - id 'jvm-test-suite' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) - } -} - -apply from: "$rootProject.projectDir/srcsets.gradle" - -dependencies { - implementation libs.bundles.slf4j - - implementation libs.notnull - - implementation libs.jsoup - - implementation libs.guava - implementation dependencies.create(libs.guice.get()) { - exclude group: 'com.google.guava' - } - implementation libs.guava - implementation libs.bundles.gson - implementation libs.trove - implementation libs.fastutil - implementation libs.commons.lang3 - - testImplementation libs.bundles.slf4j.test - testImplementation libs.bundles.junit - testImplementation libs.mockito - - testImplementation project(':code:features-convert:keyword-extraction') - testImplementation project(':code:libraries:language-processing') - testImplementation project(':code:libraries:term-frequency-dict') - testImplementation project(':code:common:config') - testImplementation project(':code:common:model') -} - diff --git a/code/features-convert/summary-extraction/readme.md b/code/features-convert/summary-extraction/readme.md deleted file mode 100644 index b617d947..00000000 --- a/code/features-convert/summary-extraction/readme.md +++ /dev/null @@ -1,25 +0,0 @@ -# Summary Extraction - -This feature attempts to find a descriptive passage of text that summarizes -what a search result "is about". It's the text you see below a search result. - -It must solve two problems: - -1. Identify which part of the document that contains "the text". -The crux is that the document may be anywhere from 1993 to the present, with era-appropriate -formatting. 
It may be formatted with <center>ed <font>-tags, or semantic HTML5. - -2. Identify which part of "the text" best describes the document. - -It uses several naive heuristics to try to find something that makes sense, -and there is probably room for improvement. - -There are many good techniques for doing this, but they've sadly not proved -particularly fast. Whatever solution is used needs to be able to summarize of -order of a 100,000,000 documents with a time budget of a couple of hours. - - -## Central Classes - -* [SummaryExtractor](java/nu/marginalia/summary/SummaryExtractor.java) - diff --git a/code/features-convert/topic-detection/build.gradle b/code/features-convert/topic-detection/build.gradle deleted file mode 100644 index ef29d275..00000000 --- a/code/features-convert/topic-detection/build.gradle +++ /dev/null @@ -1,34 +0,0 @@ -plugins { - id 'java' - - - id "de.undercouch.download" version "5.1.0" - - id 'jvm-test-suite' -} - -java { - toolchain { - languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion)) - } -} - -apply from: "$rootProject.projectDir/srcsets.gradle" - -dependencies { - implementation project(':code:common:config') - implementation project(':code:libraries:language-processing') - implementation project(':third-party:porterstemmer') - - implementation libs.bundles.slf4j - implementation libs.guava - implementation dependencies.create(libs.guice.get()) { - exclude group: 'com.google.guava' - } - implementation libs.notnull - implementation libs.jsoup - - testImplementation libs.bundles.slf4j.test - testImplementation libs.bundles.junit - testImplementation libs.mockito -} diff --git a/code/features-convert/topic-detection/readme.md b/code/features-convert/topic-detection/readme.md deleted file mode 100644 index db9a0000..00000000 --- a/code/features-convert/topic-detection/readme.md +++ /dev/null @@ -1,4 +0,0 @@ -# Topic Detection - -This is an experiment in using hand-crafted naive bayesian filters to detecting the topic of 
a website. -It's noteworthy it detects recipes very well. \ No newline at end of file diff --git a/code/features-crawl/readme.md b/code/features-crawl/readme.md deleted file mode 100644 index 4566e980..00000000 --- a/code/features-crawl/readme.md +++ /dev/null @@ -1,8 +0,0 @@ -# Crawl Features - -These are bits of search-engine related code that are relatively isolated pieces of business logic, -that benefit from the clarity of being kept separate from the rest of the crawling code. - -* [content-type](content-type/) - Content Type identification -* [crawl-blocklist](crawl-blocklist/) - IP and URL blocklists -* [link-parser](link-parser/) - Code for parsing and normalizing links diff --git a/code/functions/search-query/build.gradle b/code/functions/search-query/build.gradle index deddc7c9..a2d10a59 100644 --- a/code/functions/search-query/build.gradle +++ b/code/functions/search-query/build.gradle @@ -31,7 +31,7 @@ dependencies { implementation project(':code:libraries:language-processing') implementation project(':code:libraries:term-frequency-dict') - implementation project(':code:features-convert:keyword-extraction') + implementation project(':code:processes:converting-process:ft-keyword-extraction') implementation libs.bundles.slf4j diff --git a/code/libraries/slop/build.gradle b/code/libraries/slop/build.gradle index 03d7f1ea..55b890fd 100644 --- a/code/libraries/slop/build.gradle +++ b/code/libraries/slop/build.gradle @@ -1,5 +1,7 @@ plugins { id 'java' + id 'application' + id 'org.graalvm.buildtools.native' version '0.10.2' } java { @@ -9,7 +11,51 @@ java { } -apply from: "$rootProject.projectDir/srcsets.gradle" +sourceSets { + main { + java { + srcDirs = [ + 'java', + 'build/generated/source/proto/main/grpc', + 'build/generated/source/proto/main/java' + ] + } + resources { + srcDirs = [ 'resources' ] + } + } + test { + java { + srcDirs = [ 'test' ] + } + resources { + srcDirs = [ 'test-resources' ] + } + } + demo { + java { + srcDirs = [ 'demo' ] + } + 
resources { + srcDirs = [ 'demo-resources' ] + } + + } +} + +application { + mainClass = 'demo.OneBillionRowsDemo' +} + +graalvmNative { + binaries.all { + resources.autodetect() + buildArgs=['-H:+ForeignAPISupport', '-H:+UnlockExperimentalVMOptions'] + + } + + toolchainDetection = false +} dependencies { implementation libs.bundles.slf4j @@ -24,7 +70,14 @@ dependencies { testImplementation libs.bundles.junit testImplementation libs.mockito - testImplementation libs.sqlite + demoImplementation sourceSets.main.output + demoImplementation libs.bundles.slf4j + demoImplementation libs.notnull + demoImplementation libs.commons.lang3 + demoImplementation libs.lz4 + demoImplementation libs.commons.compress + demoImplementation libs.zstd + demoImplementation libs.duckdb } test { diff --git a/code/libraries/slop/readme.md b/code/libraries/slop/readme.md new file mode 100644 index 00000000..99e52782 --- /dev/null +++ b/code/libraries/slop/readme.md @@ -0,0 +1,146 @@ +# Slop + +Slop is a library for columnar data persistence. It is designed to be used for storing large amounts of data in a way +that is both fast and memory-efficient. The data is write-once, and the slop library offers many facilities for +deciding how it should be stored and accessed. + +Slop is designed as a low abstraction what-you-see-is-what-you-do library, the reason for +this is to be able to eliminate copies and other overheads that are common in higher +level libraries. The intent is to get the performance of a hand-rolled solution, but +without the complexity and brittleness that comes with hand-rolling an ad-hoc row-based storage +format. + +A lot of what would commonly be kept in a schema description is instead just +implemented as code. To aid with portability, slop stores schema information +in the file names of the data files, besides the actual name of the column itself. 
+ +A table of demographic information may end up stored in files like this: + +```text +cities.0.dat.s8[].gz +cities.0.dat-len.varint-le.bin +population.0.dat.s32le.bin +average-age.0.dat.f64le.gz +``` + +The slop library offers some facilities to aid with data integrity, such as the SlopTable +class, which is a wrapper that ensures consistent positions for a group of columns, and aids +in closing the columns when they are no longer needed. + +## Why though? + +Slop is fast. + +Depending on compression and encoding choices, it's possible +to get read speeds that are 5-20x faster than reading from a sqlite database. +When compression is disabled, Slop will memory map the data, and depending on the +contents of the column, it's possible to perform zero copy reads. + +Slop is compact. + +Depending on compression and encoding choices, the format will be smaller +than a parquet file containing the equivalent information. + +Slop is simple. + +There isn't much magic going on under the hood in Slop. It's designed with the philosophy that a competent programmer +should be able to reverse engineer the format of the data by just +looking at a directory listing of the data files. + + +### Relaxed 1BRC (no CSV ingestion time) + +Slop is reasonably competitive with DuckDB in terms of read speed, +especially when reading from Parquet, and the data on disk tends +to be smaller. + +This is noteworthy given Slop is a single-threaded JVM application, +and DuckDB is a multi-threaded C++ application. + +| Impl | Runtime | Size On Disk | +|----------------------------|---------|--------------| +| DuckDB in memory | 2.6s | 3.0 GB | +| Slop in vanilla Java s16 | 4.2s | 2.8 GB | +| Slop in vanilla Java s32 | 4.5s | 3.8 GB | +| Parquet (Snappy) in DuckDB | 4.5s | 5.5 GB | +| Parquet (Zstd) in DuckDB | 5.5s | 3.0 GB | + +## Example + +With slop it's desirable to keep the schema information in the code. 
This is an example of how you might use slop to +store a table of data with three columns: source, dest, and counts. The source and dest columns are strings, and the +counts column is an integer that's stored with a varint-coding (i.e. like how utf-8 works). + +The data is stored in a directory, and the data is written and read using the `MyData.Writer` and `MyData.Reader` classes. +The `MyData` class is itself a record, and the schema is stored as static fields in the `MyData` class. + + +```java +record Population(String city, int population, double avgAge) { + + private static final ColumnDesc citiesColumn = + new ColumnDesc<>("cities", ColumnType.STRING, StorageType.GZIP); + private static final ColumnDesc populationColumn = + new ColumnDesc<>("population", ColumnType.INT_LE, StorageType.PLAIN); + private static final ColumnDesc averageAgeColumnn = + new ColumnDesc<>("average-age", ColumnType.DOUBLE_LE, StorageType.PLAIN); + + public static class Writer extends SlopTable { + private final StringColumnWriter citiesWriter; + private final IntColumnWriter populationWriter; + private final DoubleColumnWriter avgAgeWriter; + + public Writer(Path baseDir) throws IOException { + citiesWriter = citiesColumn.create(this, baseDir); + populationWriter = populationColumn.create(this, baseDir); + avgAgeWriter = averageAgeColumnn.create(this, baseDir); + } + + public void write(Population data) throws IOException { + citiesWriter.put(data.city); + populationWriter.put(data.population); + avgAgeWriter.put(data.avgAge); + } + } + + public static class Reader extends SlopTable { + private final StringColumnReader citiesReader; + private final IntColumnReader populationReader; + private final DoubleColumnReader avgAgeReader; + + public Reader(Path baseDir) throws IOException { + citiesReader = citiesColumn.open(this, baseDir); + populationReader = populationColumn.open(this, baseDir); + avgAgeReader = averageAgeColumnn.open(this, baseDir); + } + + public boolean hasRemaining() 
throws IOException { + return citiesReader.hasRemaining(); + } + + public Population read() throws IOException { + return new Population( + citiesReader.get(), + populationReader.get(), + avgAgeReader.get() + ); + } + } +} +``` + +## Nested Records + +TBW + +## Column Types + +TBW + +## Storage Types + +TBW + +## Extension + +TBW \ No newline at end of file diff --git a/code/processes/converting-process/build.gradle b/code/processes/converting-process/build.gradle index 1dd1edb9..ef728448 100644 --- a/code/processes/converting-process/build.gradle +++ b/code/processes/converting-process/build.gradle @@ -47,18 +47,12 @@ dependencies { implementation project(':code:processes:converting-process:model') implementation project(':code:processes:crawling-process:model') - implementation project(':code:features-convert:adblock') - implementation project(':code:features-convert:anchor-keywords') - implementation project(':code:features-convert:topic-detection') - implementation project(':code:features-convert:pubdate') - implementation project(':code:features-convert:keyword-extraction') - implementation project(':code:features-convert:summary-extraction') - implementation project(':code:features-convert:stackexchange-xml') - implementation project(':code:features-convert:reddit-json') + implementation project(':code:processes:converting-process:ft-anchor-keywords') + implementation project(':code:processes:converting-process:ft-keyword-extraction') - implementation project(':code:features-crawl:crawl-blocklist') - implementation project(':code:features-crawl:link-parser') - implementation project(':code:features-crawl:content-type') + implementation project(':code:processes:crawling-process:ft-crawl-blocklist') + implementation project(':code:processes:crawling-process:ft-link-parser') + implementation project(':code:processes:crawling-process:ft-content-type') testImplementation project(':code:libraries:term-frequency-dict') testImplementation 
project(':code:processes:crawling-process:model') diff --git a/code/features-convert/anchor-keywords/build.gradle b/code/processes/converting-process/ft-anchor-keywords/build.gradle similarity index 92% rename from code/features-convert/anchor-keywords/build.gradle rename to code/processes/converting-process/ft-anchor-keywords/build.gradle index 1c25bd2e..7572cce0 100644 --- a/code/features-convert/anchor-keywords/build.gradle +++ b/code/processes/converting-process/ft-anchor-keywords/build.gradle @@ -17,7 +17,7 @@ dependencies { implementation project(':code:common:model') implementation project(':code:common:db') implementation project(':code:common:process') - implementation project(':code:features-convert:keyword-extraction') + implementation project(':code:processes:converting-process:ft-keyword-extraction') implementation project(':code:libraries:language-processing') implementation project(':code:libraries:term-frequency-dict') diff --git a/code/features-convert/anchor-keywords/java/nu/marginalia/atags/AnchorTextKeywords.java b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/AnchorTextKeywords.java similarity index 100% rename from code/features-convert/anchor-keywords/java/nu/marginalia/atags/AnchorTextKeywords.java rename to code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/AnchorTextKeywords.java diff --git a/code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/DomainLinks.java b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/model/DomainLinks.java similarity index 100% rename from code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/DomainLinks.java rename to code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/model/DomainLinks.java diff --git a/code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/Link.java 
b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/model/Link.java similarity index 100% rename from code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/Link.java rename to code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/model/Link.java diff --git a/code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/LinkWithText.java b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/model/LinkWithText.java similarity index 100% rename from code/features-convert/anchor-keywords/java/nu/marginalia/atags/model/LinkWithText.java rename to code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/model/LinkWithText.java diff --git a/code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsImpl.java b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsImpl.java similarity index 100% rename from code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsImpl.java rename to code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsImpl.java diff --git a/code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSource.java b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSource.java similarity index 100% rename from code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSource.java rename to code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSource.java diff --git a/code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java b/code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java similarity index 100% rename from 
code/features-convert/anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java rename to code/processes/converting-process/ft-anchor-keywords/java/nu/marginalia/atags/source/AnchorTagsSourceFactory.java diff --git a/code/features-convert/anchor-keywords/resources/atags-stop-list b/code/processes/converting-process/ft-anchor-keywords/resources/atags-stop-list similarity index 100% rename from code/features-convert/anchor-keywords/resources/atags-stop-list rename to code/processes/converting-process/ft-anchor-keywords/resources/atags-stop-list diff --git a/code/features-convert/anchor-keywords/test/nu/marginalia/atags/DomainAnchorTagsImplTest.java b/code/processes/converting-process/ft-anchor-keywords/test/nu/marginalia/atags/DomainAnchorTagsImplTest.java similarity index 100% rename from code/features-convert/anchor-keywords/test/nu/marginalia/atags/DomainAnchorTagsImplTest.java rename to code/processes/converting-process/ft-anchor-keywords/test/nu/marginalia/atags/DomainAnchorTagsImplTest.java diff --git a/code/features-convert/anchor-keywords/test/nu/marginalia/util/TestLanguageModels.java b/code/processes/converting-process/ft-anchor-keywords/test/nu/marginalia/util/TestLanguageModels.java similarity index 100% rename from code/features-convert/anchor-keywords/test/nu/marginalia/util/TestLanguageModels.java rename to code/processes/converting-process/ft-anchor-keywords/test/nu/marginalia/util/TestLanguageModels.java diff --git a/code/features-convert/keyword-extraction/build.gradle b/code/processes/converting-process/ft-keyword-extraction/build.gradle similarity index 100% rename from code/features-convert/keyword-extraction/build.gradle rename to code/processes/converting-process/ft-keyword-extraction/build.gradle diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/DocumentKeywordExtractor.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/DocumentKeywordExtractor.java 
similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/DocumentKeywordExtractor.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/DocumentKeywordExtractor.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/KeywordExtractor.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/KeywordExtractor.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/KeywordExtractor.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/KeywordExtractor.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/KeywordMetadata.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/KeywordMetadata.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/KeywordMetadata.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/KeywordMetadata.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/WordReps.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/WordReps.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/WordReps.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/WordReps.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/ArtifactKeywords.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/ArtifactKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/ArtifactKeywords.java rename to 
code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/ArtifactKeywords.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/NameLikeKeywords.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/NameLikeKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/NameLikeKeywords.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/NameLikeKeywords.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/SubjectLikeKeywords.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/SubjectLikeKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/SubjectLikeKeywords.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/SubjectLikeKeywords.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/TitleKeywords.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/TitleKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/TitleKeywords.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/TitleKeywords.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/UrlKeywords.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/UrlKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/UrlKeywords.java rename to 
code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/UrlKeywords.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/extractors/WordsTfIdfCounts.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywords.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywords.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywords.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywords.java diff --git a/code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java b/code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java similarity index 100% rename from code/features-convert/keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java rename to code/processes/converting-process/ft-keyword-extraction/java/nu/marginalia/keyword/model/DocumentKeywordsBuilder.java diff --git a/code/features-convert/keyword-extraction/readme.md b/code/processes/converting-process/ft-keyword-extraction/readme.md similarity index 100% rename from code/features-convert/keyword-extraction/readme.md rename to code/processes/converting-process/ft-keyword-extraction/readme.md diff --git a/code/features-convert/keyword-extraction/test-resources/test-data/java.html 
b/code/processes/converting-process/ft-keyword-extraction/test-resources/test-data/java.html similarity index 100% rename from code/features-convert/keyword-extraction/test-resources/test-data/java.html rename to code/processes/converting-process/ft-keyword-extraction/test-resources/test-data/java.html diff --git a/code/features-convert/keyword-extraction/test-resources/test-data/keyboards.html b/code/processes/converting-process/ft-keyword-extraction/test-resources/test-data/keyboards.html similarity index 100% rename from code/features-convert/keyword-extraction/test-resources/test-data/keyboards.html rename to code/processes/converting-process/ft-keyword-extraction/test-resources/test-data/keyboards.html diff --git a/code/features-convert/keyword-extraction/test-resources/test-data/madonna.html b/code/processes/converting-process/ft-keyword-extraction/test-resources/test-data/madonna.html similarity index 100% rename from code/features-convert/keyword-extraction/test-resources/test-data/madonna.html rename to code/processes/converting-process/ft-keyword-extraction/test-resources/test-data/madonna.html diff --git a/code/features-convert/keyword-extraction/test-resources/test-data/spam.html b/code/processes/converting-process/ft-keyword-extraction/test-resources/test-data/spam.html similarity index 100% rename from code/features-convert/keyword-extraction/test-resources/test-data/spam.html rename to code/processes/converting-process/ft-keyword-extraction/test-resources/test-data/spam.html diff --git a/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/DocumentKeywordExtractorTest.java b/code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/DocumentKeywordExtractorTest.java similarity index 100% rename from code/features-convert/keyword-extraction/test/nu/marginalia/keyword/DocumentKeywordExtractorTest.java rename to 
code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/DocumentKeywordExtractorTest.java diff --git a/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/SentenceExtractorTest.java b/code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/SentenceExtractorTest.java similarity index 100% rename from code/features-convert/keyword-extraction/test/nu/marginalia/keyword/SentenceExtractorTest.java rename to code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/SentenceExtractorTest.java diff --git a/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/ArtifactKeywordsTest.java b/code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/ArtifactKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/ArtifactKeywordsTest.java rename to code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/ArtifactKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/NameLikeKeywordsTest.java b/code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/NameLikeKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/NameLikeKeywordsTest.java rename to code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/NameLikeKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/SubjectLikeKeywordsTest.java b/code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/SubjectLikeKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/SubjectLikeKeywordsTest.java rename 
to code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/SubjectLikeKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/TitleKeywordsTest.java b/code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/TitleKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/TitleKeywordsTest.java rename to code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/TitleKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/UrlKeywordsTest.java b/code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/UrlKeywordsTest.java similarity index 100% rename from code/features-convert/keyword-extraction/test/nu/marginalia/keyword/extractors/UrlKeywordsTest.java rename to code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/keyword/extractors/UrlKeywordsTest.java diff --git a/code/features-convert/keyword-extraction/test/nu/marginalia/test/util/TestLanguageModels.java b/code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/test/util/TestLanguageModels.java similarity index 100% rename from code/features-convert/keyword-extraction/test/nu/marginalia/test/util/TestLanguageModels.java rename to code/processes/converting-process/ft-keyword-extraction/test/nu/marginalia/test/util/TestLanguageModels.java diff --git a/code/features-convert/adblock/java/nu/marginalia/adblock/AdblockSimulator.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/adblock/AdblockSimulator.java similarity index 98% rename from code/features-convert/adblock/java/nu/marginalia/adblock/AdblockSimulator.java rename to 
code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/adblock/AdblockSimulator.java index 1908fda3..74eecdd0 100644 --- a/code/features-convert/adblock/java/nu/marginalia/adblock/AdblockSimulator.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/adblock/AdblockSimulator.java @@ -1,4 +1,4 @@ -package nu.marginalia.adblock; +package nu.marginalia.converting.processor.classifier.adblock; import com.google.inject.Inject; import com.google.inject.Singleton; diff --git a/code/features-convert/adblock/java/nu/marginalia/adblock/GoogleAnwersSpamDetector.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/adblock/GoogleAnwersSpamDetector.java similarity index 93% rename from code/features-convert/adblock/java/nu/marginalia/adblock/GoogleAnwersSpamDetector.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/adblock/GoogleAnwersSpamDetector.java index 4cec3700..8c554c15 100644 --- a/code/features-convert/adblock/java/nu/marginalia/adblock/GoogleAnwersSpamDetector.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/adblock/GoogleAnwersSpamDetector.java @@ -1,4 +1,4 @@ -package nu.marginalia.adblock; +package nu.marginalia.converting.processor.classifier.adblock; import org.jsoup.nodes.Document; diff --git a/code/features-convert/topic-detection/java/nu/marginalia/topic/RecipeDetector.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/RecipeDetector.java similarity index 99% rename from code/features-convert/topic-detection/java/nu/marginalia/topic/RecipeDetector.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/RecipeDetector.java index 8633b4a0..83a3a246 100644 --- a/code/features-convert/topic-detection/java/nu/marginalia/topic/RecipeDetector.java +++ 
b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/RecipeDetector.java @@ -1,4 +1,4 @@ -package nu.marginalia.topic; +package nu.marginalia.converting.processor.classifier.topic; import ca.rmen.porterstemmer.PorterStemmer; import com.google.inject.Inject; diff --git a/code/features-convert/topic-detection/java/nu/marginalia/topic/TextileCraftDetector.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/TextileCraftDetector.java similarity index 99% rename from code/features-convert/topic-detection/java/nu/marginalia/topic/TextileCraftDetector.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/TextileCraftDetector.java index 6d8ccff0..4aa339d2 100644 --- a/code/features-convert/topic-detection/java/nu/marginalia/topic/TextileCraftDetector.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/TextileCraftDetector.java @@ -1,4 +1,4 @@ -package nu.marginalia.topic; +package nu.marginalia.converting.processor.classifier.topic; import ca.rmen.porterstemmer.PorterStemmer; import com.google.inject.Inject; diff --git a/code/features-convert/topic-detection/java/nu/marginalia/topic/WoodworkingDetector.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/WoodworkingDetector.java similarity index 98% rename from code/features-convert/topic-detection/java/nu/marginalia/topic/WoodworkingDetector.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/WoodworkingDetector.java index 416f103a..60811d15 100644 --- a/code/features-convert/topic-detection/java/nu/marginalia/topic/WoodworkingDetector.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/classifier/topic/WoodworkingDetector.java @@ -1,4 +1,4 @@ -package nu.marginalia.topic; +package 
nu.marginalia.converting.processor.classifier.topic; import ca.rmen.porterstemmer.PorterStemmer; import com.google.inject.Inject; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java index c38f63f9..3f08037f 100644 --- a/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/logic/FeatureExtractor.java @@ -2,14 +2,14 @@ package nu.marginalia.converting.processor.logic; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.adblock.AdblockSimulator; -import nu.marginalia.adblock.GoogleAnwersSpamDetector; +import nu.marginalia.converting.processor.classifier.adblock.AdblockSimulator; +import nu.marginalia.converting.processor.classifier.adblock.GoogleAnwersSpamDetector; +import nu.marginalia.converting.processor.classifier.topic.RecipeDetector; +import nu.marginalia.converting.processor.classifier.topic.TextileCraftDetector; +import nu.marginalia.converting.processor.classifier.topic.WoodworkingDetector; import nu.marginalia.language.model.DocumentLanguageData; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.crawl.HtmlFeature; -import nu.marginalia.topic.RecipeDetector; -import nu.marginalia.topic.TextileCraftDetector; -import nu.marginalia.topic.WoodworkingDetector; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java index 76b867fb..d423d599 100644 --- 
a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/HtmlDocumentProcessorPlugin.java @@ -12,6 +12,7 @@ import nu.marginalia.converting.processor.logic.dom.MeasureLengthVisitor; import nu.marginalia.converting.processor.logic.links.FileLinks; import nu.marginalia.converting.processor.logic.links.LinkProcessor; import nu.marginalia.converting.processor.plugin.specialization.HtmlProcessorSpecializations; +import nu.marginalia.converting.processor.pubdate.PubDateSniffer; import nu.marginalia.gregex.GuardedRegex; import nu.marginalia.gregex.GuardedRegexFactory; import nu.marginalia.keyword.DocumentKeywordExtractor; @@ -29,7 +30,6 @@ import nu.marginalia.model.crawldata.CrawledDocument; import nu.marginalia.model.html.HtmlStandard; import nu.marginalia.model.idx.DocumentFlags; import nu.marginalia.model.idx.DocumentMetadata; -import nu.marginalia.pubdate.PubDateSniffer; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.slf4j.Logger; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecialization.java index f40654bc..feeb2126 100644 --- a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/BlogSpecialization.java @@ -6,7 +6,7 @@ import com.google.inject.Singleton; import nu.marginalia.keyword.model.DocumentKeywordsBuilder; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.idx.WordFlags; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import 
org.apache.logging.log4j.util.Strings; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/DefaultSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/DefaultSpecialization.java index 5a441639..77f1df12 100644 --- a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/DefaultSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/DefaultSpecialization.java @@ -3,7 +3,7 @@ package nu.marginalia.converting.processor.plugin.specialization; import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.converting.processor.logic.dom.DomPruningFilter; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import org.jsoup.nodes.Document; import java.util.ArrayList; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecialization.java index d930cbd0..38bd415f 100644 --- a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecialization.java @@ -2,7 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import org.apache.commons.lang3.StringUtils; import org.jsoup.nodes.Document; import org.slf4j.Logger; diff --git 
a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecialization.java index f85847f4..01ec301c 100644 --- a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/LemmySpecialization.java @@ -2,7 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import org.jsoup.nodes.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java index 3aa35973..26d58775 100644 --- a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/MariadbKbSpecialization.java @@ -4,7 +4,7 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.keyword.model.DocumentKeywordsBuilder; import nu.marginalia.model.idx.WordFlags; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import org.jsoup.nodes.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java 
b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java index 947cc4c0..36584bae 100644 --- a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/PhpBBSpecialization.java @@ -3,7 +3,7 @@ package nu.marginalia.converting.processor.plugin.specialization; import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecialization.java index c6107870..5c2fd2e7 100644 --- a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/WikiSpecialization.java @@ -4,7 +4,7 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import nu.marginalia.keyword.model.DocumentKeywordsBuilder; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; diff --git a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java index 16a222b3..af891889 100644 --- 
a/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecialization.java @@ -2,7 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization; import com.google.inject.Inject; import com.google.inject.Singleton; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import org.jsoup.nodes.Document; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateEffortLevel.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateEffortLevel.java similarity index 50% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateEffortLevel.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateEffortLevel.java index e2fd4e65..47e22ee0 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateEffortLevel.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateEffortLevel.java @@ -1,4 +1,4 @@ -package nu.marginalia.pubdate; +package nu.marginalia.converting.processor.pubdate; public enum PubDateEffortLevel { LOW, diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateFromHtmlStandard.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateFromHtmlStandard.java similarity index 95% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateFromHtmlStandard.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateFromHtmlStandard.java index dfbab8d3..78c27781 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateFromHtmlStandard.java +++ 
b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateFromHtmlStandard.java @@ -1,4 +1,4 @@ -package nu.marginalia.pubdate; +package nu.marginalia.converting.processor.pubdate; import nu.marginalia.model.html.HtmlStandard; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateHeuristic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateHeuristic.java similarity index 87% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateHeuristic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateHeuristic.java index 56355806..d348c75a 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateHeuristic.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateHeuristic.java @@ -1,4 +1,4 @@ -package nu.marginalia.pubdate; +package nu.marginalia.converting.processor.pubdate; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.crawl.PubDate; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateParser.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateParser.java similarity index 99% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateParser.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateParser.java index 1fbade80..5b139e30 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateParser.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateParser.java @@ -1,7 +1,7 @@ -package nu.marginalia.pubdate; +package nu.marginalia.converting.processor.pubdate; -import nu.marginalia.model.html.HtmlStandard; import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import java.time.DateTimeException; 
import java.time.LocalDate; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateSniffer.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateSniffer.java similarity index 93% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateSniffer.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateSniffer.java index 90b25915..4ec1c4f9 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/PubDateSniffer.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/PubDateSniffer.java @@ -1,9 +1,9 @@ -package nu.marginalia.pubdate; +package nu.marginalia.converting.processor.pubdate; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.converting.processor.pubdate.heuristic.*; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.heuristic.*; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.ArrayList; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java similarity index 94% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java index 28059f64..5ab86c17 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicDOMParsingPass1.java @@ -1,11 +1,11 @@ -package 
nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jetbrains.annotations.NotNull; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java similarity index 91% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java index bb625180..eb42a3c4 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicDOMParsingPass2.java @@ -1,12 +1,12 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateEffortLevel; -import nu.marginalia.pubdate.PubDateFromHtmlStandard; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import 
nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateFromHtmlStandard; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jetbrains.annotations.NotNull; import org.jsoup.nodes.Document; import org.jsoup.nodes.Node; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java similarity index 69% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java index 30486f2f..cffbe178 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicGuessFromHtmlStandard.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateEffortLevel; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; +import nu.marginalia.model.crawl.PubDate; +import 
nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java similarity index 77% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java index 30513a47..1d4d6a90 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5AnyTimeTag.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java similarity index 73% rename from 
code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java index 45c8b091..e484e40b 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5ArticleDateTag.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java similarity index 73% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java index aa09d392..0cedf842 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java +++ 
b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicHtml5ItempropDateTag.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicJSONLD.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicJSONLD.java similarity index 89% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicJSONLD.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicJSONLD.java index 3ddf58eb..27d25208 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicJSONLD.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicJSONLD.java @@ -1,16 +1,16 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.google.gson.JsonSyntaxException; import com.google.gson.annotations.SerializedName; import lombok.ToString; -import nu.marginalia.model.html.HtmlStandard; -import 
nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Collections; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicLastModified.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicLastModified.java similarity index 75% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicLastModified.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicLastModified.java index ca42d469..0bc1a4bc 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicLastModified.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicLastModified.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.crawl.PubDate; 
+import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicMicrodata.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicMicrodata.java similarity index 73% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicMicrodata.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicMicrodata.java index 584375f2..04858bbd 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicMicrodata.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicMicrodata.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicOpenGraph.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicOpenGraph.java similarity index 73% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicOpenGraph.java rename to 
code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicOpenGraph.java index 74a7a654..0c1bc6d3 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicOpenGraph.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicOpenGraph.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateEffortLevel; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicRDFaTag.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicRDFaTag.java similarity index 72% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicRDFaTag.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicRDFaTag.java index 1ed20019..a158bd9a 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicRDFaTag.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicRDFaTag.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package 
nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateEffortLevel; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java similarity index 83% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java index 6a6d5630..16a55c5f 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicUrlPatternPass1.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import 
nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java similarity index 82% rename from code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java index ea3ab9d9..e5226266 100644 --- a/code/features-convert/pubdate/java/nu/marginalia/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/pubdate/heuristic/PubDateHeuristicUrlPatternPass2.java @@ -1,11 +1,11 @@ -package nu.marginalia.pubdate.heuristic; +package nu.marginalia.converting.processor.pubdate.heuristic; -import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.model.crawl.PubDate; -import nu.marginalia.pubdate.PubDateHeuristic; -import nu.marginalia.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateEffortLevel; +import nu.marginalia.converting.processor.pubdate.PubDateHeuristic; +import nu.marginalia.converting.processor.pubdate.PubDateParser; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.pubdate.PubDateEffortLevel; +import nu.marginalia.model.crawl.PubDate; +import nu.marginalia.model.html.HtmlStandard; import org.jsoup.nodes.Document; import java.util.Optional; diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/SummaryExtractor.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/SummaryExtractor.java similarity 
index 94% rename from code/features-convert/summary-extraction/java/nu/marginalia/summary/SummaryExtractor.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/SummaryExtractor.java index 0e422390..7a9bd3da 100644 --- a/code/features-convert/summary-extraction/java/nu/marginalia/summary/SummaryExtractor.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/SummaryExtractor.java @@ -1,8 +1,8 @@ -package nu.marginalia.summary; +package nu.marginalia.converting.processor.summary; import com.google.inject.Inject; import com.google.inject.name.Named; -import nu.marginalia.summary.heuristic.*; +import nu.marginalia.converting.processor.summary.heuristic.*; import org.apache.commons.lang3.StringUtils; import org.jsoup.nodes.Document; diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/DomFilterHeuristic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/DomFilterHeuristic.java similarity index 91% rename from code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/DomFilterHeuristic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/DomFilterHeuristic.java index a06d4408..7a1c2be3 100644 --- a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/DomFilterHeuristic.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/DomFilterHeuristic.java @@ -1,4 +1,4 @@ -package nu.marginalia.summary.heuristic; +package nu.marginalia.converting.processor.summary.heuristic; import com.google.inject.Inject; import com.google.inject.name.Named; diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/FallbackHeuristic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/FallbackHeuristic.java 
similarity index 92% rename from code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/FallbackHeuristic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/FallbackHeuristic.java index caf37137..53d5c656 100644 --- a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/FallbackHeuristic.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/FallbackHeuristic.java @@ -1,4 +1,4 @@ -package nu.marginalia.summary.heuristic; +package nu.marginalia.converting.processor.summary.heuristic; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/HeuristicTextUtil.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/HeuristicTextUtil.java similarity index 98% rename from code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/HeuristicTextUtil.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/HeuristicTextUtil.java index 6beac2eb..3c7bfa9f 100644 --- a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/HeuristicTextUtil.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/HeuristicTextUtil.java @@ -1,4 +1,4 @@ -package nu.marginalia.summary.heuristic; +package nu.marginalia.converting.processor.summary.heuristic; import org.apache.commons.lang3.StringUtils; diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/MetaDescriptionHeuristic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/MetaDescriptionHeuristic.java similarity index 83% rename from 
code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/MetaDescriptionHeuristic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/MetaDescriptionHeuristic.java index d48b6c3b..4ccdc09b 100644 --- a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/MetaDescriptionHeuristic.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/MetaDescriptionHeuristic.java @@ -1,4 +1,4 @@ -package nu.marginalia.summary.heuristic; +package nu.marginalia.converting.processor.summary.heuristic; import org.jsoup.nodes.Document; diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/OpenGraphDescriptionHeuristic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/OpenGraphDescriptionHeuristic.java similarity index 83% rename from code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/OpenGraphDescriptionHeuristic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/OpenGraphDescriptionHeuristic.java index 70f56bd3..4bcfd8e6 100644 --- a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/OpenGraphDescriptionHeuristic.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/OpenGraphDescriptionHeuristic.java @@ -1,4 +1,4 @@ -package nu.marginalia.summary.heuristic; +package nu.marginalia.converting.processor.summary.heuristic; import org.jsoup.nodes.Document; diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummarizingDOMFilter.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/SummarizingDOMFilter.java similarity index 97% rename from 
code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummarizingDOMFilter.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/SummarizingDOMFilter.java index f72b0eae..ab327744 100644 --- a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummarizingDOMFilter.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/SummarizingDOMFilter.java @@ -1,4 +1,4 @@ -package nu.marginalia.summary.heuristic; +package nu.marginalia.converting.processor.summary.heuristic; import com.google.common.base.Strings; import org.apache.commons.lang3.StringUtils; @@ -10,7 +10,6 @@ import org.jsoup.select.NodeFilter; import java.util.*; import java.util.function.Function; -import static nu.marginalia.summary.heuristic.HeuristicTextUtil.countOccurrencesOfAnyWord; import static org.jsoup.internal.StringUtil.isActuallyWhitespace; import static org.jsoup.internal.StringUtil.isInvisibleChar; @@ -107,8 +106,8 @@ public class SummarizingDOMFilter implements NodeFilter { if (wholeText.length() > 128) return 0; - return countOccurrencesOfAnyWord(wholeText, importantWords) - - countOccurrencesOfAnyWord(wholeText, badWords); + return HeuristicTextUtil.countOccurrencesOfAnyWord(wholeText, importantWords) + - HeuristicTextUtil.countOccurrencesOfAnyWord(wholeText, badWords); }); if (cnt > 0) { diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummaryHeuristic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/SummaryHeuristic.java similarity index 73% rename from code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummaryHeuristic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/SummaryHeuristic.java index 54b1c33a..c3cef4bb 100644 --- 
a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/SummaryHeuristic.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/SummaryHeuristic.java @@ -1,4 +1,4 @@ -package nu.marginalia.summary.heuristic; +package nu.marginalia.converting.processor.summary.heuristic; import org.jsoup.nodes.Document; diff --git a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/TagDensityHeuristic.java b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/TagDensityHeuristic.java similarity index 96% rename from code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/TagDensityHeuristic.java rename to code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/TagDensityHeuristic.java index 170afec0..dfea3709 100644 --- a/code/features-convert/summary-extraction/java/nu/marginalia/summary/heuristic/TagDensityHeuristic.java +++ b/code/processes/converting-process/java/nu/marginalia/converting/processor/summary/heuristic/TagDensityHeuristic.java @@ -1,4 +1,4 @@ -package nu.marginalia.summary.heuristic; +package nu.marginalia.converting.processor.summary.heuristic; import com.google.inject.Inject; import com.google.inject.name.Named; diff --git a/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/RedditEntryReader.java b/code/processes/converting-process/java/nu/marginalia/integration/reddit/RedditEntryReader.java similarity index 100% rename from code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/RedditEntryReader.java rename to code/processes/converting-process/java/nu/marginalia/integration/reddit/RedditEntryReader.java diff --git a/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/db/RedditDb.java b/code/processes/converting-process/java/nu/marginalia/integration/reddit/db/RedditDb.java similarity index 100% rename from 
code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/db/RedditDb.java rename to code/processes/converting-process/java/nu/marginalia/integration/reddit/db/RedditDb.java diff --git a/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/ProcessableRedditComment.java b/code/processes/converting-process/java/nu/marginalia/integration/reddit/model/ProcessableRedditComment.java similarity index 100% rename from code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/ProcessableRedditComment.java rename to code/processes/converting-process/java/nu/marginalia/integration/reddit/model/ProcessableRedditComment.java diff --git a/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/ProcessableRedditSubmission.java b/code/processes/converting-process/java/nu/marginalia/integration/reddit/model/ProcessableRedditSubmission.java similarity index 100% rename from code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/ProcessableRedditSubmission.java rename to code/processes/converting-process/java/nu/marginalia/integration/reddit/model/ProcessableRedditSubmission.java diff --git a/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/RawRedditComment.java b/code/processes/converting-process/java/nu/marginalia/integration/reddit/model/RawRedditComment.java similarity index 100% rename from code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/RawRedditComment.java rename to code/processes/converting-process/java/nu/marginalia/integration/reddit/model/RawRedditComment.java diff --git a/code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/RawRedditSubmission.java b/code/processes/converting-process/java/nu/marginalia/integration/reddit/model/RawRedditSubmission.java similarity index 100% rename from code/features-convert/reddit-json/java/nu/marginalia/integration/reddit/model/RawRedditSubmission.java 
rename to code/processes/converting-process/java/nu/marginalia/integration/reddit/model/RawRedditSubmission.java diff --git a/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/model/StackExchangeComment.java b/code/processes/converting-process/java/nu/marginalia/integration/stackexchange/model/StackExchangeComment.java similarity index 100% rename from code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/model/StackExchangeComment.java rename to code/processes/converting-process/java/nu/marginalia/integration/stackexchange/model/StackExchangeComment.java diff --git a/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/model/StackExchangePost.java b/code/processes/converting-process/java/nu/marginalia/integration/stackexchange/model/StackExchangePost.java similarity index 100% rename from code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/model/StackExchangePost.java rename to code/processes/converting-process/java/nu/marginalia/integration/stackexchange/model/StackExchangePost.java diff --git a/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java b/code/processes/converting-process/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java similarity index 100% rename from code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java rename to code/processes/converting-process/java/nu/marginalia/integration/stackexchange/sqlite/StackExchangePostsDb.java diff --git a/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchange7zXmlEventReaderSource.java b/code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/StackExchange7zXmlEventReaderSource.java similarity index 100% rename from 
code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchange7zXmlEventReaderSource.java rename to code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/StackExchange7zXmlEventReaderSource.java diff --git a/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReader.java b/code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReader.java similarity index 100% rename from code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReader.java rename to code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReader.java diff --git a/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlIterator.java b/code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlIterator.java similarity index 100% rename from code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlIterator.java rename to code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlIterator.java diff --git a/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReader.java b/code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReader.java similarity index 100% rename from code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReader.java rename to code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReader.java diff --git a/code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/XmlEventReaderSource.java 
b/code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/XmlEventReaderSource.java similarity index 100% rename from code/features-convert/stackexchange-xml/java/nu/marginalia/integration/stackexchange/xml/XmlEventReaderSource.java rename to code/processes/converting-process/java/nu/marginalia/integration/stackexchange/xml/XmlEventReaderSource.java diff --git a/code/features-convert/reddit-json/resources/db/reddit.sql b/code/processes/converting-process/resources/db/reddit.sql similarity index 100% rename from code/features-convert/reddit-json/resources/db/reddit.sql rename to code/processes/converting-process/resources/db/reddit.sql diff --git a/code/features-convert/stackexchange-xml/resources/db/stackexchange.sql b/code/processes/converting-process/resources/db/stackexchange.sql similarity index 100% rename from code/features-convert/stackexchange-xml/resources/db/stackexchange.sql rename to code/processes/converting-process/resources/db/stackexchange.sql diff --git a/code/features-convert/summary-extraction/test-resources/html/monadnock.html b/code/processes/converting-process/test-resources/html/monadnock.html similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/monadnock.html rename to code/processes/converting-process/test-resources/html/monadnock.html diff --git a/code/features-convert/summary-extraction/test-resources/html/readme.md b/code/processes/converting-process/test-resources/html/readme.md similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/readme.md rename to code/processes/converting-process/test-resources/html/readme.md diff --git a/code/features-convert/summary-extraction/test-resources/html/summarization/187.shtml b/code/processes/converting-process/test-resources/html/summarization/187.shtml similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/summarization/187.shtml rename to 
code/processes/converting-process/test-resources/html/summarization/187.shtml diff --git a/code/features-convert/summary-extraction/test-resources/html/summarization/surrey.html b/code/processes/converting-process/test-resources/html/summarization/surrey.html similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/summarization/surrey.html rename to code/processes/converting-process/test-resources/html/summarization/surrey.html diff --git a/code/features-convert/summary-extraction/test-resources/html/summarization/surrey.html.1 b/code/processes/converting-process/test-resources/html/summarization/surrey.html.1 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/summarization/surrey.html.1 rename to code/processes/converting-process/test-resources/html/summarization/surrey.html.1 diff --git a/code/features-convert/summary-extraction/test-resources/html/theregister.html b/code/processes/converting-process/test-resources/html/theregister.html similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/theregister.html rename to code/processes/converting-process/test-resources/html/theregister.html diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/index b/code/processes/converting-process/test-resources/html/work-set/index similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/index rename to code/processes/converting-process/test-resources/html/work-set/index diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1021546012 b/code/processes/converting-process/test-resources/html/work-set/url--1021546012 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1021546012 rename to code/processes/converting-process/test-resources/html/work-set/url--1021546012 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1028592943 b/code/processes/converting-process/test-resources/html/work-set/url--1028592943 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1028592943 rename to code/processes/converting-process/test-resources/html/work-set/url--1028592943 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1081293162 b/code/processes/converting-process/test-resources/html/work-set/url--1081293162 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1081293162 rename to code/processes/converting-process/test-resources/html/work-set/url--1081293162 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1105046394 b/code/processes/converting-process/test-resources/html/work-set/url--1105046394 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1105046394 rename to code/processes/converting-process/test-resources/html/work-set/url--1105046394 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1146923296 b/code/processes/converting-process/test-resources/html/work-set/url--1146923296 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1146923296 rename to code/processes/converting-process/test-resources/html/work-set/url--1146923296 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1194694074 b/code/processes/converting-process/test-resources/html/work-set/url--1194694074 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1194694074 rename to code/processes/converting-process/test-resources/html/work-set/url--1194694074 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1207898281 b/code/processes/converting-process/test-resources/html/work-set/url--1207898281 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1207898281 rename to code/processes/converting-process/test-resources/html/work-set/url--1207898281 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1268145073 b/code/processes/converting-process/test-resources/html/work-set/url--1268145073 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1268145073 rename to code/processes/converting-process/test-resources/html/work-set/url--1268145073 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1294876331 b/code/processes/converting-process/test-resources/html/work-set/url--1294876331 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1294876331 rename to code/processes/converting-process/test-resources/html/work-set/url--1294876331 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1314767420 b/code/processes/converting-process/test-resources/html/work-set/url--1314767420 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1314767420 rename to code/processes/converting-process/test-resources/html/work-set/url--1314767420 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1316269786 b/code/processes/converting-process/test-resources/html/work-set/url--1316269786 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1316269786 rename to code/processes/converting-process/test-resources/html/work-set/url--1316269786 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1316766580 b/code/processes/converting-process/test-resources/html/work-set/url--1316766580 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1316766580 rename to code/processes/converting-process/test-resources/html/work-set/url--1316766580 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1319968043 b/code/processes/converting-process/test-resources/html/work-set/url--1319968043 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1319968043 rename to code/processes/converting-process/test-resources/html/work-set/url--1319968043 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1338576987 b/code/processes/converting-process/test-resources/html/work-set/url--1338576987 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1338576987 rename to code/processes/converting-process/test-resources/html/work-set/url--1338576987 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1341909571 b/code/processes/converting-process/test-resources/html/work-set/url--1341909571 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1341909571 rename to code/processes/converting-process/test-resources/html/work-set/url--1341909571 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1369578579 b/code/processes/converting-process/test-resources/html/work-set/url--1369578579 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1369578579 rename to code/processes/converting-process/test-resources/html/work-set/url--1369578579 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1437315645 b/code/processes/converting-process/test-resources/html/work-set/url--1437315645 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1437315645 rename to code/processes/converting-process/test-resources/html/work-set/url--1437315645 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1458954960 b/code/processes/converting-process/test-resources/html/work-set/url--1458954960 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1458954960 rename to code/processes/converting-process/test-resources/html/work-set/url--1458954960 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1475681345 b/code/processes/converting-process/test-resources/html/work-set/url--1475681345 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1475681345 rename to code/processes/converting-process/test-resources/html/work-set/url--1475681345 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1498328446 b/code/processes/converting-process/test-resources/html/work-set/url--1498328446 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1498328446 rename to code/processes/converting-process/test-resources/html/work-set/url--1498328446 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1507779664 b/code/processes/converting-process/test-resources/html/work-set/url--1507779664 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1507779664 rename to code/processes/converting-process/test-resources/html/work-set/url--1507779664 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1540303379 b/code/processes/converting-process/test-resources/html/work-set/url--1540303379 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1540303379 rename to code/processes/converting-process/test-resources/html/work-set/url--1540303379 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--154898476 b/code/processes/converting-process/test-resources/html/work-set/url--154898476 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--154898476 rename to code/processes/converting-process/test-resources/html/work-set/url--154898476 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1552059399 b/code/processes/converting-process/test-resources/html/work-set/url--1552059399 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1552059399 rename to code/processes/converting-process/test-resources/html/work-set/url--1552059399 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1557688340 b/code/processes/converting-process/test-resources/html/work-set/url--1557688340 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1557688340 rename to code/processes/converting-process/test-resources/html/work-set/url--1557688340 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1584145751 b/code/processes/converting-process/test-resources/html/work-set/url--1584145751 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1584145751 rename to code/processes/converting-process/test-resources/html/work-set/url--1584145751 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1605151204 b/code/processes/converting-process/test-resources/html/work-set/url--1605151204 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1605151204 rename to code/processes/converting-process/test-resources/html/work-set/url--1605151204 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--162269247 b/code/processes/converting-process/test-resources/html/work-set/url--162269247 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--162269247 rename to code/processes/converting-process/test-resources/html/work-set/url--162269247 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1624294488 b/code/processes/converting-process/test-resources/html/work-set/url--1624294488 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1624294488 rename to code/processes/converting-process/test-resources/html/work-set/url--1624294488 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--164108285 b/code/processes/converting-process/test-resources/html/work-set/url--164108285 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--164108285 rename to code/processes/converting-process/test-resources/html/work-set/url--164108285 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1645688243 b/code/processes/converting-process/test-resources/html/work-set/url--1645688243 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1645688243 rename to code/processes/converting-process/test-resources/html/work-set/url--1645688243 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1658004609 b/code/processes/converting-process/test-resources/html/work-set/url--1658004609 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1658004609 rename to code/processes/converting-process/test-resources/html/work-set/url--1658004609 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1658558834 b/code/processes/converting-process/test-resources/html/work-set/url--1658558834 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1658558834 rename to code/processes/converting-process/test-resources/html/work-set/url--1658558834 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1698664879 b/code/processes/converting-process/test-resources/html/work-set/url--1698664879 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1698664879 rename to code/processes/converting-process/test-resources/html/work-set/url--1698664879 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--169975195 b/code/processes/converting-process/test-resources/html/work-set/url--169975195 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--169975195 rename to code/processes/converting-process/test-resources/html/work-set/url--169975195 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1701203332 b/code/processes/converting-process/test-resources/html/work-set/url--1701203332 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1701203332 rename to code/processes/converting-process/test-resources/html/work-set/url--1701203332 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--17281998 b/code/processes/converting-process/test-resources/html/work-set/url--17281998 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--17281998 rename to code/processes/converting-process/test-resources/html/work-set/url--17281998 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1742070028 b/code/processes/converting-process/test-resources/html/work-set/url--1742070028 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1742070028 rename to code/processes/converting-process/test-resources/html/work-set/url--1742070028 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1745376814 b/code/processes/converting-process/test-resources/html/work-set/url--1745376814 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1745376814 rename to code/processes/converting-process/test-resources/html/work-set/url--1745376814 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1749889035 b/code/processes/converting-process/test-resources/html/work-set/url--1749889035 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1749889035 rename to code/processes/converting-process/test-resources/html/work-set/url--1749889035 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--176177364 b/code/processes/converting-process/test-resources/html/work-set/url--176177364 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--176177364 rename to code/processes/converting-process/test-resources/html/work-set/url--176177364 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--177014197 b/code/processes/converting-process/test-resources/html/work-set/url--177014197 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--177014197 rename to code/processes/converting-process/test-resources/html/work-set/url--177014197 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1794527707 b/code/processes/converting-process/test-resources/html/work-set/url--1794527707 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1794527707 rename to code/processes/converting-process/test-resources/html/work-set/url--1794527707 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1797740201 b/code/processes/converting-process/test-resources/html/work-set/url--1797740201 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1797740201 rename to code/processes/converting-process/test-resources/html/work-set/url--1797740201 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1799098579 b/code/processes/converting-process/test-resources/html/work-set/url--1799098579 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1799098579 rename to code/processes/converting-process/test-resources/html/work-set/url--1799098579 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1959637826 b/code/processes/converting-process/test-resources/html/work-set/url--1959637826 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1959637826 rename to code/processes/converting-process/test-resources/html/work-set/url--1959637826 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1971916964 b/code/processes/converting-process/test-resources/html/work-set/url--1971916964 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1971916964 rename to code/processes/converting-process/test-resources/html/work-set/url--1971916964 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--1985840368 b/code/processes/converting-process/test-resources/html/work-set/url--1985840368 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--1985840368 rename to code/processes/converting-process/test-resources/html/work-set/url--1985840368 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--2012610859 b/code/processes/converting-process/test-resources/html/work-set/url--2012610859 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--2012610859 rename to code/processes/converting-process/test-resources/html/work-set/url--2012610859 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--202178680 b/code/processes/converting-process/test-resources/html/work-set/url--202178680 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--202178680 rename to code/processes/converting-process/test-resources/html/work-set/url--202178680 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--2043528727 b/code/processes/converting-process/test-resources/html/work-set/url--2043528727 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--2043528727 rename to code/processes/converting-process/test-resources/html/work-set/url--2043528727 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--2081757477 b/code/processes/converting-process/test-resources/html/work-set/url--2081757477 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--2081757477 rename to code/processes/converting-process/test-resources/html/work-set/url--2081757477 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--2103982576 b/code/processes/converting-process/test-resources/html/work-set/url--2103982576 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--2103982576 rename to code/processes/converting-process/test-resources/html/work-set/url--2103982576 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--2111558769 b/code/processes/converting-process/test-resources/html/work-set/url--2111558769 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--2111558769 rename to code/processes/converting-process/test-resources/html/work-set/url--2111558769 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--213168798 b/code/processes/converting-process/test-resources/html/work-set/url--213168798 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--213168798 rename to code/processes/converting-process/test-resources/html/work-set/url--213168798 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--232544032 b/code/processes/converting-process/test-resources/html/work-set/url--232544032 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--232544032 rename to code/processes/converting-process/test-resources/html/work-set/url--232544032 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--253010011 b/code/processes/converting-process/test-resources/html/work-set/url--253010011 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--253010011 rename to code/processes/converting-process/test-resources/html/work-set/url--253010011 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--274250994 b/code/processes/converting-process/test-resources/html/work-set/url--274250994 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--274250994 rename to code/processes/converting-process/test-resources/html/work-set/url--274250994 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--332442790 b/code/processes/converting-process/test-resources/html/work-set/url--332442790 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--332442790 rename to code/processes/converting-process/test-resources/html/work-set/url--332442790 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--353437903 b/code/processes/converting-process/test-resources/html/work-set/url--353437903 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--353437903 rename to code/processes/converting-process/test-resources/html/work-set/url--353437903 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--364546777 b/code/processes/converting-process/test-resources/html/work-set/url--364546777 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--364546777 rename to code/processes/converting-process/test-resources/html/work-set/url--364546777 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--379129416 b/code/processes/converting-process/test-resources/html/work-set/url--379129416 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--379129416 rename to code/processes/converting-process/test-resources/html/work-set/url--379129416 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--399428149 b/code/processes/converting-process/test-resources/html/work-set/url--399428149 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--399428149 rename to code/processes/converting-process/test-resources/html/work-set/url--399428149 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--425233170 b/code/processes/converting-process/test-resources/html/work-set/url--425233170 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--425233170 rename to code/processes/converting-process/test-resources/html/work-set/url--425233170 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--434612307 b/code/processes/converting-process/test-resources/html/work-set/url--434612307 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--434612307 rename to code/processes/converting-process/test-resources/html/work-set/url--434612307 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--439772328 b/code/processes/converting-process/test-resources/html/work-set/url--439772328 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--439772328 rename to code/processes/converting-process/test-resources/html/work-set/url--439772328 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--458002611 b/code/processes/converting-process/test-resources/html/work-set/url--458002611 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--458002611 rename to code/processes/converting-process/test-resources/html/work-set/url--458002611 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--506010305 b/code/processes/converting-process/test-resources/html/work-set/url--506010305 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--506010305 rename to code/processes/converting-process/test-resources/html/work-set/url--506010305 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--546773534 b/code/processes/converting-process/test-resources/html/work-set/url--546773534 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--546773534 rename to code/processes/converting-process/test-resources/html/work-set/url--546773534 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--551288516 b/code/processes/converting-process/test-resources/html/work-set/url--551288516 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--551288516 rename to code/processes/converting-process/test-resources/html/work-set/url--551288516 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--602577763 b/code/processes/converting-process/test-resources/html/work-set/url--602577763 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--602577763 rename to code/processes/converting-process/test-resources/html/work-set/url--602577763 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--611668054 b/code/processes/converting-process/test-resources/html/work-set/url--611668054 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--611668054 rename to code/processes/converting-process/test-resources/html/work-set/url--611668054 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--634771245 b/code/processes/converting-process/test-resources/html/work-set/url--634771245 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--634771245 rename to code/processes/converting-process/test-resources/html/work-set/url--634771245 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--639320493 b/code/processes/converting-process/test-resources/html/work-set/url--639320493 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--639320493 rename to code/processes/converting-process/test-resources/html/work-set/url--639320493 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--643179018 b/code/processes/converting-process/test-resources/html/work-set/url--643179018 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--643179018 rename to code/processes/converting-process/test-resources/html/work-set/url--643179018 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--663772351 b/code/processes/converting-process/test-resources/html/work-set/url--663772351 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--663772351 rename to code/processes/converting-process/test-resources/html/work-set/url--663772351 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--670789152 b/code/processes/converting-process/test-resources/html/work-set/url--670789152 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--670789152 rename to code/processes/converting-process/test-resources/html/work-set/url--670789152 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--6797317 b/code/processes/converting-process/test-resources/html/work-set/url--6797317 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--6797317 rename to code/processes/converting-process/test-resources/html/work-set/url--6797317 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--700978490 b/code/processes/converting-process/test-resources/html/work-set/url--700978490 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--700978490 rename to code/processes/converting-process/test-resources/html/work-set/url--700978490 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--708035332 b/code/processes/converting-process/test-resources/html/work-set/url--708035332 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--708035332 rename to code/processes/converting-process/test-resources/html/work-set/url--708035332 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--804917062 b/code/processes/converting-process/test-resources/html/work-set/url--804917062 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--804917062 rename to code/processes/converting-process/test-resources/html/work-set/url--804917062 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--819771302 b/code/processes/converting-process/test-resources/html/work-set/url--819771302 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--819771302 rename to code/processes/converting-process/test-resources/html/work-set/url--819771302 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--840796372 b/code/processes/converting-process/test-resources/html/work-set/url--840796372 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--840796372 rename to code/processes/converting-process/test-resources/html/work-set/url--840796372 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--841445362 b/code/processes/converting-process/test-resources/html/work-set/url--841445362 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--841445362 rename to code/processes/converting-process/test-resources/html/work-set/url--841445362 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--862385354 b/code/processes/converting-process/test-resources/html/work-set/url--862385354 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--862385354 rename to code/processes/converting-process/test-resources/html/work-set/url--862385354 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--879796466 b/code/processes/converting-process/test-resources/html/work-set/url--879796466 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--879796466 rename to code/processes/converting-process/test-resources/html/work-set/url--879796466 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--89134993 b/code/processes/converting-process/test-resources/html/work-set/url--89134993 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--89134993 rename to code/processes/converting-process/test-resources/html/work-set/url--89134993 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--905197876 b/code/processes/converting-process/test-resources/html/work-set/url--905197876 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--905197876 rename to code/processes/converting-process/test-resources/html/work-set/url--905197876 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--920328354 b/code/processes/converting-process/test-resources/html/work-set/url--920328354 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--920328354 rename to code/processes/converting-process/test-resources/html/work-set/url--920328354 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--952827759 b/code/processes/converting-process/test-resources/html/work-set/url--952827759 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--952827759 rename to code/processes/converting-process/test-resources/html/work-set/url--952827759 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url--964018507 b/code/processes/converting-process/test-resources/html/work-set/url--964018507 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--964018507 rename to code/processes/converting-process/test-resources/html/work-set/url--964018507 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url--972614909 b/code/processes/converting-process/test-resources/html/work-set/url--972614909 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url--972614909 rename to code/processes/converting-process/test-resources/html/work-set/url--972614909 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-10088520 b/code/processes/converting-process/test-resources/html/work-set/url-10088520 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-10088520 rename to code/processes/converting-process/test-resources/html/work-set/url-10088520 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1013281103 b/code/processes/converting-process/test-resources/html/work-set/url-1013281103 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1013281103 rename to code/processes/converting-process/test-resources/html/work-set/url-1013281103 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1019241851 b/code/processes/converting-process/test-resources/html/work-set/url-1019241851 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1019241851 rename to code/processes/converting-process/test-resources/html/work-set/url-1019241851 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1059944953 b/code/processes/converting-process/test-resources/html/work-set/url-1059944953 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1059944953 rename to code/processes/converting-process/test-resources/html/work-set/url-1059944953 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1118681302 b/code/processes/converting-process/test-resources/html/work-set/url-1118681302 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1118681302 rename to code/processes/converting-process/test-resources/html/work-set/url-1118681302 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1179298706 b/code/processes/converting-process/test-resources/html/work-set/url-1179298706 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1179298706 rename to code/processes/converting-process/test-resources/html/work-set/url-1179298706 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1191749784 b/code/processes/converting-process/test-resources/html/work-set/url-1191749784 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1191749784 rename to code/processes/converting-process/test-resources/html/work-set/url-1191749784 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1207094790 b/code/processes/converting-process/test-resources/html/work-set/url-1207094790 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1207094790 rename to code/processes/converting-process/test-resources/html/work-set/url-1207094790 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1213989666 b/code/processes/converting-process/test-resources/html/work-set/url-1213989666 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1213989666 rename to code/processes/converting-process/test-resources/html/work-set/url-1213989666 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1222442301 b/code/processes/converting-process/test-resources/html/work-set/url-1222442301 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1222442301 rename to code/processes/converting-process/test-resources/html/work-set/url-1222442301 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-130332455 b/code/processes/converting-process/test-resources/html/work-set/url-130332455 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-130332455 rename to code/processes/converting-process/test-resources/html/work-set/url-130332455 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1311055461 b/code/processes/converting-process/test-resources/html/work-set/url-1311055461 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1311055461 rename to code/processes/converting-process/test-resources/html/work-set/url-1311055461 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1391842722 b/code/processes/converting-process/test-resources/html/work-set/url-1391842722 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1391842722 rename to code/processes/converting-process/test-resources/html/work-set/url-1391842722 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1457388763 b/code/processes/converting-process/test-resources/html/work-set/url-1457388763 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1457388763 rename to code/processes/converting-process/test-resources/html/work-set/url-1457388763 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1506356272 b/code/processes/converting-process/test-resources/html/work-set/url-1506356272 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1506356272 rename to code/processes/converting-process/test-resources/html/work-set/url-1506356272 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1511762169 b/code/processes/converting-process/test-resources/html/work-set/url-1511762169 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1511762169 rename to code/processes/converting-process/test-resources/html/work-set/url-1511762169 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1534640058 b/code/processes/converting-process/test-resources/html/work-set/url-1534640058 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1534640058 rename to code/processes/converting-process/test-resources/html/work-set/url-1534640058 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1551513871 b/code/processes/converting-process/test-resources/html/work-set/url-1551513871 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1551513871 rename to code/processes/converting-process/test-resources/html/work-set/url-1551513871 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1567632447 b/code/processes/converting-process/test-resources/html/work-set/url-1567632447 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1567632447 rename to code/processes/converting-process/test-resources/html/work-set/url-1567632447 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1623049502 b/code/processes/converting-process/test-resources/html/work-set/url-1623049502 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1623049502 rename to code/processes/converting-process/test-resources/html/work-set/url-1623049502 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-163919330 b/code/processes/converting-process/test-resources/html/work-set/url-163919330 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-163919330 rename to code/processes/converting-process/test-resources/html/work-set/url-163919330 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1661398327 b/code/processes/converting-process/test-resources/html/work-set/url-1661398327 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1661398327 rename to code/processes/converting-process/test-resources/html/work-set/url-1661398327 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1724309925 b/code/processes/converting-process/test-resources/html/work-set/url-1724309925 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1724309925 rename to code/processes/converting-process/test-resources/html/work-set/url-1724309925 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1736807128 b/code/processes/converting-process/test-resources/html/work-set/url-1736807128 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1736807128 rename to code/processes/converting-process/test-resources/html/work-set/url-1736807128 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1739031345 b/code/processes/converting-process/test-resources/html/work-set/url-1739031345 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1739031345 rename to code/processes/converting-process/test-resources/html/work-set/url-1739031345 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1755745765 b/code/processes/converting-process/test-resources/html/work-set/url-1755745765 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1755745765 rename to code/processes/converting-process/test-resources/html/work-set/url-1755745765 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1802811100 b/code/processes/converting-process/test-resources/html/work-set/url-1802811100 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1802811100 rename to code/processes/converting-process/test-resources/html/work-set/url-1802811100 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1805364707 b/code/processes/converting-process/test-resources/html/work-set/url-1805364707 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1805364707 rename to code/processes/converting-process/test-resources/html/work-set/url-1805364707 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1832702370 b/code/processes/converting-process/test-resources/html/work-set/url-1832702370 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1832702370 rename to code/processes/converting-process/test-resources/html/work-set/url-1832702370 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1853114311 b/code/processes/converting-process/test-resources/html/work-set/url-1853114311 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1853114311 rename to code/processes/converting-process/test-resources/html/work-set/url-1853114311 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1924872844 b/code/processes/converting-process/test-resources/html/work-set/url-1924872844 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1924872844 rename to code/processes/converting-process/test-resources/html/work-set/url-1924872844 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-197772804 b/code/processes/converting-process/test-resources/html/work-set/url-197772804 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-197772804 rename to code/processes/converting-process/test-resources/html/work-set/url-197772804 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1984259912 b/code/processes/converting-process/test-resources/html/work-set/url-1984259912 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1984259912 rename to code/processes/converting-process/test-resources/html/work-set/url-1984259912 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-1990903988 b/code/processes/converting-process/test-resources/html/work-set/url-1990903988 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-1990903988 rename to code/processes/converting-process/test-resources/html/work-set/url-1990903988 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-2039310951 b/code/processes/converting-process/test-resources/html/work-set/url-2039310951 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-2039310951 rename to code/processes/converting-process/test-resources/html/work-set/url-2039310951 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-2040857056 b/code/processes/converting-process/test-resources/html/work-set/url-2040857056 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-2040857056 rename to code/processes/converting-process/test-resources/html/work-set/url-2040857056 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-2052613093 b/code/processes/converting-process/test-resources/html/work-set/url-2052613093 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-2052613093 rename to code/processes/converting-process/test-resources/html/work-set/url-2052613093 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-2063899866 b/code/processes/converting-process/test-resources/html/work-set/url-2063899866 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-2063899866 rename to code/processes/converting-process/test-resources/html/work-set/url-2063899866 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-2115548255 b/code/processes/converting-process/test-resources/html/work-set/url-2115548255 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-2115548255 rename to code/processes/converting-process/test-resources/html/work-set/url-2115548255 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-2127148436 b/code/processes/converting-process/test-resources/html/work-set/url-2127148436 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-2127148436 rename to code/processes/converting-process/test-resources/html/work-set/url-2127148436 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-2133781904 b/code/processes/converting-process/test-resources/html/work-set/url-2133781904 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-2133781904 rename to code/processes/converting-process/test-resources/html/work-set/url-2133781904 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-225690385 b/code/processes/converting-process/test-resources/html/work-set/url-225690385 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-225690385 rename to code/processes/converting-process/test-resources/html/work-set/url-225690385 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-226401955 b/code/processes/converting-process/test-resources/html/work-set/url-226401955 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-226401955 rename to code/processes/converting-process/test-resources/html/work-set/url-226401955 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-262970770 b/code/processes/converting-process/test-resources/html/work-set/url-262970770 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-262970770 rename to code/processes/converting-process/test-resources/html/work-set/url-262970770 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-30106798 
b/code/processes/converting-process/test-resources/html/work-set/url-30106798 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-30106798 rename to code/processes/converting-process/test-resources/html/work-set/url-30106798 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-302167335 b/code/processes/converting-process/test-resources/html/work-set/url-302167335 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-302167335 rename to code/processes/converting-process/test-resources/html/work-set/url-302167335 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-327999153 b/code/processes/converting-process/test-resources/html/work-set/url-327999153 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-327999153 rename to code/processes/converting-process/test-resources/html/work-set/url-327999153 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-332568225 b/code/processes/converting-process/test-resources/html/work-set/url-332568225 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-332568225 rename to code/processes/converting-process/test-resources/html/work-set/url-332568225 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-343223418 b/code/processes/converting-process/test-resources/html/work-set/url-343223418 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-343223418 rename to code/processes/converting-process/test-resources/html/work-set/url-343223418 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-383103932 b/code/processes/converting-process/test-resources/html/work-set/url-383103932 similarity index 
100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-383103932 rename to code/processes/converting-process/test-resources/html/work-set/url-383103932 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-412929678 b/code/processes/converting-process/test-resources/html/work-set/url-412929678 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-412929678 rename to code/processes/converting-process/test-resources/html/work-set/url-412929678 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-475213997 b/code/processes/converting-process/test-resources/html/work-set/url-475213997 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-475213997 rename to code/processes/converting-process/test-resources/html/work-set/url-475213997 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-483403121 b/code/processes/converting-process/test-resources/html/work-set/url-483403121 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-483403121 rename to code/processes/converting-process/test-resources/html/work-set/url-483403121 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-488667993 b/code/processes/converting-process/test-resources/html/work-set/url-488667993 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-488667993 rename to code/processes/converting-process/test-resources/html/work-set/url-488667993 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-50815201 b/code/processes/converting-process/test-resources/html/work-set/url-50815201 similarity index 100% rename from 
code/features-convert/summary-extraction/test-resources/html/work-set/url-50815201 rename to code/processes/converting-process/test-resources/html/work-set/url-50815201 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-522685905 b/code/processes/converting-process/test-resources/html/work-set/url-522685905 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-522685905 rename to code/processes/converting-process/test-resources/html/work-set/url-522685905 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-570714305 b/code/processes/converting-process/test-resources/html/work-set/url-570714305 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-570714305 rename to code/processes/converting-process/test-resources/html/work-set/url-570714305 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-58733529 b/code/processes/converting-process/test-resources/html/work-set/url-58733529 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-58733529 rename to code/processes/converting-process/test-resources/html/work-set/url-58733529 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-616518304 b/code/processes/converting-process/test-resources/html/work-set/url-616518304 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-616518304 rename to code/processes/converting-process/test-resources/html/work-set/url-616518304 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-662169426 b/code/processes/converting-process/test-resources/html/work-set/url-662169426 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-662169426 rename to 
code/processes/converting-process/test-resources/html/work-set/url-662169426 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-677278788 b/code/processes/converting-process/test-resources/html/work-set/url-677278788 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-677278788 rename to code/processes/converting-process/test-resources/html/work-set/url-677278788 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-690486170 b/code/processes/converting-process/test-resources/html/work-set/url-690486170 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-690486170 rename to code/processes/converting-process/test-resources/html/work-set/url-690486170 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-709693331 b/code/processes/converting-process/test-resources/html/work-set/url-709693331 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-709693331 rename to code/processes/converting-process/test-resources/html/work-set/url-709693331 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-734531556 b/code/processes/converting-process/test-resources/html/work-set/url-734531556 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-734531556 rename to code/processes/converting-process/test-resources/html/work-set/url-734531556 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-767530276 b/code/processes/converting-process/test-resources/html/work-set/url-767530276 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-767530276 rename to code/processes/converting-process/test-resources/html/work-set/url-767530276 diff --git 
a/code/features-convert/summary-extraction/test-resources/html/work-set/url-783154014 b/code/processes/converting-process/test-resources/html/work-set/url-783154014 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-783154014 rename to code/processes/converting-process/test-resources/html/work-set/url-783154014 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-796905237 b/code/processes/converting-process/test-resources/html/work-set/url-796905237 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-796905237 rename to code/processes/converting-process/test-resources/html/work-set/url-796905237 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-800099955 b/code/processes/converting-process/test-resources/html/work-set/url-800099955 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-800099955 rename to code/processes/converting-process/test-resources/html/work-set/url-800099955 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-804101946 b/code/processes/converting-process/test-resources/html/work-set/url-804101946 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-804101946 rename to code/processes/converting-process/test-resources/html/work-set/url-804101946 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-830664902 b/code/processes/converting-process/test-resources/html/work-set/url-830664902 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-830664902 rename to code/processes/converting-process/test-resources/html/work-set/url-830664902 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-876060686 
b/code/processes/converting-process/test-resources/html/work-set/url-876060686 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-876060686 rename to code/processes/converting-process/test-resources/html/work-set/url-876060686 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-892584998 b/code/processes/converting-process/test-resources/html/work-set/url-892584998 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-892584998 rename to code/processes/converting-process/test-resources/html/work-set/url-892584998 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-942458463 b/code/processes/converting-process/test-resources/html/work-set/url-942458463 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-942458463 rename to code/processes/converting-process/test-resources/html/work-set/url-942458463 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-952036171 b/code/processes/converting-process/test-resources/html/work-set/url-952036171 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-952036171 rename to code/processes/converting-process/test-resources/html/work-set/url-952036171 diff --git a/code/features-convert/summary-extraction/test-resources/html/work-set/url-968207276 b/code/processes/converting-process/test-resources/html/work-set/url-968207276 similarity index 100% rename from code/features-convert/summary-extraction/test-resources/html/work-set/url-968207276 rename to code/processes/converting-process/test-resources/html/work-set/url-968207276 diff --git a/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecializationTest.java 
b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecializationTest.java index 355921ea..a9b60211 100644 --- a/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecializationTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/JavadocSpecializationTest.java @@ -1,7 +1,7 @@ package nu.marginalia.converting.processor.plugin.specialization; import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import nu.marginalia.test.CommonTestData; import org.jsoup.Jsoup; import org.junit.jupiter.api.BeforeAll; diff --git a/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/LemmySpecializationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/LemmySpecializationTest.java index 7aab1759..6d72bb51 100644 --- a/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/LemmySpecializationTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/LemmySpecializationTest.java @@ -1,7 +1,7 @@ package nu.marginalia.converting.processor.plugin.specialization; import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import nu.marginalia.test.CommonTestData; import org.jsoup.Jsoup; import org.junit.jupiter.api.BeforeAll; diff --git a/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java 
b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java index 1fc23148..63d43296 100644 --- a/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/WikiSpecializationTest.java @@ -1,6 +1,6 @@ package nu.marginalia.converting.processor.plugin.specialization; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import nu.marginalia.test.CommonTestData; import org.jsoup.Jsoup; import org.junit.jupiter.api.BeforeAll; diff --git a/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecializationTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecializationTest.java index 40914ba8..581dea3c 100644 --- a/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecializationTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/plugin/specialization/XenForoSpecializationTest.java @@ -1,7 +1,7 @@ package nu.marginalia.converting.processor.plugin.specialization; import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor; -import nu.marginalia.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.SummaryExtractor; import nu.marginalia.test.CommonTestData; import org.jsoup.Jsoup; import org.junit.jupiter.api.BeforeAll; diff --git a/code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateSnifferTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/pubdate/PubDateSnifferTest.java similarity index 98% rename from code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateSnifferTest.java rename to 
code/processes/converting-process/test/nu/marginalia/converting/processor/pubdate/PubDateSnifferTest.java index efd320e8..c0ef172c 100644 --- a/code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateSnifferTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/pubdate/PubDateSnifferTest.java @@ -1,9 +1,11 @@ -package nu.marginalia.pubdate; +package nu.marginalia.converting.processor.pubdate; import nu.marginalia.WmsaHome; +import nu.marginalia.converting.processor.pubdate.PubDateParser; +import nu.marginalia.converting.processor.pubdate.PubDateSniffer; +import nu.marginalia.converting.processor.pubdate.heuristic.PubDateHeuristicDOMParsingPass2; import nu.marginalia.model.EdgeUrl; import nu.marginalia.model.html.HtmlStandard; -import nu.marginalia.pubdate.heuristic.PubDateHeuristicDOMParsingPass2; import org.jsoup.Jsoup; import org.junit.jupiter.api.Test; diff --git a/code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/pubdate/PubDateTest.java similarity index 88% rename from code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/pubdate/PubDateTest.java index 64bd1f73..a9eb5cb3 100644 --- a/code/features-convert/pubdate/test/nu/marginalia/pubdate/PubDateTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/pubdate/PubDateTest.java @@ -1,4 +1,4 @@ -package nu.marginalia.pubdate; +package nu.marginalia.converting.processor.pubdate; import nu.marginalia.model.crawl.PubDate; import org.junit.jupiter.api.Test; diff --git a/code/features-convert/summary-extraction/test/nu/marginalia/summary/SummaryExtractorTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/summary/SummaryExtractorTest.java similarity index 96% rename from 
code/features-convert/summary-extraction/test/nu/marginalia/summary/SummaryExtractorTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/summary/SummaryExtractorTest.java index f11eb304..0cc18d0d 100644 --- a/code/features-convert/summary-extraction/test/nu/marginalia/summary/SummaryExtractorTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/summary/SummaryExtractorTest.java @@ -1,13 +1,12 @@ -package nu.marginalia.summary; +package nu.marginalia.converting.processor.summary; import lombok.SneakyThrows; import nu.marginalia.WmsaHome; +import nu.marginalia.converting.processor.summary.SummaryExtractor; +import nu.marginalia.converting.processor.summary.heuristic.*; import nu.marginalia.keyword.DocumentKeywordExtractor; import nu.marginalia.language.sentence.SentenceExtractor; import nu.marginalia.model.EdgeUrl; -import nu.marginalia.segmentation.NgramLexicon; -import nu.marginalia.summary.heuristic.*; -import nu.marginalia.term_frequency_dict.TermFrequencyDict; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.junit.jupiter.api.Assertions; diff --git a/code/features-convert/summary-extraction/test/nu/marginalia/summary/heuristic/HeuristicTextUtilTest.java b/code/processes/converting-process/test/nu/marginalia/converting/processor/summary/heuristic/HeuristicTextUtilTest.java similarity index 93% rename from code/features-convert/summary-extraction/test/nu/marginalia/summary/heuristic/HeuristicTextUtilTest.java rename to code/processes/converting-process/test/nu/marginalia/converting/processor/summary/heuristic/HeuristicTextUtilTest.java index 9ea11fac..38da765e 100644 --- a/code/features-convert/summary-extraction/test/nu/marginalia/summary/heuristic/HeuristicTextUtilTest.java +++ b/code/processes/converting-process/test/nu/marginalia/converting/processor/summary/heuristic/HeuristicTextUtilTest.java @@ -1,5 +1,6 @@ -package nu.marginalia.summary.heuristic; +package 
nu.marginalia.converting.processor.summary.heuristic; +import nu.marginalia.converting.processor.summary.heuristic.HeuristicTextUtil; import org.junit.jupiter.api.Test; import java.util.Set; diff --git a/code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/RedditEntryReaderTest.java b/code/processes/converting-process/test/nu/marginalia/integration/reddit/RedditEntryReaderTest.java similarity index 100% rename from code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/RedditEntryReaderTest.java rename to code/processes/converting-process/test/nu/marginalia/integration/reddit/RedditEntryReaderTest.java diff --git a/code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/db/RedditDbTest.java b/code/processes/converting-process/test/nu/marginalia/integration/reddit/db/RedditDbTest.java similarity index 100% rename from code/features-convert/reddit-json/test/nu/marginalia/integration/reddit/db/RedditDbTest.java rename to code/processes/converting-process/test/nu/marginalia/integration/reddit/db/RedditDbTest.java diff --git a/code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReaderTest.java b/code/processes/converting-process/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReaderTest.java similarity index 100% rename from code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReaderTest.java rename to code/processes/converting-process/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlCommentReaderTest.java diff --git a/code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReaderTest.java b/code/processes/converting-process/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReaderTest.java similarity index 100% rename from 
code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReaderTest.java rename to code/processes/converting-process/test/nu/marginalia/integration/stackexchange/xml/StackExchangeXmlPostReaderTest.java diff --git a/code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StringXmlTestEventReader.java b/code/processes/converting-process/test/nu/marginalia/integration/stackexchange/xml/StringXmlTestEventReader.java similarity index 100% rename from code/features-convert/stackexchange-xml/test/nu/marginalia/integration/stackexchange/xml/StringXmlTestEventReader.java rename to code/processes/converting-process/test/nu/marginalia/integration/stackexchange/xml/StringXmlTestEventReader.java diff --git a/code/processes/crawling-process/build.gradle b/code/processes/crawling-process/build.gradle index 4fdea7d8..2d34904f 100644 --- a/code/processes/crawling-process/build.gradle +++ b/code/processes/crawling-process/build.gradle @@ -37,10 +37,10 @@ dependencies { implementation project(':code:processes:crawling-process:model') - implementation project(':code:features-convert:anchor-keywords') - implementation project(':code:features-crawl:crawl-blocklist') - implementation project(':code:features-crawl:link-parser') - implementation project(':code:features-crawl:content-type') + implementation project(':code:processes:converting-process:ft-anchor-keywords') + implementation project(':code:processes:crawling-process:ft-crawl-blocklist') + implementation project(':code:processes:crawling-process:ft-link-parser') + implementation project(':code:processes:crawling-process:ft-content-type') implementation project(':third-party:commons-codec') implementation libs.bundles.slf4j diff --git a/code/features-crawl/content-type/build.gradle b/code/processes/crawling-process/ft-content-type/build.gradle similarity index 100% rename from code/features-crawl/content-type/build.gradle rename to 
code/processes/crawling-process/ft-content-type/build.gradle diff --git a/code/features-crawl/content-type/java/nu/marginalia/contenttype/ContentType.java b/code/processes/crawling-process/ft-content-type/java/nu/marginalia/contenttype/ContentType.java similarity index 100% rename from code/features-crawl/content-type/java/nu/marginalia/contenttype/ContentType.java rename to code/processes/crawling-process/ft-content-type/java/nu/marginalia/contenttype/ContentType.java diff --git a/code/features-crawl/content-type/java/nu/marginalia/contenttype/ContentTypeParser.java b/code/processes/crawling-process/ft-content-type/java/nu/marginalia/contenttype/ContentTypeParser.java similarity index 100% rename from code/features-crawl/content-type/java/nu/marginalia/contenttype/ContentTypeParser.java rename to code/processes/crawling-process/ft-content-type/java/nu/marginalia/contenttype/ContentTypeParser.java diff --git a/code/features-crawl/content-type/java/nu/marginalia/contenttype/DocumentBodyToString.java b/code/processes/crawling-process/ft-content-type/java/nu/marginalia/contenttype/DocumentBodyToString.java similarity index 100% rename from code/features-crawl/content-type/java/nu/marginalia/contenttype/DocumentBodyToString.java rename to code/processes/crawling-process/ft-content-type/java/nu/marginalia/contenttype/DocumentBodyToString.java diff --git a/code/features-crawl/content-type/test/nu/marginalia/contenttype/ContentTypeParserTest.java b/code/processes/crawling-process/ft-content-type/test/nu/marginalia/contenttype/ContentTypeParserTest.java similarity index 100% rename from code/features-crawl/content-type/test/nu/marginalia/contenttype/ContentTypeParserTest.java rename to code/processes/crawling-process/ft-content-type/test/nu/marginalia/contenttype/ContentTypeParserTest.java diff --git a/code/features-crawl/content-type/test/nu/marginalia/contenttype/DocumentBodyToStringTest.java 
b/code/processes/crawling-process/ft-content-type/test/nu/marginalia/contenttype/DocumentBodyToStringTest.java similarity index 100% rename from code/features-crawl/content-type/test/nu/marginalia/contenttype/DocumentBodyToStringTest.java rename to code/processes/crawling-process/ft-content-type/test/nu/marginalia/contenttype/DocumentBodyToStringTest.java diff --git a/code/features-crawl/crawl-blocklist/build.gradle b/code/processes/crawling-process/ft-crawl-blocklist/build.gradle similarity index 100% rename from code/features-crawl/crawl-blocklist/build.gradle rename to code/processes/crawling-process/ft-crawl-blocklist/build.gradle diff --git a/code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java b/code/processes/crawling-process/ft-crawl-blocklist/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java similarity index 100% rename from code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java rename to code/processes/crawling-process/ft-crawl-blocklist/java/nu/marginalia/ip_blocklist/GeoIpBlocklist.java diff --git a/code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/InetAddressCache.java b/code/processes/crawling-process/ft-crawl-blocklist/java/nu/marginalia/ip_blocklist/InetAddressCache.java similarity index 100% rename from code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/InetAddressCache.java rename to code/processes/crawling-process/ft-crawl-blocklist/java/nu/marginalia/ip_blocklist/InetAddressCache.java diff --git a/code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/IpBlockList.java b/code/processes/crawling-process/ft-crawl-blocklist/java/nu/marginalia/ip_blocklist/IpBlockList.java similarity index 100% rename from code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/IpBlockList.java rename to code/processes/crawling-process/ft-crawl-blocklist/java/nu/marginalia/ip_blocklist/IpBlockList.java diff --git 
a/code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/UrlBlocklist.java b/code/processes/crawling-process/ft-crawl-blocklist/java/nu/marginalia/ip_blocklist/UrlBlocklist.java similarity index 100% rename from code/features-crawl/crawl-blocklist/java/nu/marginalia/ip_blocklist/UrlBlocklist.java rename to code/processes/crawling-process/ft-crawl-blocklist/java/nu/marginalia/ip_blocklist/UrlBlocklist.java diff --git a/code/features-crawl/crawl-blocklist/readme.md b/code/processes/crawling-process/ft-crawl-blocklist/readme.md similarity index 100% rename from code/features-crawl/crawl-blocklist/readme.md rename to code/processes/crawling-process/ft-crawl-blocklist/readme.md diff --git a/code/features-crawl/crawl-blocklist/test/nu/marginalia/ip_blocklist/UrlBlocklistTest.java b/code/processes/crawling-process/ft-crawl-blocklist/test/nu/marginalia/ip_blocklist/UrlBlocklistTest.java similarity index 100% rename from code/features-crawl/crawl-blocklist/test/nu/marginalia/ip_blocklist/UrlBlocklistTest.java rename to code/processes/crawling-process/ft-crawl-blocklist/test/nu/marginalia/ip_blocklist/UrlBlocklistTest.java diff --git a/code/features-crawl/link-parser/build.gradle b/code/processes/crawling-process/ft-link-parser/build.gradle similarity index 100% rename from code/features-crawl/link-parser/build.gradle rename to code/processes/crawling-process/ft-link-parser/build.gradle diff --git a/code/features-crawl/link-parser/java/nu/marginalia/link_parser/FeedExtractor.java b/code/processes/crawling-process/ft-link-parser/java/nu/marginalia/link_parser/FeedExtractor.java similarity index 100% rename from code/features-crawl/link-parser/java/nu/marginalia/link_parser/FeedExtractor.java rename to code/processes/crawling-process/ft-link-parser/java/nu/marginalia/link_parser/FeedExtractor.java diff --git a/code/features-crawl/link-parser/java/nu/marginalia/link_parser/LinkParser.java 
b/code/processes/crawling-process/ft-link-parser/java/nu/marginalia/link_parser/LinkParser.java similarity index 100% rename from code/features-crawl/link-parser/java/nu/marginalia/link_parser/LinkParser.java rename to code/processes/crawling-process/ft-link-parser/java/nu/marginalia/link_parser/LinkParser.java diff --git a/code/features-crawl/link-parser/readme.md b/code/processes/crawling-process/ft-link-parser/readme.md similarity index 100% rename from code/features-crawl/link-parser/readme.md rename to code/processes/crawling-process/ft-link-parser/readme.md diff --git a/code/processes/crawling-process/model/build.gradle b/code/processes/crawling-process/model/build.gradle index 5e4879d1..50103c41 100644 --- a/code/processes/crawling-process/model/build.gradle +++ b/code/processes/crawling-process/model/build.gradle @@ -22,7 +22,7 @@ dependencies { implementation project(':code:common:config') implementation project(':code:common:process') implementation project(':code:index:api') - implementation project(':code:features-crawl:content-type') + implementation project(':code:processes:crawling-process:ft-content-type') implementation project(':code:libraries:language-processing') implementation project(':third-party:parquet-floor') implementation project(':third-party:commons-codec') diff --git a/code/processes/loading-process/build.gradle b/code/processes/loading-process/build.gradle index 57bf8eaf..341db8ab 100644 --- a/code/processes/loading-process/build.gradle +++ b/code/processes/loading-process/build.gradle @@ -39,7 +39,7 @@ dependencies { implementation project(':code:processes:crawling-process:model') implementation project(':code:processes:converting-process:model') - implementation project(':code:features-convert:keyword-extraction') + implementation project(':code:processes:converting-process:ft-keyword-extraction') implementation project(':code:functions:link-graph:partition') diff --git a/code/services-core/executor-service/build.gradle 
b/code/services-core/executor-service/build.gradle index 74696bf3..b53aa404 100644 --- a/code/services-core/executor-service/build.gradle +++ b/code/services-core/executor-service/build.gradle @@ -47,10 +47,8 @@ dependencies { implementation project(':code:processes:crawling-process:model') implementation project(':code:processes:crawling-process:model') - implementation project(':code:features-crawl:link-parser') - implementation project(':code:features-convert:data-extractors') - implementation project(':code:features-convert:stackexchange-xml') - implementation project(':code:features-convert:reddit-json') + implementation project(':code:processes:crawling-process:ft-link-parser') + implementation project(':code:execution:data-extractors') implementation project(':code:index:index-journal') implementation project(':code:index:api') implementation project(':code:processes:process-mq-api') diff --git a/code/tools/experiment-runner/build.gradle b/code/tools/experiment-runner/build.gradle index 2aea9f76..d011a973 100644 --- a/code/tools/experiment-runner/build.gradle +++ b/code/tools/experiment-runner/build.gradle @@ -35,11 +35,9 @@ dependencies { implementation project(':code:processes:crawling-process:model') implementation project(':third-party:commons-codec') - implementation project(':code:features-crawl:link-parser') - implementation project(':code:features-convert:adblock') - implementation project(':code:features-convert:anchor-keywords') - implementation project(':code:features-convert:topic-detection') - implementation project(':code:features-convert:keyword-extraction') + implementation project(':code:processes:crawling-process:ft-link-parser') + implementation project(':code:processes:converting-process:ft-anchor-keywords') + implementation project(':code:processes:converting-process:ft-keyword-extraction') implementation libs.bundles.slf4j implementation libs.notnull diff --git 
a/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AdblockExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AdblockExperiment.java index 60cb6938..dc46f3bd 100644 --- a/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AdblockExperiment.java +++ b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/AdblockExperiment.java @@ -1,8 +1,8 @@ package nu.marginalia.tools.experiments; import com.google.inject.Inject; -import nu.marginalia.adblock.AdblockSimulator; import nu.marginalia.converting.processor.DocumentProcessor; +import nu.marginalia.converting.processor.classifier.adblock.AdblockSimulator; import nu.marginalia.model.crawldata.CrawledDocument; import nu.marginalia.model.crawldata.CrawledDomain; import nu.marginalia.tools.LegacyExperiment; diff --git a/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TopicExperiment.java b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TopicExperiment.java index ad2be0bb..5ea9551d 100644 --- a/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TopicExperiment.java +++ b/code/tools/experiment-runner/java/nu/marginalia/tools/experiments/TopicExperiment.java @@ -2,14 +2,14 @@ package nu.marginalia.tools.experiments; import com.google.inject.Inject; import nu.marginalia.WmsaHome; -import nu.marginalia.adblock.GoogleAnwersSpamDetector; +import nu.marginalia.converting.processor.classifier.adblock.GoogleAnwersSpamDetector; +import nu.marginalia.converting.processor.classifier.topic.RecipeDetector; +import nu.marginalia.converting.processor.classifier.topic.TextileCraftDetector; +import nu.marginalia.converting.processor.classifier.topic.WoodworkingDetector; import nu.marginalia.converting.processor.logic.dom.DomPruningFilter; import nu.marginalia.language.sentence.SentenceExtractor; import nu.marginalia.model.crawldata.CrawledDomain; import nu.marginalia.tools.LegacyExperiment; -import 
nu.marginalia.topic.RecipeDetector; -import nu.marginalia.topic.TextileCraftDetector; -import nu.marginalia.topic.WoodworkingDetector; import org.jsoup.Jsoup; public class TopicExperiment extends LegacyExperiment { diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 48c0a02c..0d184210 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,5 +1,5 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip +distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/settings.gradle b/settings.gradle index 78ec0028..03d4273d 100644 --- a/settings.gradle +++ b/settings.gradle @@ -56,19 +56,12 @@ include 'code:features-search:screenshots' include 'code:features-search:random-websites' include 'code:features-search:feedlot-client' -include 'code:features-convert:adblock' -include 'code:features-convert:anchor-keywords' -include 'code:features-convert:data-extractors' -include 'code:features-convert:stackexchange-xml' -include 'code:features-convert:reddit-json' -include 'code:features-convert:pubdate' -include 'code:features-convert:summary-extraction' -include 'code:features-convert:keyword-extraction' -include 'code:features-convert:topic-detection' +include 'code:processes:converting-process:ft-anchor-keywords' +include 'code:execution:data-extractors' -include 'code:features-crawl:crawl-blocklist' -include 'code:features-crawl:link-parser' -include 'code:features-crawl:content-type' +include 'code:processes:crawling-process:ft-crawl-blocklist' +include 'code:processes:crawling-process:ft-link-parser' +include 'code:processes:crawling-process:ft-content-type' include 'code:processes:process-mq-api' @@ -82,6 +75,7 @@ include 'code:common:process' include 'code:processes:converting-process' 
include 'code:processes:converting-process:model' +include 'code:processes:converting-process:ft-keyword-extraction' include 'code:processes:crawling-process' include 'code:processes:crawling-process:model'