diff --git a/code/common/db/build.gradle b/code/common/db/build.gradle index 52f63895..62ede7ac 100644 --- a/code/common/db/build.gradle +++ b/code/common/db/build.gradle @@ -3,7 +3,6 @@ plugins { id "io.freefair.lombok" version "5.3.3.3" id 'jvm-test-suite' id "org.flywaydb.flyway" version "8.2.0" - } java { @@ -47,7 +46,6 @@ dependencies { } flyway { - url = 'jdbc:mariadb://localhost:3306/WMSA_prod' user = 'wmsa' password = 'wmsa' @@ -56,6 +54,7 @@ flyway { locations = ['filesystem:src/main/resources/db/migration'] } + test { maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1 maxHeapSize = "8G" diff --git a/code/common/db/src/main/resources/sql/current/10-domain-type.sql b/code/common/db/src/main/resources/sql/current/10-domain-type.sql new file mode 100644 index 00000000..2011d1f6 --- /dev/null +++ b/code/common/db/src/main/resources/sql/current/10-domain-type.sql @@ -0,0 +1,19 @@ +CREATE TABLE IF NOT EXISTS DOMAIN_SELECTION_TYPE ( + ID INT PRIMARY KEY AUTO_INCREMENT, + NAME VARCHAR(255) UNIQUE, + SOURCE VARCHAR(255) NOT NULL +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_bin; + +CREATE TABLE IF NOT EXISTS DOMAIN_SELECTION ( + DOMAIN_NAME VARCHAR(255) PRIMARY KEY, + DOMAIN_TYPE_ID INT, + FOREIGN KEY (DOMAIN_TYPE_ID) REFERENCES DOMAIN_SELECTION_TYPE(ID) ON DELETE CASCADE +) +CHARACTER SET utf8mb4 +COLLATE utf8mb4_unicode_ci; + +INSERT IGNORE INTO DOMAIN_SELECTION_TYPE(NAME, SOURCE) +VALUES ('BLOG', 'https://raw.githubusercontent.com/MarginaliaSearch/submit-site-to-marginalia-search/master/blogs.txt'), + ('TEST', 'https://downloads.marginalia.nu/domain-list-test.txt'); \ No newline at end of file diff --git a/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java b/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java index 0f6d66ea..71307140 100644 --- a/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java +++ b/code/process-models/crawling-model/src/main/java/plan/CrawlPlan.java @@ -16,6 +16,7 @@ import 
java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.function.Predicate; +import java.util.stream.Stream; import java.util.Optional; @AllArgsConstructor @NoArgsConstructor @ToString diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/ConverterDomainTypes.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/ConverterDomainTypes.java index a8f3db7a..83f3ad22 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/ConverterDomainTypes.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/processor/ConverterDomainTypes.java @@ -36,6 +36,7 @@ public class ConverterDomainTypes { } for (var item : allBlogs) { + blogs.add(new EdgeDomain(item)); } diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java index fd936a7a..3fad75a9 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java @@ -166,6 +166,15 @@ public class CrawlerMain { finally { heartbeat.shutDown(); } + + pool.execute(() -> { + try { + fetchDomain(crawlingSpecification); + } + finally { + taskSem.release(); + } + }); } class CrawlTask implements DumbThreadPool.Task {