Merge branch 'master' into master-control-program

This commit is contained in:
Viktor 2023-08-07 12:53:43 +02:00 committed by GitHub
commit 52e2ab45bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 31 additions and 2 deletions

View File

@ -3,7 +3,6 @@ plugins {
id "io.freefair.lombok" version "5.3.3.3"
id 'jvm-test-suite'
id "org.flywaydb.flyway" version "8.2.0"
}
java {
@ -47,7 +46,6 @@ dependencies {
}
flyway {
url = 'jdbc:mariadb://localhost:3306/WMSA_prod'
user = 'wmsa'
password = 'wmsa'
@ -56,6 +54,7 @@ flyway {
locations = ['filesystem:src/main/resources/db/migration']
}
test {
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
maxHeapSize = "8G"

View File

@ -0,0 +1,19 @@
-- Catalogue of domain selection types. Every row carries an auto-generated ID,
-- an optional but unique NAME, and a mandatory SOURCE string.
-- The table uses the binary utf8mb4 collation.
CREATE TABLE IF NOT EXISTS DOMAIN_SELECTION_TYPE (
    ID     INT          NOT NULL AUTO_INCREMENT,
    NAME   VARCHAR(255),
    SOURCE VARCHAR(255) NOT NULL,
    PRIMARY KEY (ID),
    UNIQUE (NAME)
)
    DEFAULT CHARACTER SET utf8mb4
    COLLATE utf8mb4_bin;
-- Domains belonging to a selection, keyed by domain name. DOMAIN_TYPE_ID
-- references the owning row in DOMAIN_SELECTION_TYPE.
--
-- IF NOT EXISTS keeps this statement re-runnable, in line with the other
-- statements of this script (CREATE TABLE IF NOT EXISTS / INSERT IGNORE).
-- MariaDB commits DDL implicitly, so a migration that fails part-way leaves
-- already-created tables behind and a plain CREATE TABLE would then block
-- the retry.
CREATE TABLE IF NOT EXISTS DOMAIN_SELECTION (
    DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
    DOMAIN_TYPE_ID INT,
    -- Deleting a selection type also deletes the domains filed under it.
    FOREIGN KEY (DOMAIN_TYPE_ID) REFERENCES DOMAIN_SELECTION_TYPE(ID) ON DELETE CASCADE
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_unicode_ci;
-- Seed the two built-in selection types. IGNORE turns a duplicate-NAME
-- conflict (NAME is UNIQUE) into a skipped row, so re-running is harmless.
INSERT IGNORE INTO DOMAIN_SELECTION_TYPE (NAME, SOURCE)
VALUES
    ('BLOG', 'https://raw.githubusercontent.com/MarginaliaSearch/submit-site-to-marginalia-search/master/blogs.txt'),
    ('TEST', 'https://downloads.marginalia.nu/domain-list-test.txt');

View File

@ -16,6 +16,7 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.function.Predicate;
import java.util.stream.Stream;
import java.util.Optional;
@AllArgsConstructor @NoArgsConstructor @ToString

View File

@ -36,6 +36,7 @@ public class ConverterDomainTypes {
}
for (var item : allBlogs) {
blogs.add(new EdgeDomain(item));
}

View File

@ -166,6 +166,15 @@ public class CrawlerMain {
finally {
heartbeat.shutDown();
}
pool.execute(() -> {
try {
fetchDomain(crawlingSpecification);
}
finally {
taskSem.release();
}
});
}
class CrawlTask implements DumbThreadPool.Task {