mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Merge branch 'master' into master-control-program
This commit is contained in:
commit
52e2ab45bf
@ -3,7 +3,6 @@ plugins {
|
|||||||
id "io.freefair.lombok" version "5.3.3.3"
|
id "io.freefair.lombok" version "5.3.3.3"
|
||||||
id 'jvm-test-suite'
|
id 'jvm-test-suite'
|
||||||
id "org.flywaydb.flyway" version "8.2.0"
|
id "org.flywaydb.flyway" version "8.2.0"
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
java {
|
java {
|
||||||
@ -47,7 +46,6 @@ dependencies {
|
|||||||
}
|
}
|
||||||
|
|
||||||
flyway {
|
flyway {
|
||||||
|
|
||||||
url = 'jdbc:mariadb://localhost:3306/WMSA_prod'
|
url = 'jdbc:mariadb://localhost:3306/WMSA_prod'
|
||||||
user = 'wmsa'
|
user = 'wmsa'
|
||||||
password = 'wmsa'
|
password = 'wmsa'
|
||||||
@ -56,6 +54,7 @@ flyway {
|
|||||||
locations = ['filesystem:src/main/resources/db/migration']
|
locations = ['filesystem:src/main/resources/db/migration']
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
test {
|
test {
|
||||||
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
|
||||||
maxHeapSize = "8G"
|
maxHeapSize = "8G"
|
||||||
|
@ -0,0 +1,19 @@
|
|||||||
|
CREATE TABLE IF NOT EXISTS DOMAIN_SELECTION_TYPE (
|
||||||
|
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||||
|
NAME VARCHAR(255) UNIQUE,
|
||||||
|
SOURCE VARCHAR(255) NOT NULL
|
||||||
|
)
|
||||||
|
CHARACTER SET utf8mb4
|
||||||
|
COLLATE utf8mb4_bin;
|
||||||
|
|
||||||
|
CREATE TABLE DOMAIN_SELECTION (
|
||||||
|
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
|
||||||
|
DOMAIN_TYPE_ID INT,
|
||||||
|
FOREIGN KEY (DOMAIN_TYPE_ID) REFERENCES DOMAIN_SELECTION_TYPE(ID) ON DELETE CASCADE
|
||||||
|
)
|
||||||
|
CHARACTER SET utf8mb4
|
||||||
|
COLLATE utf8mb4_unicode_ci;
|
||||||
|
|
||||||
|
INSERT IGNORE INTO DOMAIN_SELECTION_TYPE(NAME, SOURCE)
|
||||||
|
VALUES ('BLOG', 'https://raw.githubusercontent.com/MarginaliaSearch/submit-site-to-marginalia-search/master/blogs.txt'),
|
||||||
|
('TEST', 'https://downloads.marginalia.nu/domain-list-test.txt');
|
@ -16,6 +16,7 @@ import java.io.IOException;
|
|||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.function.Predicate;
|
import java.util.function.Predicate;
|
||||||
|
import java.util.stream.Stream;
|
||||||
import java.util.Optional;
|
import java.util.Optional;
|
||||||
|
|
||||||
@AllArgsConstructor @NoArgsConstructor @ToString
|
@AllArgsConstructor @NoArgsConstructor @ToString
|
||||||
|
@ -36,6 +36,7 @@ public class ConverterDomainTypes {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (var item : allBlogs) {
|
for (var item : allBlogs) {
|
||||||
|
|
||||||
blogs.add(new EdgeDomain(item));
|
blogs.add(new EdgeDomain(item));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -166,6 +166,15 @@ public class CrawlerMain {
|
|||||||
finally {
|
finally {
|
||||||
heartbeat.shutDown();
|
heartbeat.shutDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pool.execute(() -> {
|
||||||
|
try {
|
||||||
|
fetchDomain(crawlingSpecification);
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
taskSem.release();
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
class CrawlTask implements DumbThreadPool.Task {
|
class CrawlTask implements DumbThreadPool.Task {
|
||||||
|
Loading…
Reference in New Issue
Block a user