mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Move IP Location database out of classpath and into WMSA_HOME/data
This commit is contained in:
parent
056dec5506
commit
61ef2b06b0
@ -170,6 +170,7 @@ task e2eTest(type: Test) {
|
|||||||
dependsOn 'downloadSentenceModelData'
|
dependsOn 'downloadSentenceModelData'
|
||||||
dependsOn 'downloadTokenModelData'
|
dependsOn 'downloadTokenModelData'
|
||||||
dependsOn 'downloadTermFreqData'
|
dependsOn 'downloadTermFreqData'
|
||||||
|
dependsOn 'IP2LocationFile'
|
||||||
|
|
||||||
classpath = sourceSets.e2eTest.runtimeClasspath
|
classpath = sourceSets.e2eTest.runtimeClasspath
|
||||||
testClassesDirs = sourceSets.e2eTest.output.classesDirs
|
testClassesDirs = sourceSets.e2eTest.output.classesDirs
|
||||||
@ -201,10 +202,23 @@ task downloadTokenModelData(type: Download) {
|
|||||||
dest file('data/models/opennlp-tokens.bin')
|
dest file('data/models/opennlp-tokens.bin')
|
||||||
overwrite false
|
overwrite false
|
||||||
}
|
}
|
||||||
|
// Fetches the zipped IP2Location LITE DB1 CSV archive into data/models.
// With overwrite disabled, a copy already present at the destination is kept.
task downloadIP2LocationFile(type: Download) {
    src('https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP')
    dest(file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP'))
    overwrite(false)
}
|
||||||
|
// Unpacks the IP2Location zip archive from data/models into data/models/IP2LOC.
// Depends on downloadIP2LocationFile, which produces the archive read here.
task IP2LocationFile(type: Copy) {
    dependsOn('downloadIP2LocationFile')

    from(zipTree(file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP')))
    into(file("data/models/IP2LOC"))
}
|
||||||
|
|
||||||
task downloadTermFreqData(type: Copy) {
|
task downloadTermFreqData(type: Copy) {
|
||||||
// TODO: Need hosting for this file
|
// TODO: Need hosting for this file
|
||||||
from '/var/lib/wmsa/model/tfreq-new-algo3.bin'
|
from '/var/lib/wmsa/model/tfreq-new-algo3.bin'
|
||||||
into 'data/models'
|
into 'data/models/'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -72,6 +72,7 @@ public class EdgeSearchE2ETest {
|
|||||||
.withNetwork(network)
|
.withNetwork(network)
|
||||||
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("crawler")))
|
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("crawler")))
|
||||||
.withFileSystemBind(modelsPath(), "/var/lib/wmsa/model", BindMode.READ_ONLY)
|
.withFileSystemBind(modelsPath(), "/var/lib/wmsa/model", BindMode.READ_ONLY)
|
||||||
|
.withCopyFileToContainer(ipDatabasePath(), "/var/lib/wmsa/data/IP2LOCATION-LITE-DB1.CSV")
|
||||||
.withCopyFileToContainer(jarFile(), "/WMSA.jar")
|
.withCopyFileToContainer(jarFile(), "/WMSA.jar")
|
||||||
.withCopyFileToContainer(MountableFile.forClasspathResource("crawl.sh"), "/crawl.sh")
|
.withCopyFileToContainer(MountableFile.forClasspathResource("crawl.sh"), "/crawl.sh")
|
||||||
.withFileSystemBind(getCrawlPath().toString(), "/crawl/", BindMode.READ_WRITE)
|
.withFileSystemBind(getCrawlPath().toString(), "/crawl/", BindMode.READ_WRITE)
|
||||||
@ -127,6 +128,14 @@ public class EdgeSearchE2ETest {
|
|||||||
}
|
}
|
||||||
return modelsPath.toString();
|
return modelsPath.toString();
|
||||||
}
|
}
|
||||||
|
public static MountableFile ipDatabasePath() {
|
||||||
|
Path modelsPath = Path.of(System.getProperty("user.dir")).resolve("data/models/IP2LOC/IP2LOCATION-LITE-DB1.CSV");
|
||||||
|
if (!Files.isRegularFile(modelsPath)) {
|
||||||
|
System.err.println("Could not find models, looked in " + modelsPath.toAbsolutePath());
|
||||||
|
throw new RuntimeException();
|
||||||
|
}
|
||||||
|
return MountableFile.forHostPath(modelsPath.toString());
|
||||||
|
}
|
||||||
|
|
||||||
private Path getCrawlPath() {
|
private Path getCrawlPath() {
|
||||||
return Path.of(System.getProperty("user.dir")).resolve("build/tmp/crawl");
|
return Path.of(System.getProperty("user.dir")).resolve("build/tmp/crawl");
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
mkdir -p /var/lib/wmsa/conf/
|
mkdir -p /var/lib/wmsa/conf/
|
||||||
|
mkdir -p /var/lib/wmsa/data/
|
||||||
|
|
||||||
cat > /var/lib/wmsa/db.properties <<EOF
|
cat > /var/lib/wmsa/db.properties <<EOF
|
||||||
db.user=wmsa
|
db.user=wmsa
|
||||||
|
@ -28,4 +28,8 @@ public class WmsaHome {
|
|||||||
return new HostsFile();
|
return new HostsFile();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static Path getIPLocationDatabse() {
|
||||||
|
return getHomePath().resolve("data").resolve("IP2LOCATION-LITE-DB1.CSV");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -6,10 +6,12 @@ import com.google.inject.Singleton;
|
|||||||
import com.opencsv.CSVReader;
|
import com.opencsv.CSVReader;
|
||||||
import com.opencsv.exceptions.CsvValidationException;
|
import com.opencsv.exceptions.CsvValidationException;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
|
import nu.marginalia.wmsa.configuration.WmsaHome;
|
||||||
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
import nu.marginalia.wmsa.edge.model.EdgeDomain;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.FileReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.net.InetAddress;
|
import java.net.InetAddress;
|
||||||
@ -24,8 +26,6 @@ public class GeoIpBlocklist {
|
|||||||
private final Set<String> blacklist = Set.of("CN", "HK");
|
private final Set<String> blacklist = Set.of("CN", "HK");
|
||||||
private final Set<String> graylist = Set.of("RU", "TW", "IN", "ZA", "SG", "UA");
|
private final Set<String> graylist = Set.of("RU", "TW", "IN", "ZA", "SG", "UA");
|
||||||
|
|
||||||
private final Cache<String, String> countryCache = CacheBuilder.newBuilder().maximumSize(100_000).build();
|
|
||||||
|
|
||||||
private static final Logger logger = LoggerFactory.getLogger(GeoIpBlocklist.class);
|
private static final Logger logger = LoggerFactory.getLogger(GeoIpBlocklist.class);
|
||||||
|
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@ -36,10 +36,9 @@ public class GeoIpBlocklist {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public GeoIpBlocklist() throws IOException, CsvValidationException {
|
public GeoIpBlocklist() throws IOException, CsvValidationException {
|
||||||
var resource = Objects.requireNonNull(ClassLoader.getSystemResourceAsStream("IP2LOCATION-LITE-DB1.CSV"),
|
var resource = WmsaHome.getIPLocationDatabse();
|
||||||
"Could not load IP location db");
|
|
||||||
|
|
||||||
try (var reader = new CSVReader(new InputStreamReader(resource, StandardCharsets.UTF_8))) {
|
try (var reader = new CSVReader(new FileReader(resource.toFile()))) {
|
||||||
for (;;) {
|
for (;;) {
|
||||||
String[] vals = reader.readNext();
|
String[] vals = reader.readNext();
|
||||||
if (vals == null) {
|
if (vals == null) {
|
||||||
|
Loading…
Reference in New Issue
Block a user