mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(setup) Move some file-downloads from setup script to the first boot of the control node of the system
We can only do this for files that are not required for unit tests. As it is illegal to run more than one instance of the control service, this should be fine with regard to race conditions. The boot orchestration will also ensure that no other services will boot up before the downloading is complete.
This commit is contained in:
parent
dbb8bcdd8e
commit
76e9053dd0
@ -75,6 +75,10 @@ public class WmsaHome {
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static Path getDataPath() {
|
||||
return getHomePath().resolve("data");
|
||||
}
|
||||
|
||||
public static Path getAdsDefinition() {
|
||||
return getHomePath().resolve("data").resolve("adblock.txt");
|
||||
}
|
||||
|
@ -3,22 +3,33 @@ package nu.marginalia.control;
|
||||
import com.google.inject.Guice;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Injector;
|
||||
import nu.marginalia.WmsaHome;
|
||||
import nu.marginalia.service.MainClass;
|
||||
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import nu.marginalia.service.module.ServiceDiscoveryModule;
|
||||
import nu.marginalia.service.ServiceId;
|
||||
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||
import nu.marginalia.service.module.DatabaseModule;
|
||||
import nu.marginalia.service.module.ServiceConfiguration;
|
||||
import nu.marginalia.service.module.ServiceConfigurationModule;
|
||||
import nu.marginalia.service.module.ServiceDiscoveryModule;
|
||||
import nu.marginalia.service.server.Initialization;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
import java.util.zip.ZipFile;
|
||||
|
||||
public class ControlMain extends MainClass {
|
||||
|
||||
/**
 * Injected constructor; the body is intentionally empty.
 * <p>
 * The {@code service} argument is never read here. NOTE(review): presumably it is
 * declared so that requesting a ControlMain from the injector also forces the
 * ControlService to be constructed -- confirm.
 */
@Inject
|
||||
public ControlMain(ControlService service) {
|
||||
}
|
||||
|
||||
public static void main(String... args) {
|
||||
public static void main(String... args) throws Exception {
|
||||
init(ServiceId.Control, args);
|
||||
|
||||
Injector injector = Guice.createInjector(
|
||||
@ -30,9 +41,97 @@ public class ControlMain extends MainClass {
|
||||
// Orchestrate the boot order for the services
|
||||
var registry = injector.getInstance(ServiceRegistryIf.class);
|
||||
var configuration = injector.getInstance(ServiceConfiguration.class);
|
||||
|
||||
// This must be run before orchestrateBoot, so that the other services don't
|
||||
// start up until we're done
|
||||
downloadAncillaryFiles(WmsaHome.getDataPath());
|
||||
|
||||
orchestrateBoot(registry, configuration);
|
||||
|
||||
|
||||
injector.getInstance(ControlMain.class);
|
||||
injector.getInstance(Initialization.class).setReady();
|
||||
}
|
||||
|
||||
static void downloadAncillaryFiles(Path dataPath) throws Exception {
|
||||
Path adblockFile = dataPath.resolve("adblock.txt");
|
||||
if (!Files.exists(adblockFile)) {
|
||||
download(adblockFile, new URI("https://downloads.marginalia.nu/data/adblock.txt"));
|
||||
}
|
||||
|
||||
Path suggestionsFile = dataPath.resolve("suggestions.txt");
|
||||
if (!Files.exists(suggestionsFile)) {
|
||||
downloadGzipped(suggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions.txt.gz"));
|
||||
}
|
||||
|
||||
Path asnRawData = dataPath.resolve("asn-data-raw-table");
|
||||
if (!Files.exists(asnRawData)) {
|
||||
download(asnRawData, new URI("https://thyme.apnic.net/current/data-raw-table"));
|
||||
}
|
||||
|
||||
Path asnUsedAutnums = dataPath.resolve("asn-used-autnums");
|
||||
if (!Files.exists(asnUsedAutnums)) {
|
||||
download(asnUsedAutnums, new URI("https://thyme.apnic.net/current/data-used-autnums"));
|
||||
}
|
||||
|
||||
Path ip2Location = dataPath.resolve("IP2LOCATION-LITE-DB1.CSV");
|
||||
Path ip2LocationZip = dataPath.resolve("IP2LOCATION-LITE-DB1.CSV.ZIP");
|
||||
|
||||
if (!Files.exists(ip2Location)) {
|
||||
if (Files.exists(ip2LocationZip)) {
|
||||
Files.delete(ip2LocationZip);
|
||||
}
|
||||
|
||||
download(ip2LocationZip, new URI("https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP"));
|
||||
unzip(ip2LocationZip, dataPath, List.of("IP2LOCATION-LITE-DB1.CSV", "README_LITE.TXT", "LICENSE-CC-BY-SA-4.0.TXT"));
|
||||
Files.deleteIfExists(ip2LocationZip);
|
||||
}
|
||||
}
|
||||
|
||||
private static void download(Path dest, URI source) throws IOException {
|
||||
System.out.println("Downloading " + source + " to " + dest);
|
||||
try {
|
||||
if (!Files.exists(dest)) {
|
||||
try (var in = new BufferedInputStream(source.toURL().openStream())) {
|
||||
Files.copy(in, dest);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
Files.deleteIfExists(dest);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
private static void downloadGzipped(Path dest, URI source) throws IOException {
|
||||
System.out.println("Downloading " + source + " to " + dest);
|
||||
try {
|
||||
if (!Files.exists(dest)) {
|
||||
try (var in = new GZIPInputStream(new BufferedInputStream(source.toURL().openStream()))) {
|
||||
Files.copy(in, dest);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
Files.deleteIfExists(dest);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static void unzip(Path inputZip, Path outputDir, Collection<String> fileNames) throws IOException {
|
||||
try (ZipFile zipFile = new ZipFile(inputZip.toFile())) {
|
||||
zipFile.stream().forEach(entry -> {
|
||||
try {
|
||||
if (fileNames.contains(entry.getName())) {
|
||||
System.out.println("Extracting " + entry.getName());
|
||||
Files.copy(zipFile.getInputStream(entry), outputDir.resolve(entry.getName()));
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,33 @@
|
||||
package nu.marginalia.control;
|
||||
|
||||
import nu.marginalia.test.TestUtil;
|
||||
import org.junit.jupiter.api.Assertions;
|
||||
import org.junit.jupiter.api.Disabled;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
class ControlMainTest {
|
||||
|
||||
@Test
|
||||
@Disabled("We don't want to rudely hammer 3rd party services with chonky downloads on every build")
|
||||
void downloadAncillaryFiles() throws Exception {
|
||||
Path tempDir = Files.createTempDirectory("test");
|
||||
|
||||
ControlMain.downloadAncillaryFiles(tempDir);
|
||||
|
||||
Assertions.assertTrue(Files.exists(tempDir.resolve("adblock.txt")));
|
||||
Assertions.assertTrue(Files.exists(tempDir.resolve("suggestions.txt")));
|
||||
Assertions.assertTrue(Files.exists(tempDir.resolve("asn-data-raw-table")));
|
||||
Assertions.assertTrue(Files.exists(tempDir.resolve("asn-used-autnums")));
|
||||
Assertions.assertTrue(Files.exists(tempDir.resolve("LICENSE-CC-BY-SA-4.0.TXT")));
|
||||
Assertions.assertTrue(Files.exists(tempDir.resolve("README_LITE.TXT")));
|
||||
Assertions.assertTrue(Files.exists(tempDir.resolve("IP2LOCATION-LITE-DB1.CSV")));
|
||||
|
||||
// We don't want to leave a mess
|
||||
Assertions.assertFalse(Files.exists(tempDir.resolve("IP2LOCATION-LITE-DB1.CSV.ZIP")));
|
||||
|
||||
TestUtil.clearTempDir(tempDir);
|
||||
}
|
||||
}
|
12
run/setup.sh
12
run/setup.sh
@ -72,16 +72,4 @@ download_model model/segments.bin https://downloads.marginalia.nu/model/segments
|
||||
download_model model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin a38f0809f983723001dfc784d88ebb6d
|
||||
download_model model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz 340156704bb8c8e50c4abf35a7ec2569
|
||||
|
||||
download_model data/IP2LOCATION-LITE-DB1.CSV.ZIP https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP
|
||||
unzip -qn -d data data/IP2LOCATION-LITE-DB1.CSV.ZIP
|
||||
|
||||
download_model data/asn-data-raw-table https://thyme.apnic.net/current/data-raw-table
|
||||
download_model data/asn-used-autnums https://thyme.apnic.net/current/data-used-autnums
|
||||
|
||||
download_model data/adblock.txt https://downloads.marginalia.nu/data/adblock.txt
|
||||
if [ ! -f data/suggestions.txt ]; then
|
||||
download_model data/suggestions.txt.gz https://downloads.marginalia.nu/data/suggestions.txt.gz
|
||||
gunzip data/suggestions.txt.gz
|
||||
fi
|
||||
|
||||
popd
|
||||
|
Loading…
Reference in New Issue
Block a user