(setup) Move some file downloads from the setup script to the first boot of the system's control node

We can only do this for files that are not required for unit tests.

Since running more than one instance of the control service is not permitted, this should be safe with regard to race conditions. The boot orchestration also ensures that no other services boot up before the downloads are complete.
This commit is contained in:
Viktor Lofgren 2024-11-06 15:28:20 +01:00
parent dbb8bcdd8e
commit 76e9053dd0
4 changed files with 141 additions and 17 deletions

View File

@ -75,6 +75,10 @@ public class WmsaHome {
return ret;
}
/** Resolves the {@code data} directory directly beneath the path returned by {@code getHomePath()}. */
public static Path getDataPath() {
    final Path home = getHomePath();
    return home.resolve("data");
}
/** Resolves {@code data/adblock.txt} beneath the path returned by {@code getHomePath()}. */
public static Path getAdsDefinition() {
    final Path dataDir = getHomePath().resolve("data");
    return dataDir.resolve("adblock.txt");
}

View File

@ -3,22 +3,33 @@ package nu.marginalia.control;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import nu.marginalia.WmsaHome;
import nu.marginalia.service.MainClass;
import nu.marginalia.service.discovery.ServiceRegistryIf;
import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.service.module.ServiceDiscoveryModule;
import nu.marginalia.service.ServiceId;
import nu.marginalia.service.module.ServiceConfigurationModule;
import nu.marginalia.service.discovery.ServiceRegistryIf;
import nu.marginalia.service.module.DatabaseModule;
import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.service.module.ServiceConfigurationModule;
import nu.marginalia.service.module.ServiceDiscoveryModule;
import nu.marginalia.service.server.Initialization;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipFile;
public class ControlMain extends MainClass {
/**
 * Injector-invoked constructor. The {@code service} argument is not used in
 * the body; presumably it is declared so that requesting {@code ControlMain}
 * from the injector also forces the {@code ControlService} to be
 * constructed -- TODO confirm.
 */
@Inject
public ControlMain(ControlService service) {
}
public static void main(String... args) {
public static void main(String... args) throws Exception {
init(ServiceId.Control, args);
Injector injector = Guice.createInjector(
@ -30,9 +41,97 @@ public class ControlMain extends MainClass {
// Orchestrate the boot order for the services
var registry = injector.getInstance(ServiceRegistryIf.class);
var configuration = injector.getInstance(ServiceConfiguration.class);
// This must be run before orchestrateBoot, so that the other services don't
// start up until we're done
downloadAncillaryFiles(WmsaHome.getDataPath());
orchestrateBoot(registry, configuration);
injector.getInstance(ControlMain.class);
injector.getInstance(Initialization.class).setReady();
}
/**
 * Fetches the ancillary data files into {@code dataPath}. Each file is only
 * downloaded when it is not already present, so repeated invocations are cheap.
 *
 * <p>The files handled are the adblock list, the (gzipped) suggestions list,
 * the two APNIC ASN tables and the IP2Location LITE database, the latter being
 * delivered as a zip archive that is extracted and then removed again.
 *
 * @param dataPath directory the files are placed in; created if it is missing
 * @throws Exception if a URI is malformed, or if a download or extraction fails
 */
static void downloadAncillaryFiles(Path dataPath) throws Exception {
    // Files.copy does not create missing parent directories, so make sure the
    // target directory is there before the first download is attempted.
    if (!Files.isDirectory(dataPath)) {
        Files.createDirectories(dataPath);
    }

    Path adblockFile = dataPath.resolve("adblock.txt");
    if (!Files.exists(adblockFile)) {
        download(adblockFile, new URI("https://downloads.marginalia.nu/data/adblock.txt"));
    }

    Path suggestionsFile = dataPath.resolve("suggestions.txt");
    if (!Files.exists(suggestionsFile)) {
        downloadGzipped(suggestionsFile, new URI("https://downloads.marginalia.nu/data/suggestions.txt.gz"));
    }

    Path asnRawData = dataPath.resolve("asn-data-raw-table");
    if (!Files.exists(asnRawData)) {
        download(asnRawData, new URI("https://thyme.apnic.net/current/data-raw-table"));
    }

    Path asnUsedAutnums = dataPath.resolve("asn-used-autnums");
    if (!Files.exists(asnUsedAutnums)) {
        download(asnUsedAutnums, new URI("https://thyme.apnic.net/current/data-used-autnums"));
    }

    Path ip2Location = dataPath.resolve("IP2LOCATION-LITE-DB1.CSV");
    Path ip2LocationZip = dataPath.resolve("IP2LOCATION-LITE-DB1.CSV.ZIP");
    if (!Files.exists(ip2Location)) {
        // An archive left behind by an earlier, interrupted run cannot be trusted;
        // discard it so that download() fetches a fresh copy.
        Files.deleteIfExists(ip2LocationZip);

        download(ip2LocationZip, new URI("https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP"));
        unzip(ip2LocationZip, dataPath, List.of("IP2LOCATION-LITE-DB1.CSV", "README_LITE.TXT", "LICENSE-CC-BY-SA-4.0.TXT"));

        // The archive is only an intermediate artifact
        Files.deleteIfExists(ip2LocationZip);
    }
}
/**
 * Downloads {@code source} to {@code dest}, doing nothing if {@code dest}
 * already exists.
 *
 * <p>The payload is first written to a sibling {@code .part} file and only
 * renamed to {@code dest} once it has been received in full. Callers use the
 * existence of {@code dest} to decide whether a download is needed, so a
 * truncated file must never be visible under the final name, not even when
 * the process is killed in the middle of the transfer.
 *
 * @param dest   file to create
 * @param source location to fetch
 * @throws IOException if the transfer or the final rename fails; the
 *                     temporary file is removed before the exception propagates
 */
private static void download(Path dest, URI source) throws IOException {
    if (Files.exists(dest)) {
        return;
    }

    System.out.println("Downloading " + source + " to " + dest);

    // Same directory as dest, so the final move is a plain rename
    Path partial = dest.resolveSibling(dest.getFileName() + ".part");

    try {
        try (var in = new BufferedInputStream(source.toURL().openStream())) {
            // REPLACE_EXISTING: overwrite leftovers from a previously interrupted attempt
            Files.copy(in, partial, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
        }
        Files.move(partial, dest, java.nio.file.StandardCopyOption.ATOMIC_MOVE);
    }
    catch (IOException e) {
        Files.deleteIfExists(partial);
        throw e;
    }
}
/**
 * Downloads the gzip-compressed resource at {@code source} and stores its
 * decompressed contents in {@code dest}, doing nothing if {@code dest}
 * already exists.
 *
 * <p>The decompressed data is first written to a sibling {@code .part} file
 * and only renamed to {@code dest} once the stream has been consumed in full.
 * Callers use the existence of {@code dest} to decide whether a download is
 * needed, so a truncated file must never be visible under the final name.
 *
 * @param dest   file to create, holding the decompressed data
 * @param source location of the gzipped resource
 * @throws IOException if the transfer, decompression or final rename fails;
 *                     the temporary file is removed before the exception propagates
 */
private static void downloadGzipped(Path dest, URI source) throws IOException {
    if (Files.exists(dest)) {
        return;
    }

    System.out.println("Downloading " + source + " to " + dest);

    // Same directory as dest, so the final move is a plain rename
    Path partial = dest.resolveSibling(dest.getFileName() + ".part");

    try {
        try (var in = new GZIPInputStream(new BufferedInputStream(source.toURL().openStream()))) {
            // REPLACE_EXISTING: overwrite leftovers from a previously interrupted attempt
            Files.copy(in, partial, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
        }
        Files.move(partial, dest, java.nio.file.StandardCopyOption.ATOMIC_MOVE);
    }
    catch (IOException e) {
        Files.deleteIfExists(partial);
        throw e;
    }
}
/**
 * Extracts the entries of {@code inputZip} whose names appear in
 * {@code fileNames} into {@code outputDir}; all other entries are ignored.
 *
 * <p>Files that already exist in {@code outputDir} are overwritten, so an
 * extraction that was interrupted part-way can simply be repeated. Each entry
 * is written to a {@code .part} file first and renamed when complete, so a
 * half-extracted file never appears under its final name.
 *
 * <p>I/O failures are reported as {@link IOException}, matching the method's
 * declared contract, rather than being wrapped in a {@code RuntimeException}.
 *
 * @param inputZip  archive to read
 * @param outputDir directory the selected entries are written to
 * @param fileNames exact entry names to extract
 * @throws IOException if the archive cannot be read or an entry cannot be written
 */
private static void unzip(Path inputZip, Path outputDir, Collection<String> fileNames) throws IOException {
    try (ZipFile zipFile = new ZipFile(inputZip.toFile())) {
        // A plain loop (rather than stream().forEach) lets IOException propagate unwrapped
        var entries = zipFile.entries();
        while (entries.hasMoreElements()) {
            var entry = entries.nextElement();
            String name = entry.getName();

            if (!fileNames.contains(name)) {
                continue;
            }

            System.out.println("Extracting " + name);

            Path target = outputDir.resolve(name);
            Path partial = outputDir.resolve(name + ".part");

            try {
                try (var in = zipFile.getInputStream(entry)) {
                    Files.copy(in, partial, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
                }
                // Overwrite leftovers of an earlier run instead of failing on them
                Files.move(partial, target, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
            }
            catch (IOException e) {
                Files.deleteIfExists(partial);
                throw e;
            }
        }
    }
}
}

View File

@ -0,0 +1,33 @@
package nu.marginalia.control;
import nu.marginalia.test.TestUtil;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import java.nio.file.Files;
import java.nio.file.Path;
/** Exercises {@link ControlMain#downloadAncillaryFiles} against the real download servers. */
class ControlMainTest {

    /**
     * Downloads everything into a fresh temporary directory and verifies that
     * each expected file is present and that the intermediate zip archive has
     * been removed again. Disabled by default, as it performs real downloads.
     */
    @Test
    @Disabled("We don't want to rudely hammer 3rd party services with chonky downloads on every build")
    void downloadAncillaryFiles() throws Exception {
        Path tempDir = Files.createTempDirectory("test");

        try {
            ControlMain.downloadAncillaryFiles(tempDir);

            Assertions.assertTrue(Files.exists(tempDir.resolve("adblock.txt")));
            Assertions.assertTrue(Files.exists(tempDir.resolve("suggestions.txt")));
            Assertions.assertTrue(Files.exists(tempDir.resolve("asn-data-raw-table")));
            Assertions.assertTrue(Files.exists(tempDir.resolve("asn-used-autnums")));
            Assertions.assertTrue(Files.exists(tempDir.resolve("LICENSE-CC-BY-SA-4.0.TXT")));
            Assertions.assertTrue(Files.exists(tempDir.resolve("README_LITE.TXT")));
            Assertions.assertTrue(Files.exists(tempDir.resolve("IP2LOCATION-LITE-DB1.CSV")));

            // We don't want to leave a mess
            Assertions.assertFalse(Files.exists(tempDir.resolve("IP2LOCATION-LITE-DB1.CSV.ZIP")));
        }
        finally {
            // Clean up even when a download or an assertion fails, so that a
            // failing run does not leave the downloaded files behind
            TestUtil.clearTempDir(tempDir);
        }
    }
}

View File

@ -72,16 +72,4 @@ download_model model/segments.bin https://downloads.marginalia.nu/model/segments
download_model model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/tfreq-new-algo3.bin a38f0809f983723001dfc784d88ebb6d
download_model model/lid.176.ftz https://downloads.marginalia.nu/model/lid.176.ftz 340156704bb8c8e50c4abf35a7ec2569
download_model data/IP2LOCATION-LITE-DB1.CSV.ZIP https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP
unzip -qn -d data data/IP2LOCATION-LITE-DB1.CSV.ZIP
download_model data/asn-data-raw-table https://thyme.apnic.net/current/data-raw-table
download_model data/asn-used-autnums https://thyme.apnic.net/current/data-used-autnums
download_model data/adblock.txt https://downloads.marginalia.nu/data/adblock.txt
if [ ! -f data/suggestions.txt ]; then
download_model data/suggestions.txt.gz https://downloads.marginalia.nu/data/suggestions.txt.gz
gunzip data/suggestions.txt.gz
fi
popd