mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(*) Add download-sample action, refactor file storage
This changeset adds an action for downloading a set of sample data from downloads.marginalia.nu. It also refactors some leaky abstractions out of FileStorageService. allocateTemporaryStorage has been renamed to allocateStorage, as the storage was never temporary in any scenario. It also no longer takes a storage base, as there was always only one valid option for this input; the allocateStorage method finds the appropriate base itself.
This commit is contained in:
parent
1b8b97b8ec
commit
cae1bad274
@ -71,7 +71,6 @@ public class ExecutorClient extends AbstractDynamicClient {
|
|||||||
post(ctx, node,
|
post(ctx, node,
|
||||||
"/sideload/encyclopedia?path="+ URLEncoder.encode(sourcePath.toString(), StandardCharsets.UTF_8) + "&baseUrl=" + URLEncoder.encode(baseUrl, StandardCharsets.UTF_8),
|
"/sideload/encyclopedia?path="+ URLEncoder.encode(sourcePath.toString(), StandardCharsets.UTF_8) + "&baseUrl=" + URLEncoder.encode(baseUrl, StandardCharsets.UTF_8),
|
||||||
"").blockingSubscribe();
|
"").blockingSubscribe();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void sideloadDirtree(Context ctx, int node, Path sourcePath) {
|
public void sideloadDirtree(Context ctx, int node, Path sourcePath) {
|
||||||
@ -111,6 +110,10 @@ public class ExecutorClient extends AbstractDynamicClient {
|
|||||||
post(ctx, node, "/export/termfreq?fid="+fid, "").blockingSubscribe();
|
post(ctx, node, "/export/termfreq?fid="+fid, "").blockingSubscribe();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void downloadSampleData(Context ctx, int node, String sampleSet) {
|
||||||
|
post(ctx, node, "/action/download-sample-data?set="+URLEncoder.encode(sampleSet, StandardCharsets.UTF_8), "").blockingSubscribe();
|
||||||
|
}
|
||||||
|
|
||||||
public void exportData(Context ctx, int node) {
|
public void exportData(Context ctx, int node) {
|
||||||
post(ctx, node, "/export/data", "").blockingSubscribe();
|
post(ctx, node, "/export/data", "").blockingSubscribe();
|
||||||
}
|
}
|
||||||
@ -166,4 +169,5 @@ public class ExecutorClient extends AbstractDynamicClient {
|
|||||||
public void yieldDomain(Context context, int node, TransferItem item) {
|
public void yieldDomain(Context context, int node, TransferItem item) {
|
||||||
post(context, node, "/transfer/yield", item).blockingSubscribe();
|
post(context, node, "/transfer/yield", item).blockingSubscribe();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -223,14 +223,12 @@ public class FileStorageService {
|
|||||||
return maybePath;
|
return maybePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Allocate a temporary storage of the given type */
|
/** Allocate a storage area of the given type */
|
||||||
public FileStorage allocateTemporaryStorage(FileStorageBase base,
|
public FileStorage allocateStorage(FileStorageType type,
|
||||||
FileStorageType type,
|
String prefix,
|
||||||
String prefix,
|
String description) throws IOException, SQLException
|
||||||
String description) throws IOException, SQLException
|
|
||||||
{
|
{
|
||||||
if (!base.type().permitsStorageType(type))
|
var base = getStorageBase(FileStorageBaseType.forFileStorageType(type));
|
||||||
throw new RuntimeException("Attempting to allocate storage of type " + type + " in base of type " + base.type());
|
|
||||||
|
|
||||||
Path newDir = allocateDirectory(base.asPath(), prefix);
|
Path newDir = allocateDirectory(base.asPath(), prefix);
|
||||||
|
|
||||||
|
@ -1,18 +1,17 @@
|
|||||||
package nu.marginalia.storage.model;
|
package nu.marginalia.storage.model;
|
||||||
|
|
||||||
import java.util.EnumSet;
|
|
||||||
|
|
||||||
public enum FileStorageBaseType {
|
public enum FileStorageBaseType {
|
||||||
CURRENT,
|
CURRENT,
|
||||||
WORK,
|
WORK,
|
||||||
STORAGE,
|
STORAGE,
|
||||||
BACKUP;
|
BACKUP;
|
||||||
|
|
||||||
public boolean permitsStorageType(FileStorageType type) {
|
|
||||||
return switch (this) {
|
public static FileStorageBaseType forFileStorageType(FileStorageType type) {
|
||||||
case BACKUP -> FileStorageType.BACKUP.equals(type);
|
return switch (type) {
|
||||||
case STORAGE -> EnumSet.of(FileStorageType.EXPORT, FileStorageType.CRAWL_DATA, FileStorageType.PROCESSED_DATA, FileStorageType.CRAWL_SPEC).contains(type);
|
case EXPORT, CRAWL_DATA, PROCESSED_DATA, CRAWL_SPEC -> STORAGE;
|
||||||
default -> false;
|
case BACKUP -> BACKUP;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -13,18 +13,14 @@ import org.testcontainers.containers.MariaDBContainer;
|
|||||||
import org.testcontainers.junit.jupiter.Container;
|
import org.testcontainers.junit.jupiter.Container;
|
||||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||||
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.UUID;
|
import java.util.UUID;
|
||||||
|
|
||||||
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
|
|
||||||
|
|
||||||
@Testcontainers
|
@Testcontainers
|
||||||
@Execution(ExecutionMode.SAME_THREAD)
|
@Execution(ExecutionMode.SAME_THREAD)
|
||||||
@Tag("slow")
|
@Tag("slow")
|
||||||
@ -124,8 +120,7 @@ public class FileStorageServiceTest {
|
|||||||
|
|
||||||
var storage = new FileStorageService(dataSource, 0);
|
var storage = new FileStorageService(dataSource, 0);
|
||||||
|
|
||||||
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.STORAGE);
|
var fileStorage = storage.allocateStorage(FileStorageType.CRAWL_DATA, "xyz", "thisShouldSucceed");
|
||||||
var fileStorage = storage.allocateTemporaryStorage(base, FileStorageType.CRAWL_DATA, "xyz", "thisShouldSucceed");
|
|
||||||
System.out.println("Allocated " + fileStorage.asPath());
|
System.out.println("Allocated " + fileStorage.asPath());
|
||||||
Assertions.assertTrue(Files.exists(fileStorage.asPath()));
|
Assertions.assertTrue(Files.exists(fileStorage.asPath()));
|
||||||
tempDirs.add(fileStorage.asPath());
|
tempDirs.add(fileStorage.asPath());
|
||||||
|
@ -13,6 +13,8 @@ import nu.marginalia.storage.FileStorageService;
|
|||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageState;
|
import nu.marginalia.storage.model.FileStorageState;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
import spark.Response;
|
import spark.Response;
|
||||||
import spark.Spark;
|
import spark.Spark;
|
||||||
@ -21,9 +23,11 @@ import java.nio.file.Path;
|
|||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class ControlNodeActionsService {
|
public class ControlNodeActionsService {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(ControlNodeActionsService.class);
|
||||||
private final IndexClient indexClient;
|
private final IndexClient indexClient;
|
||||||
private final RedirectControl redirectControl;
|
private final RedirectControl redirectControl;
|
||||||
private final FileStorageService fileStorageService;
|
private final FileStorageService fileStorageService;
|
||||||
@ -62,6 +66,9 @@ public class ControlNodeActionsService {
|
|||||||
Spark.post("/public/nodes/:node/actions/sideload-stackexchange", this::sideloadStackexchange,
|
Spark.post("/public/nodes/:node/actions/sideload-stackexchange", this::sideloadStackexchange,
|
||||||
redirectControl.renderRedirectAcknowledgement("Sideloading", "..")
|
redirectControl.renderRedirectAcknowledgement("Sideloading", "..")
|
||||||
);
|
);
|
||||||
|
Spark.post("/public/nodes/:node/actions/download-sample-data", this::downloadSampleData,
|
||||||
|
redirectControl.renderRedirectAcknowledgement("Downloading", "..")
|
||||||
|
);
|
||||||
Spark.post("/public/nodes/:id/actions/new-crawl", this::triggerNewCrawl,
|
Spark.post("/public/nodes/:id/actions/new-crawl", this::triggerNewCrawl,
|
||||||
redirectControl.renderRedirectAcknowledgement("Crawling", "..")
|
redirectControl.renderRedirectAcknowledgement("Crawling", "..")
|
||||||
);
|
);
|
||||||
@ -91,6 +98,21 @@ public class ControlNodeActionsService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Object downloadSampleData(Request request, Response response) {
|
||||||
|
String set = request.queryParams("sample");
|
||||||
|
|
||||||
|
if (set == null)
|
||||||
|
throw new ControlValidationError("No sample specified", "A sample data set must be specified", "..");
|
||||||
|
if (!Set.of("sample-s", "sample-m", "sample-l", "sample-xl").contains(set))
|
||||||
|
throw new ControlValidationError("Invalid sample specified", "A valid sample data set must be specified", "..");
|
||||||
|
|
||||||
|
executorClient.downloadSampleData(Context.fromRequest(request), Integer.parseInt(request.params("node")), set);
|
||||||
|
|
||||||
|
logger.info("Downloading sample data set {}", set);
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
public Object sideloadEncyclopedia(Request request, Response response) {
|
public Object sideloadEncyclopedia(Request request, Response response) {
|
||||||
|
|
||||||
String source = request.queryParams("source");
|
String source = request.queryParams("source");
|
||||||
|
@ -0,0 +1,47 @@
|
|||||||
|
<h1 class="my-3">Download Sample Data</h1>
|
||||||
|
|
||||||
|
<div class="my-3 p-3 border bg-light">
|
||||||
|
This will download sample crawl data from <a href="https://downloads.marginalia.nu">downloads.marginalia.nu</a> onto Node {{node.id}}.
|
||||||
|
This is a sample of real crawl data. It is intended for demo, testing and development purposes. Several sets are available.
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<form method="post" action="actions/download-sample-data">
|
||||||
|
|
||||||
|
<table class="table">
|
||||||
|
<tr>
|
||||||
|
<th>Use</th>
|
||||||
|
<th>Set</th>
|
||||||
|
<th>Description</th>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td><input id="sample-s" value="sample-s" name="sample" class="form-check-input" type="radio"></td>
|
||||||
|
<td><label for="sample-s">Small</label></td>
|
||||||
|
<td>1000 Domains. About 2 GB. </td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td><input id="sample-m" value="sample-m" name="sample" class="form-check-input" type="radio"></td>
|
||||||
|
<td><label for="sample-m">Medium</label></td>
|
||||||
|
<td>2000 Domains. About 6 GB. Recommended.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td><input id="sample-l" value="sample-l" name="sample" class="form-check-input" type="radio"></td>
|
||||||
|
<td><label for="sample-l">Large</label></td>
|
||||||
|
<td>5000 Domains. About 20 GB.</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td><input id="sample-xl" value="sample-xl" name="sample" class="form-check-input" type="radio"></td>
|
||||||
|
<td><label for="sample-xl">Huge</label></td>
|
||||||
|
<td>50,000 Domains. Around 180 GB. Primarily intended for pre-production like testing environments.
|
||||||
|
Expect hours of processing time. </td>
|
||||||
|
</tr>
|
||||||
|
</table>
|
||||||
|
<button
|
||||||
|
class="btn btn-primary me-md-2"
|
||||||
|
onclick="return confirm('Confirm downloading sample data onto node {{node.id}}');"
|
||||||
|
type="submit">
|
||||||
|
Start Download</button>
|
||||||
|
</form>
|
@ -23,6 +23,7 @@
|
|||||||
{{#if view.export-from-crawl-data}} {{> control/node/actions/partial-export-from-crawl-data }} {{/if}}
|
{{#if view.export-from-crawl-data}} {{> control/node/actions/partial-export-from-crawl-data }} {{/if}}
|
||||||
{{#if view.export-sample-data}} {{> control/node/actions/partial-export-sample-data }} {{/if}}
|
{{#if view.export-sample-data}} {{> control/node/actions/partial-export-sample-data }} {{/if}}
|
||||||
{{#if view.restore-backup}} {{> control/node/actions/partial-restore-backup }} {{/if}}
|
{{#if view.restore-backup}} {{> control/node/actions/partial-restore-backup }} {{/if}}
|
||||||
|
{{#if view.download-sample-data}} {{> control/node/actions/partial-download-sample-data }} {{/if}}
|
||||||
<div class="mt-10"> </div>
|
<div class="mt-10"> </div>
|
||||||
</div>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
|
@ -24,6 +24,7 @@
|
|||||||
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-stackexchange">Sideload Stackexchange</a></li>
|
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-stackexchange">Sideload Stackexchange</a></li>
|
||||||
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-warc">Sideload WARC Files</a></li>
|
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-warc">Sideload WARC Files</a></li>
|
||||||
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-dirtree">Sideload Dirtree</a></li>
|
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-dirtree">Sideload Dirtree</a></li>
|
||||||
|
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=download-sample-data">Download Sample Crawl Data</a></li>
|
||||||
<li><hr class="dropdown-divider"></li>
|
<li><hr class="dropdown-divider"></li>
|
||||||
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-db-data">Export Database Data</a></li>
|
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-db-data">Export Database Data</a></li>
|
||||||
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-sample-data">Export Sample Crawl Data</a></li>
|
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-sample-data">Export Sample Crawl Data</a></li>
|
||||||
|
@ -61,6 +61,7 @@ dependencies {
|
|||||||
implementation libs.zstd
|
implementation libs.zstd
|
||||||
implementation libs.jsoup
|
implementation libs.jsoup
|
||||||
implementation libs.commons.io
|
implementation libs.commons.io
|
||||||
|
implementation libs.commons.compress
|
||||||
implementation libs.commons.lang3
|
implementation libs.commons.lang3
|
||||||
implementation libs.bundles.mariadb
|
implementation libs.bundles.mariadb
|
||||||
|
|
||||||
|
@ -17,7 +17,9 @@ public enum ExecutorActor {
|
|||||||
EXPORT_FEEDS,
|
EXPORT_FEEDS,
|
||||||
PROC_INDEX_CONSTRUCTOR_SPAWNER,
|
PROC_INDEX_CONSTRUCTOR_SPAWNER,
|
||||||
CONVERT,
|
CONVERT,
|
||||||
RESTORE_BACKUP, EXPORT_SAMPLE_DATA;
|
RESTORE_BACKUP,
|
||||||
|
EXPORT_SAMPLE_DATA,
|
||||||
|
DOWNLOAD_SAMPLE;
|
||||||
|
|
||||||
public String id() {
|
public String id() {
|
||||||
return "fsm:" + name().toLowerCase();
|
return "fsm:" + name().toLowerCase();
|
||||||
|
@ -47,6 +47,7 @@ public class ExecutorActorControlService {
|
|||||||
ExportFeedsActor exportFeedsActor,
|
ExportFeedsActor exportFeedsActor,
|
||||||
ExportSampleDataActor exportSampleDataActor,
|
ExportSampleDataActor exportSampleDataActor,
|
||||||
ExportTermFreqActor exportTermFrequenciesActor,
|
ExportTermFreqActor exportTermFrequenciesActor,
|
||||||
|
DownloadSampleActor downloadSampleActor,
|
||||||
ExecutorActorStateMachines stateMachines) {
|
ExecutorActorStateMachines stateMachines) {
|
||||||
this.messageQueueFactory = messageQueueFactory;
|
this.messageQueueFactory = messageQueueFactory;
|
||||||
this.eventLog = baseServiceParams.eventLog;
|
this.eventLog = baseServiceParams.eventLog;
|
||||||
@ -75,6 +76,8 @@ public class ExecutorActorControlService {
|
|||||||
register(ExecutorActor.EXPORT_FEEDS, exportFeedsActor);
|
register(ExecutorActor.EXPORT_FEEDS, exportFeedsActor);
|
||||||
register(ExecutorActor.EXPORT_SAMPLE_DATA, exportSampleDataActor);
|
register(ExecutorActor.EXPORT_SAMPLE_DATA, exportSampleDataActor);
|
||||||
register(ExecutorActor.EXPORT_TERM_FREQUENCIES, exportTermFrequenciesActor);
|
register(ExecutorActor.EXPORT_TERM_FREQUENCIES, exportTermFrequenciesActor);
|
||||||
|
|
||||||
|
register(ExecutorActor.DOWNLOAD_SAMPLE, downloadSampleActor);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void register(ExecutorActor process, RecordActorPrototype graph) {
|
private void register(ExecutorActor process, RecordActorPrototype graph) {
|
||||||
|
@ -13,7 +13,6 @@ import nu.marginalia.process.ProcessService;
|
|||||||
import nu.marginalia.sideload.SideloadHelper;
|
import nu.marginalia.sideload.SideloadHelper;
|
||||||
import nu.marginalia.sideload.StackExchangeSideloadHelper;
|
import nu.marginalia.sideload.StackExchangeSideloadHelper;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageState;
|
import nu.marginalia.storage.model.FileStorageState;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
@ -49,8 +48,7 @@ public class ConvertActor extends RecordActorPrototype {
|
|||||||
return switch (self) {
|
return switch (self) {
|
||||||
case Convert (FileStorageId fid) -> {
|
case Convert (FileStorageId fid) -> {
|
||||||
var toProcess = storageService.getStorage(fid);
|
var toProcess = storageService.getStorage(fid);
|
||||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var processedArea = storageService.allocateStorage(
|
||||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
|
||||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||||
"Processed Data; " + toProcess.description());
|
"Processed Data; " + toProcess.description());
|
||||||
|
|
||||||
@ -69,8 +67,7 @@ public class ConvertActor extends RecordActorPrototype {
|
|||||||
|
|
||||||
String fileName = sourcePath.toFile().getName();
|
String fileName = sourcePath.toFile().getName();
|
||||||
|
|
||||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var processedArea = storageService.allocateStorage(
|
||||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
|
||||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||||
"Processed Dirtree Data; " + fileName);
|
"Processed Dirtree Data; " + fileName);
|
||||||
|
|
||||||
@ -88,8 +85,7 @@ public class ConvertActor extends RecordActorPrototype {
|
|||||||
|
|
||||||
String fileName = sourcePath.toFile().getName();
|
String fileName = sourcePath.toFile().getName();
|
||||||
|
|
||||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var processedArea = storageService.allocateStorage(
|
||||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
|
||||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||||
"Processed Warc Data; " + fileName);
|
"Processed Warc Data; " + fileName);
|
||||||
|
|
||||||
@ -121,8 +117,7 @@ public class ConvertActor extends RecordActorPrototype {
|
|||||||
|
|
||||||
String fileName = sourcePath.toFile().getName();
|
String fileName = sourcePath.toFile().getName();
|
||||||
|
|
||||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var processedArea = storageService.allocateStorage(
|
||||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
|
||||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||||
"Processed Encylopedia Data; " + fileName);
|
"Processed Encylopedia Data; " + fileName);
|
||||||
|
|
||||||
@ -171,8 +166,7 @@ public class ConvertActor extends RecordActorPrototype {
|
|||||||
|
|
||||||
String fileName = sourcePath.toFile().getName();
|
String fileName = sourcePath.toFile().getName();
|
||||||
|
|
||||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var processedArea = storageService.allocateStorage(
|
||||||
var processedArea = storageService.allocateTemporaryStorage(base,
|
|
||||||
FileStorageType.PROCESSED_DATA, "processed-data",
|
FileStorageType.PROCESSED_DATA, "processed-data",
|
||||||
"Processed Stackexchange Data; " + fileName);
|
"Processed Stackexchange Data; " + fileName);
|
||||||
|
|
||||||
|
@ -17,14 +17,12 @@ import nu.marginalia.service.module.ServiceConfiguration;
|
|||||||
import nu.marginalia.storage.model.FileStorageState;
|
import nu.marginalia.storage.model.FileStorageState;
|
||||||
import nu.marginalia.svc.BackupService;
|
import nu.marginalia.svc.BackupService;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
import nu.marginalia.index.client.IndexClient;
|
import nu.marginalia.index.client.IndexClient;
|
||||||
import nu.marginalia.index.client.IndexMqEndpoints;
|
import nu.marginalia.index.client.IndexMqEndpoints;
|
||||||
import nu.marginalia.mq.MqMessageState;
|
import nu.marginalia.mq.MqMessageState;
|
||||||
import nu.marginalia.mq.outbox.MqOutbox;
|
import nu.marginalia.mq.outbox.MqOutbox;
|
||||||
import nu.marginalia.mqapi.converting.ConvertAction;
|
|
||||||
import nu.marginalia.mqapi.converting.ConvertRequest;
|
import nu.marginalia.mqapi.converting.ConvertRequest;
|
||||||
import nu.marginalia.mqapi.index.CreateIndexRequest;
|
import nu.marginalia.mqapi.index.CreateIndexRequest;
|
||||||
import nu.marginalia.mqapi.index.IndexName;
|
import nu.marginalia.mqapi.index.IndexName;
|
||||||
@ -96,8 +94,7 @@ public class ConvertAndLoadActor extends RecordActorPrototype {
|
|||||||
if (storage.type() != FileStorageType.CRAWL_DATA) yield new Error("Bad storage type " + storage.type());
|
if (storage.type() != FileStorageType.CRAWL_DATA) yield new Error("Bad storage type " + storage.type());
|
||||||
|
|
||||||
|
|
||||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var processedArea = storageService.allocateStorage(FileStorageType.PROCESSED_DATA, "processed-data",
|
||||||
var processedArea = storageService.allocateTemporaryStorage(base, FileStorageType.PROCESSED_DATA, "processed-data",
|
|
||||||
"Processed Data; " + storage.description());
|
"Processed Data; " + storage.description());
|
||||||
|
|
||||||
storageService.setFileStorageState(processedArea.id(), FileStorageState.NEW);
|
storageService.setFileStorageState(processedArea.id(), FileStorageState.NEW);
|
||||||
|
@ -10,7 +10,6 @@ import nu.marginalia.actor.state.Resume;
|
|||||||
import nu.marginalia.process.ProcessOutboxes;
|
import nu.marginalia.process.ProcessOutboxes;
|
||||||
import nu.marginalia.process.ProcessService;
|
import nu.marginalia.process.ProcessService;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
import nu.marginalia.mq.MqMessageState;
|
import nu.marginalia.mq.MqMessageState;
|
||||||
@ -43,9 +42,7 @@ public class CrawlActor extends RecordActorPrototype {
|
|||||||
if (storage == null) yield new Error("Bad storage id");
|
if (storage == null) yield new Error("Bad storage id");
|
||||||
if (storage.type() != FileStorageType.CRAWL_SPEC) yield new Error("Bad storage type " + storage.type());
|
if (storage.type() != FileStorageType.CRAWL_SPEC) yield new Error("Bad storage type " + storage.type());
|
||||||
|
|
||||||
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var dataArea = storageService.allocateStorage(
|
||||||
var dataArea = storageService.allocateTemporaryStorage(
|
|
||||||
base,
|
|
||||||
FileStorageType.CRAWL_DATA,
|
FileStorageType.CRAWL_DATA,
|
||||||
"crawl-data",
|
"crawl-data",
|
||||||
storage.description());
|
storage.description());
|
||||||
|
@ -7,7 +7,6 @@ import nu.marginalia.actor.prototype.RecordActorPrototype;
|
|||||||
import nu.marginalia.actor.state.ActorStep;
|
import nu.marginalia.actor.state.ActorStep;
|
||||||
import nu.marginalia.crawlspec.CrawlSpecFileNames;
|
import nu.marginalia.crawlspec.CrawlSpecFileNames;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
@ -41,8 +40,7 @@ public class CrawlJobExtractorActor extends RecordActorPrototype {
|
|||||||
public ActorStep transition(ActorStep self) throws Exception {
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
return switch (self) {
|
return switch (self) {
|
||||||
case CreateFromUrl(String description, String url) -> {
|
case CreateFromUrl(String description, String url) -> {
|
||||||
var base = fileStorageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var storage = fileStorageService.allocateStorage(FileStorageType.CRAWL_SPEC, "crawl-spec", description);
|
||||||
var storage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_SPEC, "crawl-spec", description);
|
|
||||||
|
|
||||||
Path urlsTxt = storage.asPath().resolve("urls.txt");
|
Path urlsTxt = storage.asPath().resolve("urls.txt");
|
||||||
|
|
||||||
|
@ -0,0 +1,133 @@
|
|||||||
|
package nu.marginalia.actor.task;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.actor.prototype.RecordActorPrototype;
|
||||||
|
import nu.marginalia.actor.state.ActorStep;
|
||||||
|
import nu.marginalia.storage.FileStorageService;
|
||||||
|
import nu.marginalia.storage.model.*;
|
||||||
|
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||||
|
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardOpenOption;
|
||||||
|
import java.nio.file.attribute.PosixFilePermissions;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class DownloadSampleActor extends RecordActorPrototype {
|
||||||
|
|
||||||
|
private final FileStorageService storageService;
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
|
|
||||||
|
public record Run(String setName) implements ActorStep {}
|
||||||
|
@Override
|
||||||
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
|
return switch(self) {
|
||||||
|
case Run(String setName) -> {
|
||||||
|
final FileStorage newStorage = storageService.allocateStorage(
|
||||||
|
FileStorageType.CRAWL_DATA,
|
||||||
|
"sample-crawl-data",
|
||||||
|
"Sample " + setName);
|
||||||
|
|
||||||
|
storageService.setFileStorageState(newStorage.id(), FileStorageState.NEW);
|
||||||
|
|
||||||
|
URL downloadURI = getDownloadURL(setName);
|
||||||
|
|
||||||
|
try {
|
||||||
|
downloadArchive(downloadURI, newStorage.asPath());
|
||||||
|
}
|
||||||
|
catch (IOException ex) {
|
||||||
|
logger.error("Error downloading sample", ex);
|
||||||
|
storageService.flagFileForDeletion(newStorage.id());
|
||||||
|
yield new Error();
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
storageService.setFileStorageState(newStorage.id(), FileStorageState.UNSET);
|
||||||
|
}
|
||||||
|
|
||||||
|
yield new End();
|
||||||
|
}
|
||||||
|
default -> new Error();
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private void downloadArchive(URL downloadURI, Path outputPath) throws IOException, InterruptedException {
|
||||||
|
// See the documentation for commons compress:
|
||||||
|
// https://commons.apache.org/proper/commons-compress/examples.html
|
||||||
|
|
||||||
|
try (var tar = new TarArchiveInputStream(downloadURI.openStream())) {
|
||||||
|
TarArchiveEntry nextEntry;
|
||||||
|
byte[] buffer = new byte[8192];
|
||||||
|
|
||||||
|
while ((nextEntry = tar.getNextEntry()) != null) {
|
||||||
|
// Poll for interruption, to ensure this can be cancelled
|
||||||
|
if (Thread.interrupted()) {
|
||||||
|
throw new InterruptedException();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nextEntry.isDirectory()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Path outputFile = outputPath.resolve(nextEntry.getName());
|
||||||
|
Files.createDirectories(outputFile.getParent(),
|
||||||
|
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))
|
||||||
|
);
|
||||||
|
|
||||||
|
long size = nextEntry.getSize();
|
||||||
|
|
||||||
|
// Extract tar entry
|
||||||
|
try (var fos = Files.newOutputStream(outputFile, StandardOpenOption.CREATE)) {
|
||||||
|
transferBytes(tar, fos, buffer, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
Files.setPosixFilePermissions(outputPath, PosixFilePermissions.fromString("rw-r--r--"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void transferBytes(InputStream inputStream, OutputStream outputStream, byte[] buffer, long size)
|
||||||
|
throws IOException
|
||||||
|
{
|
||||||
|
long copiedSize = 0;
|
||||||
|
|
||||||
|
while (copiedSize < size) {
|
||||||
|
int read = inputStream.read(buffer);
|
||||||
|
|
||||||
|
if (read < 0) // We've been promised a file of length 'size', so this shouldn't happen, but just in case...
|
||||||
|
throw new IOException("Unexpected end of stream");
|
||||||
|
|
||||||
|
outputStream.write(buffer, 0, read);
|
||||||
|
copiedSize += read;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private URL getDownloadURL(String setName) throws MalformedURLException {
|
||||||
|
return URI.create(STR."https://downloads.marginalia.nu/samples/\{setName}.tar").toURL();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String describe() {
|
||||||
|
return "Download a sample of crawl data from downloads.marginalia.nu";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public DownloadSampleActor(Gson gson,
|
||||||
|
FileStorageService storageService)
|
||||||
|
{
|
||||||
|
super(gson);
|
||||||
|
this.storageService = storageService;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -23,8 +23,7 @@ public class ExportAtagsActor extends RecordActorPrototype {
|
|||||||
public ActorStep transition(ActorStep self) throws Exception {
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
return switch(self) {
|
return switch(self) {
|
||||||
case Export(FileStorageId crawlId) -> {
|
case Export(FileStorageId crawlId) -> {
|
||||||
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var storage = storageService.allocateStorage(FileStorageType.EXPORT, "atag-export", "Anchor Tags " + LocalDateTime.now());
|
||||||
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT, "atag-export", "Anchor Tags " + LocalDateTime.now());
|
|
||||||
|
|
||||||
if (storage == null) yield new Error("Bad storage id");
|
if (storage == null) yield new Error("Bad storage id");
|
||||||
yield new Run(crawlId, storage.id());
|
yield new Run(crawlId, storage.id());
|
||||||
|
@ -8,7 +8,6 @@ import nu.marginalia.actor.prototype.RecordActorPrototype;
|
|||||||
import nu.marginalia.actor.state.ActorStep;
|
import nu.marginalia.actor.state.ActorStep;
|
||||||
import nu.marginalia.query.client.QueryClient;
|
import nu.marginalia.query.client.QueryClient;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -43,8 +42,7 @@ public class ExportDataActor extends RecordActorPrototype {
|
|||||||
public ActorStep transition(ActorStep self) throws Exception {
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
return switch(self) {
|
return switch(self) {
|
||||||
case Export() -> {
|
case Export() -> {
|
||||||
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var storage = storageService.allocateStorage(FileStorageType.EXPORT, "db-export", "DB Exports " + LocalDateTime.now());
|
||||||
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT, "db-export", "DB Exports " + LocalDateTime.now());
|
|
||||||
|
|
||||||
if (storage == null) yield new Error("Bad storage id");
|
if (storage == null) yield new Error("Bad storage id");
|
||||||
yield new ExportBlacklist(storage.id());
|
yield new ExportBlacklist(storage.id());
|
||||||
|
@ -8,7 +8,6 @@ import nu.marginalia.actor.state.ActorStep;
|
|||||||
import nu.marginalia.extractor.ExporterIf;
|
import nu.marginalia.extractor.ExporterIf;
|
||||||
import nu.marginalia.extractor.FeedExporter;
|
import nu.marginalia.extractor.FeedExporter;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageState;
|
import nu.marginalia.storage.model.FileStorageState;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
@ -29,8 +28,7 @@ public class ExportFeedsActor extends RecordActorPrototype {
|
|||||||
public ActorStep transition(ActorStep self) throws Exception {
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
return switch(self) {
|
return switch(self) {
|
||||||
case Export(FileStorageId crawlId) -> {
|
case Export(FileStorageId crawlId) -> {
|
||||||
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var storage = storageService.allocateStorage(FileStorageType.EXPORT, "feed-export", "Feeds " + LocalDateTime.now());
|
||||||
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT, "feed-export", "Feeds " + LocalDateTime.now());
|
|
||||||
|
|
||||||
if (storage == null) yield new Error("Bad storage id");
|
if (storage == null) yield new Error("Bad storage id");
|
||||||
yield new Run(crawlId, storage.id());
|
yield new Run(crawlId, storage.id());
|
||||||
|
@ -5,11 +5,8 @@ import com.google.inject.Inject;
|
|||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import nu.marginalia.actor.prototype.RecordActorPrototype;
|
import nu.marginalia.actor.prototype.RecordActorPrototype;
|
||||||
import nu.marginalia.actor.state.ActorStep;
|
import nu.marginalia.actor.state.ActorStep;
|
||||||
import nu.marginalia.extractor.ExporterIf;
|
|
||||||
import nu.marginalia.extractor.FeedExporter;
|
|
||||||
import nu.marginalia.extractor.SampleDataExporter;
|
import nu.marginalia.extractor.SampleDataExporter;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageState;
|
import nu.marginalia.storage.model.FileStorageState;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
@ -30,8 +27,7 @@ public class ExportSampleDataActor extends RecordActorPrototype {
|
|||||||
public ActorStep transition(ActorStep self) throws Exception {
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
return switch(self) {
|
return switch(self) {
|
||||||
case Export(FileStorageId crawlId, int size, String name) -> {
|
case Export(FileStorageId crawlId, int size, String name) -> {
|
||||||
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var storage = storageService.allocateStorage(FileStorageType.EXPORT,
|
||||||
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT,
|
|
||||||
"crawl-sample-export",
|
"crawl-sample-export",
|
||||||
STR."Crawl Data Sample \{name}/\{size} \{LocalDateTime.now()}"
|
STR."Crawl Data Sample \{name}/\{size} \{LocalDateTime.now()}"
|
||||||
);
|
);
|
||||||
|
@ -8,7 +8,6 @@ import nu.marginalia.actor.state.ActorStep;
|
|||||||
import nu.marginalia.extractor.ExporterIf;
|
import nu.marginalia.extractor.ExporterIf;
|
||||||
import nu.marginalia.extractor.TermFrequencyExporter;
|
import nu.marginalia.extractor.TermFrequencyExporter;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageState;
|
import nu.marginalia.storage.model.FileStorageState;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
@ -25,8 +24,7 @@ public class ExportTermFreqActor extends RecordActorPrototype {
|
|||||||
public ActorStep transition(ActorStep self) throws Exception {
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
return switch(self) {
|
return switch(self) {
|
||||||
case Export(FileStorageId crawlId) -> {
|
case Export(FileStorageId crawlId) -> {
|
||||||
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
|
var storage = storageService.allocateStorage(FileStorageType.EXPORT, "term-freq-export", "Term Frequencies " + LocalDateTime.now());
|
||||||
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT, "term-freq-export", "Term Frequencies " + LocalDateTime.now());
|
|
||||||
|
|
||||||
if (storage == null) yield new Error("Bad storage id");
|
if (storage == null) yield new Error("Bad storage id");
|
||||||
yield new Run(crawlId, storage.id());
|
yield new Run(crawlId, storage.id());
|
||||||
|
@ -71,6 +71,8 @@ public class ExecutorSvc extends Service {
|
|||||||
Spark.post("/sideload/stackexchange", sideloadService::sideloadStackexchange);
|
Spark.post("/sideload/stackexchange", sideloadService::sideloadStackexchange);
|
||||||
Spark.post("/sideload/encyclopedia", sideloadService::sideloadEncyclopedia);
|
Spark.post("/sideload/encyclopedia", sideloadService::sideloadEncyclopedia);
|
||||||
|
|
||||||
|
Spark.post("/action/download-sample-data", sideloadService::downloadSampleData);
|
||||||
|
|
||||||
Spark.post("/export/atags", exportService::exportAtags);
|
Spark.post("/export/atags", exportService::exportAtags);
|
||||||
Spark.post("/export/sample-data", exportService::exportSampleData);
|
Spark.post("/export/sample-data", exportService::exportSampleData);
|
||||||
Spark.post("/export/feeds", exportService::exportFeeds);
|
Spark.post("/export/feeds", exportService::exportFeeds);
|
||||||
|
@ -5,8 +5,11 @@ import nu.marginalia.WmsaHome;
|
|||||||
import nu.marginalia.actor.ExecutorActor;
|
import nu.marginalia.actor.ExecutorActor;
|
||||||
import nu.marginalia.actor.ExecutorActorControlService;
|
import nu.marginalia.actor.ExecutorActorControlService;
|
||||||
import nu.marginalia.actor.task.ConvertActor;
|
import nu.marginalia.actor.task.ConvertActor;
|
||||||
|
import nu.marginalia.actor.task.DownloadSampleActor;
|
||||||
import nu.marginalia.executor.upload.UploadDirContents;
|
import nu.marginalia.executor.upload.UploadDirContents;
|
||||||
import nu.marginalia.executor.upload.UploadDirItem;
|
import nu.marginalia.executor.upload.UploadDirItem;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
import spark.Request;
|
import spark.Request;
|
||||||
import spark.Response;
|
import spark.Response;
|
||||||
|
|
||||||
@ -18,6 +21,7 @@ import java.util.List;
|
|||||||
|
|
||||||
public class SideloadService {
|
public class SideloadService {
|
||||||
private final ExecutorActorControlService actorControlService;
|
private final ExecutorActorControlService actorControlService;
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(SideloadService.class);
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public SideloadService(ExecutorActorControlService actorControlService) {
|
public SideloadService(ExecutorActorControlService actorControlService) {
|
||||||
@ -56,4 +60,11 @@ public class SideloadService {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Object downloadSampleData(Request request, Response response) throws Exception {
|
||||||
|
String sampleSet = request.queryParams("set");
|
||||||
|
|
||||||
|
actorControlService.startFrom(ExecutorActor.DOWNLOAD_SAMPLE, new DownloadSampleActor.Run(sampleSet));
|
||||||
|
|
||||||
|
return "";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -15,7 +15,6 @@ import nu.marginalia.mq.persistence.MqPersistence;
|
|||||||
import nu.marginalia.process.log.WorkLog;
|
import nu.marginalia.process.log.WorkLog;
|
||||||
import nu.marginalia.service.module.ServiceConfiguration;
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
@ -187,8 +186,7 @@ public class TransferService {
|
|||||||
|
|
||||||
// Ensure crawl data exists to receive into
|
// Ensure crawl data exists to receive into
|
||||||
if (storages.isEmpty()) {
|
if (storages.isEmpty()) {
|
||||||
var storage = fileStorageService.allocateTemporaryStorage(
|
var storage = fileStorageService.allocateStorage(
|
||||||
fileStorageService.getStorageBase(FileStorageBaseType.STORAGE),
|
|
||||||
FileStorageType.CRAWL_DATA,
|
FileStorageType.CRAWL_DATA,
|
||||||
"crawl-data",
|
"crawl-data",
|
||||||
"Crawl Data"
|
"Crawl Data"
|
||||||
|
@ -5,7 +5,6 @@ import com.github.luben.zstd.ZstdOutputStream;
|
|||||||
import nu.marginalia.IndexLocations;
|
import nu.marginalia.IndexLocations;
|
||||||
import nu.marginalia.service.control.ServiceHeartbeat;
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
import nu.marginalia.storage.FileStorageService;
|
import nu.marginalia.storage.FileStorageService;
|
||||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
import nu.marginalia.storage.model.FileStorageType;
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
import nu.marginallia.index.journal.IndexJournalFileNames;
|
import nu.marginallia.index.journal.IndexJournalFileNames;
|
||||||
@ -45,11 +44,9 @@ public class BackupService {
|
|||||||
* This backup can later be dehydrated and quickly loaded into _LIVE.
|
* This backup can later be dehydrated and quickly loaded into _LIVE.
|
||||||
* */
|
* */
|
||||||
public void createBackupFromStaging(List<FileStorageId> associatedIds) throws SQLException, IOException {
|
public void createBackupFromStaging(List<FileStorageId> associatedIds) throws SQLException, IOException {
|
||||||
var backupBase = storageService.getStorageBase(FileStorageBaseType.BACKUP);
|
|
||||||
|
|
||||||
String desc = "Pre-load backup snapshot " + LocalDateTime.now();
|
String desc = "Pre-load backup snapshot " + LocalDateTime.now();
|
||||||
|
|
||||||
var backupStorage = storageService.allocateTemporaryStorage(backupBase,
|
var backupStorage = storageService.allocateStorage(
|
||||||
FileStorageType.BACKUP, "snapshot", desc);
|
FileStorageType.BACKUP, "snapshot", desc);
|
||||||
|
|
||||||
for (var associatedId : associatedIds) {
|
for (var associatedId : associatedIds) {
|
||||||
|
Loading…
Reference in New Issue
Block a user