(*) Add download-sample action, refactor file storage

This changeset adds an action for downloading a set of sample data from downloads.marginalia.nu.

It also refactors some leaky abstractions out of FileStorageService.  allocateTemporaryStorage has been renamed to allocateStorage.  The storage was never temporary in any scenario...

allocateStorage also no longer takes a storage base, as there was only ever one valid option for this input.  The method finds the appropriate base itself.
This commit is contained in:
Viktor Lofgren 2024-01-25 13:36:30 +01:00
parent 1b8b97b8ec
commit cae1bad274
25 changed files with 256 additions and 67 deletions

View File

@ -71,7 +71,6 @@ public class ExecutorClient extends AbstractDynamicClient {
post(ctx, node,
"/sideload/encyclopedia?path="+ URLEncoder.encode(sourcePath.toString(), StandardCharsets.UTF_8) + "&baseUrl=" + URLEncoder.encode(baseUrl, StandardCharsets.UTF_8),
"").blockingSubscribe();
}
public void sideloadDirtree(Context ctx, int node, Path sourcePath) {
@ -111,6 +110,10 @@ public class ExecutorClient extends AbstractDynamicClient {
post(ctx, node, "/export/termfreq?fid="+fid, "").blockingSubscribe();
}
/** Ask the executor on the given node to download the named sample data set.
 * <p>
 * The set name is URL-encoded (UTF-8) into the <code>set</code> query parameter of a POST
 * to <code>/action/download-sample-data</code>, and the response is subscribed to in a
 * blocking fashion.
 *
 * @param ctx       request context forwarded to the executor
 * @param node      id of the node whose executor should perform the download
 * @param sampleSet name of the sample set to download
 */
public void downloadSampleData(Context ctx, int node, String sampleSet) {
    final String encodedSet = URLEncoder.encode(sampleSet, StandardCharsets.UTF_8);
    final String endpoint = "/action/download-sample-data?set=" + encodedSet;

    post(ctx, node, endpoint, "").blockingSubscribe();
}
public void exportData(Context ctx, int node) {
post(ctx, node, "/export/data", "").blockingSubscribe();
}
@ -166,4 +169,5 @@ public class ExecutorClient extends AbstractDynamicClient {
public void yieldDomain(Context context, int node, TransferItem item) {
post(context, node, "/transfer/yield", item).blockingSubscribe();
}
}

View File

@ -223,14 +223,12 @@ public class FileStorageService {
return maybePath;
}
/** Allocate a temporary storage of the given type */
public FileStorage allocateTemporaryStorage(FileStorageBase base,
FileStorageType type,
/** Allocate a storage area of the given type */
public FileStorage allocateStorage(FileStorageType type,
String prefix,
String description) throws IOException, SQLException
{
if (!base.type().permitsStorageType(type))
throw new RuntimeException("Attempting to allocate storage of type " + type + " in base of type " + base.type());
var base = getStorageBase(FileStorageBaseType.forFileStorageType(type));
Path newDir = allocateDirectory(base.asPath(), prefix);

View File

@ -1,18 +1,17 @@
package nu.marginalia.storage.model;
import java.util.EnumSet;
public enum FileStorageBaseType {
CURRENT,
WORK,
STORAGE,
BACKUP;
public boolean permitsStorageType(FileStorageType type) {
return switch (this) {
case BACKUP -> FileStorageType.BACKUP.equals(type);
case STORAGE -> EnumSet.of(FileStorageType.EXPORT, FileStorageType.CRAWL_DATA, FileStorageType.PROCESSED_DATA, FileStorageType.CRAWL_SPEC).contains(type);
default -> false;
public static FileStorageBaseType forFileStorageType(FileStorageType type) {
return switch (type) {
case EXPORT, CRAWL_DATA, PROCESSED_DATA, CRAWL_SPEC -> STORAGE;
case BACKUP -> BACKUP;
};
}
}

View File

@ -13,18 +13,14 @@ import org.testcontainers.containers.MariaDBContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.UUID;
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
@Testcontainers
@Execution(ExecutionMode.SAME_THREAD)
@Tag("slow")
@ -124,8 +120,7 @@ public class FileStorageServiceTest {
var storage = new FileStorageService(dataSource, 0);
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.STORAGE);
var fileStorage = storage.allocateTemporaryStorage(base, FileStorageType.CRAWL_DATA, "xyz", "thisShouldSucceed");
var fileStorage = storage.allocateStorage(FileStorageType.CRAWL_DATA, "xyz", "thisShouldSucceed");
System.out.println("Allocated " + fileStorage.asPath());
Assertions.assertTrue(Files.exists(fileStorage.asPath()));
tempDirs.add(fileStorage.asPath());

View File

@ -13,6 +13,8 @@ import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
@ -21,9 +23,11 @@ import java.nio.file.Path;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
@Singleton
public class ControlNodeActionsService {
private static final Logger logger = LoggerFactory.getLogger(ControlNodeActionsService.class);
private final IndexClient indexClient;
private final RedirectControl redirectControl;
private final FileStorageService fileStorageService;
@ -62,6 +66,9 @@ public class ControlNodeActionsService {
Spark.post("/public/nodes/:node/actions/sideload-stackexchange", this::sideloadStackexchange,
redirectControl.renderRedirectAcknowledgement("Sideloading", "..")
);
Spark.post("/public/nodes/:node/actions/download-sample-data", this::downloadSampleData,
redirectControl.renderRedirectAcknowledgement("Downloading", "..")
);
Spark.post("/public/nodes/:id/actions/new-crawl", this::triggerNewCrawl,
redirectControl.renderRedirectAcknowledgement("Crawling", "..")
);
@ -91,6 +98,21 @@ public class ControlNodeActionsService {
);
}
/** Handler for the download-sample-data node action.
 * <p>
 * Reads the <code>sample</code> query parameter, rejects missing or unknown values with a
 * {@link ControlValidationError}, and otherwise forwards the request to the executor
 * of the node given by the <code>:node</code> path parameter.
 *
 * @return an empty string in all non-error cases
 */
private Object downloadSampleData(Request request, Response response) {
    final String sampleSet = request.queryParams("sample");

    if (sampleSet == null) {
        throw new ControlValidationError("No sample specified", "A sample data set must be specified", "..");
    }

    // Only the sets offered by the download form are accepted
    final Set<String> knownSets = Set.of("sample-s", "sample-m", "sample-l", "sample-xl");
    if (!knownSets.contains(sampleSet)) {
        throw new ControlValidationError("Invalid sample specified", "A valid sample data set must be specified", "..");
    }

    final var context = Context.fromRequest(request);
    final int nodeId = Integer.parseInt(request.params("node"));

    executorClient.downloadSampleData(context, nodeId, sampleSet);

    logger.info("Downloading sample data set {}", sampleSet);

    return "";
}
public Object sideloadEncyclopedia(Request request, Response response) {
String source = request.queryParams("source");

View File

@ -0,0 +1,47 @@
<h1 class="my-3">Download Sample Data</h1>
<div class="my-3 p-3 border bg-light">
This will download sample crawl data from <a href="https://downloads.marginalia.nu">downloads.marginalia.nu</a> onto Node {{node.id}}.
This is a sample of real crawl data. It is intended for demo, testing and development purposes. Several sets are available.
</div>
<!-- Sample set selection form. Posts the chosen set as the "sample" parameter to the
     download-sample-data action. The radio group is marked required so the browser
     refuses to submit the form when no set has been selected. -->
<form method="post" action="actions/download-sample-data">
    <table class="table">
        <tr>
            <th>Use</th>
            <th>Set</th>
            <th>Description</th>
        </tr>
        <tr>
            <td><input id="sample-s" value="sample-s" name="sample" class="form-check-input" type="radio" required></td>
            <td><label for="sample-s">Small</label></td>
            <td>1000 Domains. About 2 GB. </td>
        </tr>
        <tr>
            <td><input id="sample-m" value="sample-m" name="sample" class="form-check-input" type="radio" required></td>
            <td><label for="sample-m">Medium</label></td>
            <td>2000 Domains. About 6 GB. Recommended.</td>
        </tr>
        <tr>
            <td><input id="sample-l" value="sample-l" name="sample" class="form-check-input" type="radio" required></td>
            <td><label for="sample-l">Large</label></td>
            <td>5000 Domains. About 20 GB.</td>
        </tr>
        <tr>
            <td><input id="sample-xl" value="sample-xl" name="sample" class="form-check-input" type="radio" required></td>
            <td><label for="sample-xl">Huge</label></td>
            <td>50,000 Domains. Around 180 GB. Primarily intended for pre-production like testing environments.
                Expect hours of processing time. </td>
        </tr>
    </table>
    <button
            class="btn btn-primary me-md-2"
            onclick="return confirm('Confirm downloading sample data onto node {{node.id}}');"
            type="submit">
        Start Download</button>
</form>

View File

@ -23,6 +23,7 @@
{{#if view.export-from-crawl-data}} {{> control/node/actions/partial-export-from-crawl-data }} {{/if}}
{{#if view.export-sample-data}} {{> control/node/actions/partial-export-sample-data }} {{/if}}
{{#if view.restore-backup}} {{> control/node/actions/partial-restore-backup }} {{/if}}
{{#if view.download-sample-data}} {{> control/node/actions/partial-download-sample-data }} {{/if}}
<div class="mt-10">&nbsp;</div>
</div>
</body>

View File

@ -24,6 +24,7 @@
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-stackexchange">Sideload Stackexchange</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-warc">Sideload WARC Files</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=sideload-dirtree">Sideload Dirtree</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=download-sample-data">Download Sample Crawl Data</a></li>
<li><hr class="dropdown-divider"></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-db-data">Export Database Data</a></li>
<li><a class="dropdown-item" href="/nodes/{{node.id}}/actions?view=export-sample-data">Export Sample Crawl Data</a></li>

View File

@ -61,6 +61,7 @@ dependencies {
implementation libs.zstd
implementation libs.jsoup
implementation libs.commons.io
implementation libs.commons.compress
implementation libs.commons.lang3
implementation libs.bundles.mariadb

View File

@ -17,7 +17,9 @@ public enum ExecutorActor {
EXPORT_FEEDS,
PROC_INDEX_CONSTRUCTOR_SPAWNER,
CONVERT,
RESTORE_BACKUP, EXPORT_SAMPLE_DATA;
RESTORE_BACKUP,
EXPORT_SAMPLE_DATA,
DOWNLOAD_SAMPLE;
public String id() {
return "fsm:" + name().toLowerCase();

View File

@ -47,6 +47,7 @@ public class ExecutorActorControlService {
ExportFeedsActor exportFeedsActor,
ExportSampleDataActor exportSampleDataActor,
ExportTermFreqActor exportTermFrequenciesActor,
DownloadSampleActor downloadSampleActor,
ExecutorActorStateMachines stateMachines) {
this.messageQueueFactory = messageQueueFactory;
this.eventLog = baseServiceParams.eventLog;
@ -75,6 +76,8 @@ public class ExecutorActorControlService {
register(ExecutorActor.EXPORT_FEEDS, exportFeedsActor);
register(ExecutorActor.EXPORT_SAMPLE_DATA, exportSampleDataActor);
register(ExecutorActor.EXPORT_TERM_FREQUENCIES, exportTermFrequenciesActor);
register(ExecutorActor.DOWNLOAD_SAMPLE, downloadSampleActor);
}
private void register(ExecutorActor process, RecordActorPrototype graph) {

View File

@ -13,7 +13,6 @@ import nu.marginalia.process.ProcessService;
import nu.marginalia.sideload.SideloadHelper;
import nu.marginalia.sideload.StackExchangeSideloadHelper;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.storage.model.FileStorageType;
@ -49,8 +48,7 @@ public class ConvertActor extends RecordActorPrototype {
return switch (self) {
case Convert (FileStorageId fid) -> {
var toProcess = storageService.getStorage(fid);
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
var processedArea = storageService.allocateStorage(
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Data; " + toProcess.description());
@ -69,8 +67,7 @@ public class ConvertActor extends RecordActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
var processedArea = storageService.allocateStorage(
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Dirtree Data; " + fileName);
@ -88,8 +85,7 @@ public class ConvertActor extends RecordActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
var processedArea = storageService.allocateStorage(
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Warc Data; " + fileName);
@ -121,8 +117,7 @@ public class ConvertActor extends RecordActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
var processedArea = storageService.allocateStorage(
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Encylopedia Data; " + fileName);
@ -171,8 +166,7 @@ public class ConvertActor extends RecordActorPrototype {
String fileName = sourcePath.toFile().getName();
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base,
var processedArea = storageService.allocateStorage(
FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Stackexchange Data; " + fileName);

View File

@ -17,14 +17,12 @@ import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.svc.BackupService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import nu.marginalia.index.client.IndexClient;
import nu.marginalia.index.client.IndexMqEndpoints;
import nu.marginalia.mq.MqMessageState;
import nu.marginalia.mq.outbox.MqOutbox;
import nu.marginalia.mqapi.converting.ConvertAction;
import nu.marginalia.mqapi.converting.ConvertRequest;
import nu.marginalia.mqapi.index.CreateIndexRequest;
import nu.marginalia.mqapi.index.IndexName;
@ -96,8 +94,7 @@ public class ConvertAndLoadActor extends RecordActorPrototype {
if (storage.type() != FileStorageType.CRAWL_DATA) yield new Error("Bad storage type " + storage.type());
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var processedArea = storageService.allocateTemporaryStorage(base, FileStorageType.PROCESSED_DATA, "processed-data",
var processedArea = storageService.allocateStorage(FileStorageType.PROCESSED_DATA, "processed-data",
"Processed Data; " + storage.description());
storageService.setFileStorageState(processedArea.id(), FileStorageState.NEW);

View File

@ -10,7 +10,6 @@ import nu.marginalia.actor.state.Resume;
import nu.marginalia.process.ProcessOutboxes;
import nu.marginalia.process.ProcessService;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import nu.marginalia.mq.MqMessageState;
@ -43,9 +42,7 @@ public class CrawlActor extends RecordActorPrototype {
if (storage == null) yield new Error("Bad storage id");
if (storage.type() != FileStorageType.CRAWL_SPEC) yield new Error("Bad storage type " + storage.type());
var base = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var dataArea = storageService.allocateTemporaryStorage(
base,
var dataArea = storageService.allocateStorage(
FileStorageType.CRAWL_DATA,
"crawl-data",
storage.description());

View File

@ -7,7 +7,6 @@ import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.crawlspec.CrawlSpecFileNames;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -41,8 +40,7 @@ public class CrawlJobExtractorActor extends RecordActorPrototype {
public ActorStep transition(ActorStep self) throws Exception {
return switch (self) {
case CreateFromUrl(String description, String url) -> {
var base = fileStorageService.getStorageBase(FileStorageBaseType.STORAGE);
var storage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_SPEC, "crawl-spec", description);
var storage = fileStorageService.allocateStorage(FileStorageType.CRAWL_SPEC, "crawl-spec", description);
Path urlsTxt = storage.asPath().resolve("urls.txt");

View File

@ -0,0 +1,133 @@
package nu.marginalia.actor.task;
import com.google.gson.Gson;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.*;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.PosixFilePermissions;
/** Actor that downloads a tar archive of sample crawl data from downloads.marginalia.nu
 * and unpacks it into a newly allocated CRAWL_DATA storage area.
 */
@Singleton
public class DownloadSampleActor extends RecordActorPrototype {

    private final FileStorageService storageService;
    private final Logger logger = LoggerFactory.getLogger(getClass());

    /** Step that downloads and unpacks the sample set with the given name. */
    public record Run(String setName) implements ActorStep {}

    /** Perform the download.
     * <p>
     * A storage area is allocated and marked NEW while the archive is being unpacked.
     * On an I/O error the storage is flagged for deletion and the actor ends in Error;
     * otherwise the storage state is reset to UNSET and the actor ends normally.
     */
    @Override
    public ActorStep transition(ActorStep self) throws Exception {
        return switch(self) {
            case Run(String setName) -> {
                final FileStorage newStorage = storageService.allocateStorage(
                        FileStorageType.CRAWL_DATA,
                        "sample-crawl-data",
                        "Sample " + setName);

                storageService.setFileStorageState(newStorage.id(), FileStorageState.NEW);

                URL downloadURI = getDownloadURL(setName);

                boolean flaggedForDeletion = false;
                try {
                    downloadArchive(downloadURI, newStorage.asPath());
                }
                catch (IOException ex) {
                    logger.error("Error downloading sample", ex);
                    storageService.flagFileForDeletion(newStorage.id());
                    flaggedForDeletion = true;
                    yield new Error();
                }
                finally {
                    // NOTE(review): presumably flagFileForDeletion records the deletion request
                    // in the storage state; resetting the state afterwards would then undo it.
                    // Skip the reset in that one case -- confirm against FileStorageService.
                    if (!flaggedForDeletion) {
                        storageService.setFileStorageState(newStorage.id(), FileStorageState.UNSET);
                    }
                }

                yield new End();
            }
            default -> new Error();
        };
    }

    /** Stream the tar archive at downloadURI and extract its regular files beneath outputPath.
     *
     * @param downloadURI location of the tar archive
     * @param outputPath  directory to extract into
     * @throws IOException if the download or extraction fails, or if an archive entry
     *                     would be written outside of outputPath
     * @throws InterruptedException if the thread is interrupted between entries
     */
    private void downloadArchive(URL downloadURI, Path outputPath) throws IOException, InterruptedException {
        // See the documentation for commons compress:
        // https://commons.apache.org/proper/commons-compress/examples.html

        final Path outputRoot = outputPath.normalize();

        try (var tar = new TarArchiveInputStream(downloadURI.openStream())) {
            TarArchiveEntry nextEntry;
            byte[] buffer = new byte[8192];

            while ((nextEntry = tar.getNextEntry()) != null) {
                // Poll for interruption, to ensure this can be cancelled
                if (Thread.interrupted()) {
                    throw new InterruptedException();
                }

                if (nextEntry.isDirectory()) {
                    continue;
                }

                // Refuse entries whose name ("../x", "/abs/path", ...) would resolve to a
                // location outside the storage directory
                Path outputFile = outputRoot.resolve(nextEntry.getName()).normalize();
                if (!outputFile.startsWith(outputRoot) || outputFile.equals(outputRoot)) {
                    throw new IOException("Tar entry resolves outside of the output directory: " + nextEntry.getName());
                }

                Files.createDirectories(outputFile.getParent(),
                        PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))
                );

                long size = nextEntry.getSize();

                // Extract tar entry
                try (var fos = Files.newOutputStream(outputFile,
                        StandardOpenOption.CREATE,
                        StandardOpenOption.TRUNCATE_EXISTING)) {
                    transferBytes(tar, fos, buffer, size);
                }

                // Permissions go on the extracted file; applying "rw-r--r--" to the
                // directory would remove its execute bit and make it untraversable
                Files.setPosixFilePermissions(outputFile, PosixFilePermissions.fromString("rw-r--r--"));
            }
        }
    }

    /** Copy exactly size bytes from inputStream to outputStream using the provided buffer.
     *
     * @throws IOException if the input ends before size bytes have been copied
     */
    private void transferBytes(InputStream inputStream, OutputStream outputStream, byte[] buffer, long size)
            throws IOException
    {
        long copiedSize = 0;

        while (copiedSize < size) {
            // Never request more than what remains of the entry
            int toRead = (int) Math.min(buffer.length, size - copiedSize);
            int read = inputStream.read(buffer, 0, toRead);

            if (read < 0) // We've been promised a file of length 'size', so this shouldn't happen, but just in case...
                throw new IOException("Unexpected end of stream");

            outputStream.write(buffer, 0, read);
            copiedSize += read;
        }
    }

    /** Build the URL of the tar archive for the given sample set name. */
    private URL getDownloadURL(String setName) throws MalformedURLException {
        return URI.create(STR."https://downloads.marginalia.nu/samples/\{setName}.tar").toURL();
    }

    @Override
    public String describe() {
        return "Download a sample of crawl data from downloads.marginalia.nu";
    }

    @Inject
    public DownloadSampleActor(Gson gson,
                               FileStorageService storageService)
    {
        super(gson);
        this.storageService = storageService;
    }
}

View File

@ -23,8 +23,7 @@ public class ExportAtagsActor extends RecordActorPrototype {
public ActorStep transition(ActorStep self) throws Exception {
return switch(self) {
case Export(FileStorageId crawlId) -> {
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT, "atag-export", "Anchor Tags " + LocalDateTime.now());
var storage = storageService.allocateStorage(FileStorageType.EXPORT, "atag-export", "Anchor Tags " + LocalDateTime.now());
if (storage == null) yield new Error("Bad storage id");
yield new Run(crawlId, storage.id());

View File

@ -8,7 +8,6 @@ import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.query.client.QueryClient;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import org.slf4j.Logger;
@ -43,8 +42,7 @@ public class ExportDataActor extends RecordActorPrototype {
public ActorStep transition(ActorStep self) throws Exception {
return switch(self) {
case Export() -> {
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT, "db-export", "DB Exports " + LocalDateTime.now());
var storage = storageService.allocateStorage(FileStorageType.EXPORT, "db-export", "DB Exports " + LocalDateTime.now());
if (storage == null) yield new Error("Bad storage id");
yield new ExportBlacklist(storage.id());

View File

@ -8,7 +8,6 @@ import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.extractor.ExporterIf;
import nu.marginalia.extractor.FeedExporter;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.storage.model.FileStorageType;
@ -29,8 +28,7 @@ public class ExportFeedsActor extends RecordActorPrototype {
public ActorStep transition(ActorStep self) throws Exception {
return switch(self) {
case Export(FileStorageId crawlId) -> {
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT, "feed-export", "Feeds " + LocalDateTime.now());
var storage = storageService.allocateStorage(FileStorageType.EXPORT, "feed-export", "Feeds " + LocalDateTime.now());
if (storage == null) yield new Error("Bad storage id");
yield new Run(crawlId, storage.id());

View File

@ -5,11 +5,8 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.actor.prototype.RecordActorPrototype;
import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.extractor.ExporterIf;
import nu.marginalia.extractor.FeedExporter;
import nu.marginalia.extractor.SampleDataExporter;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.storage.model.FileStorageType;
@ -30,8 +27,7 @@ public class ExportSampleDataActor extends RecordActorPrototype {
public ActorStep transition(ActorStep self) throws Exception {
return switch(self) {
case Export(FileStorageId crawlId, int size, String name) -> {
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT,
var storage = storageService.allocateStorage(FileStorageType.EXPORT,
"crawl-sample-export",
STR."Crawl Data Sample \{name}/\{size} \{LocalDateTime.now()}"
);

View File

@ -8,7 +8,6 @@ import nu.marginalia.actor.state.ActorStep;
import nu.marginalia.extractor.ExporterIf;
import nu.marginalia.extractor.TermFrequencyExporter;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageState;
import nu.marginalia.storage.model.FileStorageType;
@ -25,8 +24,7 @@ public class ExportTermFreqActor extends RecordActorPrototype {
public ActorStep transition(ActorStep self) throws Exception {
return switch(self) {
case Export(FileStorageId crawlId) -> {
var storageBase = storageService.getStorageBase(FileStorageBaseType.STORAGE);
var storage = storageService.allocateTemporaryStorage(storageBase, FileStorageType.EXPORT, "term-freq-export", "Term Frequencies " + LocalDateTime.now());
var storage = storageService.allocateStorage(FileStorageType.EXPORT, "term-freq-export", "Term Frequencies " + LocalDateTime.now());
if (storage == null) yield new Error("Bad storage id");
yield new Run(crawlId, storage.id());

View File

@ -71,6 +71,8 @@ public class ExecutorSvc extends Service {
Spark.post("/sideload/stackexchange", sideloadService::sideloadStackexchange);
Spark.post("/sideload/encyclopedia", sideloadService::sideloadEncyclopedia);
Spark.post("/action/download-sample-data", sideloadService::downloadSampleData);
Spark.post("/export/atags", exportService::exportAtags);
Spark.post("/export/sample-data", exportService::exportSampleData);
Spark.post("/export/feeds", exportService::exportFeeds);

View File

@ -5,8 +5,11 @@ import nu.marginalia.WmsaHome;
import nu.marginalia.actor.ExecutorActor;
import nu.marginalia.actor.ExecutorActorControlService;
import nu.marginalia.actor.task.ConvertActor;
import nu.marginalia.actor.task.DownloadSampleActor;
import nu.marginalia.executor.upload.UploadDirContents;
import nu.marginalia.executor.upload.UploadDirItem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
@ -18,6 +21,7 @@ import java.util.List;
public class SideloadService {
private final ExecutorActorControlService actorControlService;
private static final Logger logger = LoggerFactory.getLogger(SideloadService.class);
@Inject
public SideloadService(ExecutorActorControlService actorControlService) {
@ -56,4 +60,11 @@ public class SideloadService {
}
/** Start the DOWNLOAD_SAMPLE actor for the sample set named by the <code>set</code> query parameter.
 *
 * @return an empty string
 */
public Object downloadSampleData(Request request, Response response) throws Exception {
    final var firstStep = new DownloadSampleActor.Run(request.queryParams("set"));

    actorControlService.startFrom(ExecutorActor.DOWNLOAD_SAMPLE, firstStep);

    return "";
}
}

View File

@ -15,7 +15,6 @@ import nu.marginalia.mq.persistence.MqPersistence;
import nu.marginalia.process.log.WorkLog;
import nu.marginalia.service.module.ServiceConfiguration;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import org.apache.commons.io.FileUtils;
@ -187,8 +186,7 @@ public class TransferService {
// Ensure crawl data exists to receive into
if (storages.isEmpty()) {
var storage = fileStorageService.allocateTemporaryStorage(
fileStorageService.getStorageBase(FileStorageBaseType.STORAGE),
var storage = fileStorageService.allocateStorage(
FileStorageType.CRAWL_DATA,
"crawl-data",
"Crawl Data"

View File

@ -5,7 +5,6 @@ import com.github.luben.zstd.ZstdOutputStream;
import nu.marginalia.IndexLocations;
import nu.marginalia.service.control.ServiceHeartbeat;
import nu.marginalia.storage.FileStorageService;
import nu.marginalia.storage.model.FileStorageBaseType;
import nu.marginalia.storage.model.FileStorageId;
import nu.marginalia.storage.model.FileStorageType;
import nu.marginallia.index.journal.IndexJournalFileNames;
@ -45,11 +44,9 @@ public class BackupService {
* This backup can later be dehydrated and quickly loaded into _LIVE.
* */
public void createBackupFromStaging(List<FileStorageId> associatedIds) throws SQLException, IOException {
var backupBase = storageService.getStorageBase(FileStorageBaseType.BACKUP);
String desc = "Pre-load backup snapshot " + LocalDateTime.now();
var backupStorage = storageService.allocateTemporaryStorage(backupBase,
var backupStorage = storageService.allocateStorage(
FileStorageType.BACKUP, "snapshot", desc);
for (var associatedId : associatedIds) {