(control) GUI for loading external WARC files

This commit is contained in:
Viktor Lofgren 2024-01-10 12:13:30 +01:00
parent 55c9501e57
commit d56b394bcc
6 changed files with 82 additions and 4 deletions

View File

@ -75,6 +75,12 @@ public class ExecutorClient extends AbstractDynamicClient {
"").blockingSubscribe();
}
/**
 * Requests a WARC sideload on the given node.
 * <p>
 * The source path is URL-encoded (UTF-8) into the {@code path} query parameter
 * of a POST to {@code /sideload/warc} with an empty body, and the resulting
 * call is subscribed to with {@code blockingSubscribe()}.
 *
 * @param ctx        request context forwarded to {@code post}
 * @param node       id of the node the request is sent to
 * @param sourcePath location of the WARC data, sent as its string form
 */
public void sideloadWarc(Context ctx, int node, Path sourcePath) {
    final String encodedPath = URLEncoder.encode(sourcePath.toString(), StandardCharsets.UTF_8);
    final String endpoint = "/sideload/warc?path=" + encodedPath;

    post(ctx, node, endpoint, "").blockingSubscribe();
}
public void sideloadStackexchange(Context ctx, int node, Path sourcePath) {
post(ctx, node,
"/sideload/stackexchange?path="+URLEncoder.encode(sourcePath.toString(), StandardCharsets.UTF_8),

View File

@ -1,6 +1,8 @@
package nu.marginalia.converting.sideload.warc;
import com.google.inject.Inject;
import nu.marginalia.converting.sideload.SideloadSource;
import nu.marginalia.converting.sideload.SideloaderProcessing;
import java.io.IOException;
import java.nio.file.Files;
@ -11,6 +13,13 @@ import java.util.List;
public class WarcSideloadFactory {
private final SideloaderProcessing processing;
/**
 * Injection constructor.
 *
 * @param processing stored and handed to each {@code WarcSideloader}
 *                   this factory constructs
 */
@Inject
public WarcSideloadFactory(SideloaderProcessing processing) {
this.processing = processing;
}
public Collection<? extends SideloadSource> createSideloaders(Path pathToWarcFiles) throws IOException {
final List<Path> files = new ArrayList<>();
@ -21,8 +30,14 @@ public class WarcSideloadFactory {
.forEach(files::add);
}
// stub
return null;
List<WarcSideloader> sources = new ArrayList<>();
for (Path file : files) {
sources.add(new WarcSideloader(file, processing));
}
return sources;
}
private boolean isWarcFile(Path path) {

View File

@ -46,6 +46,9 @@ public class ControlNodeActionsService {
Spark.post("/public/nodes/:node/actions/sideload-dirtree", this::sideloadDirtree,
redirectControl.renderRedirectAcknowledgement("Sideloading", "..")
);
Spark.post("/public/nodes/:node/actions/sideload-warc", this::sideloadWarc,
redirectControl.renderRedirectAcknowledgement("Sideloading", "..")
);
Spark.post("/public/nodes/:node/actions/sideload-stackexchange", this::sideloadStackexchange,
redirectControl.renderRedirectAcknowledgement("Sideloading", "..")
);
@ -86,6 +89,22 @@ public class ControlNodeActionsService {
return "";
}
/**
 * Handles the "sideload WARC" action posted from the control GUI.
 * <p>
 * Reads the {@code source} form parameter and the {@code :node} route
 * parameter, logs the user action, and forwards the request to the executor
 * on that node.
 *
 * @param request  must carry a non-blank {@code source} parameter naming an
 *                 existing path, and a numeric {@code node} route parameter
 * @param response unused
 * @return an empty string as the response body
 * @throws NumberFormatException if the {@code node} route parameter is not an integer
 */
public Object sideloadWarc(Request request, Response response) throws Exception {
    final String source = request.queryParams("source");

    // Reject a missing or empty field up front: Path.of(null) would throw a
    // NullPointerException, and an empty path does not name a WARC location.
    if (source == null || source.isBlank()) {
        Spark.halt(400, "Missing parameter: source");
    }

    final Path sourcePath = Path.of(source);
    if (!Files.exists(sourcePath)) {
        // halt() aborts the request by throwing, so the message has to be
        // passed to it; a return statement placed after it is never reached.
        Spark.halt(404, "No such file " + sourcePath);
    }

    final int nodeId = Integer.parseInt(request.params("node"));

    eventLog.logEvent("USER-ACTION", "SIDELOAD WARC " + nodeId);
    executorClient.sideloadWarc(Context.fromRequest(request), nodeId, sourcePath);

    return "";
}
public Object sideloadStackexchange(Request request, Response response) throws Exception {
Path sourcePath = Path.of(request.queryParams("source"));

View File

@ -269,12 +269,13 @@
Sideload Dirtree
</button>
</h2>
<div id="collapseSideloadDirtree" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
This will load HTML from a directory structure as specified by a YAML file.
<form method="post" action="actions/sideload-dirtree" onsubmit="return confirm('Confirm sideloading')">
<div class="my-3 py-3">
<label for="source" class="form-label">yaml file location on server</label>
<label for="source" class="form-label">Path yaml file location on server</label>
<div class="row">
<div class="col">
@ -287,8 +288,41 @@
</div>
</form>
</div>
</div>
<div class="accordion-item">
    <h2 class="accordion-header">
        <button class="accordion-button collapsed"
                type="button"
                data-bs-toggle="collapse"
                data-bs-target="#collapseSideloadWarc"
                aria-expanded="false"
                aria-controls="collapseSideloadWarc">
            Sideload WARC Files
        </button>
    </h2>
    <div id="collapseSideloadWarc" class="accordion-collapse collapse p-3" data-bs-parent="#accordionActions">
        This will load HTML from a directory structure containing WARC files, one per domain.
        <form method="post" action="actions/sideload-warc" onsubmit="return confirm('Confirm sideloading')">
            <div class="my-3 py-3">
                <!-- The id is specific to this form so the label targets this input; the posted field name stays "source". -->
                <label for="sideload-warc-source" class="form-label">Path WARC file location on server</label>
                <div class="row">
                    <div class="col">
                        <input id="sideload-warc-source" name="source" class="form-control" value="">
                    </div>
                    <div class="col">
                        <button type="submit" class="btn btn-primary">Sideload WARC Files</button>
                    </div>
                </div>
            </div>
        </form>
    </div>
</div>
<div class="accordion-item">
<h2 class="accordion-header">

View File

@ -63,6 +63,7 @@ public class ExecutorSvc extends Service {
Spark.post("/process/adjacency-calculation", processingService::startAdjacencyCalculation);
Spark.post("/sideload/dirtree", sideloadService::sideloadDirtree);
Spark.post("/sideload/warc", sideloadService::sideloadWarc);
Spark.post("/sideload/stackexchange", sideloadService::sideloadStackexchange);
Spark.post("/sideload/encyclopedia", sideloadService::sideloadEncyclopedia);

View File

@ -19,7 +19,10 @@ public class SideloadService {
actorControlService.startFrom(ExecutorActor.CONVERT, new ConvertActor.ConvertDirtree(request.queryParams("path")));
return "";
}
/**
 * Starts the {@code CONVERT} actor with a {@code ConvertWarc} message built
 * from the request's {@code path} query parameter.
 *
 * @param request  supplies the {@code path} query parameter
 * @param response unused
 * @return an empty string as the response body
 */
public Object sideloadWarc(Request request, Response response) throws Exception {
    final String warcPath = request.queryParams("path");
    final ConvertActor.ConvertWarc startMessage = new ConvertActor.ConvertWarc(warcPath);

    actorControlService.startFrom(ExecutorActor.CONVERT, startMessage);

    return "";
}
public Object sideloadEncyclopedia(Request request, Response response) throws Exception {
actorControlService.startFrom(ExecutorActor.CONVERT,
new ConvertActor.ConvertEncyclopedia(