From d56b394bcc1209b6d53ac63bc0a3e327d919096d Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 10 Jan 2024 12:13:30 +0100 Subject: [PATCH] (control) GUI for loading external WARC files --- .../executor/client/ExecutorClient.java | 6 ++++ .../sideload/warc/WarcSideloadFactory.java | 19 ++++++++-- .../node/svc/ControlNodeActionsService.java | 19 ++++++++++ .../templates/control/node/node-actions.hdb | 36 ++++++++++++++++++- .../nu/marginalia/executor/ExecutorSvc.java | 1 + .../executor/svc/SideloadService.java | 5 ++- 6 files changed, 82 insertions(+), 4 deletions(-) diff --git a/code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorClient.java b/code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorClient.java index 2f6abfd5..8ce5c951 100644 --- a/code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorClient.java +++ b/code/api/executor-api/src/main/java/nu/marginalia/executor/client/ExecutorClient.java @@ -75,6 +75,12 @@ public class ExecutorClient extends AbstractDynamicClient { "").blockingSubscribe(); } + public void sideloadWarc(Context ctx, int node, Path sourcePath) { + post(ctx, node, + "/sideload/warc?path="+ URLEncoder.encode(sourcePath.toString(), StandardCharsets.UTF_8), + "").blockingSubscribe(); + } + public void sideloadStackexchange(Context ctx, int node, Path sourcePath) { post(ctx, node, "/sideload/stackexchange?path="+URLEncoder.encode(sourcePath.toString(), StandardCharsets.UTF_8), diff --git a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/warc/WarcSideloadFactory.java b/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/warc/WarcSideloadFactory.java index 35fb6d3a..f192a961 100644 --- a/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/warc/WarcSideloadFactory.java +++ b/code/processes/converting-process/src/main/java/nu/marginalia/converting/sideload/warc/WarcSideloadFactory.java @@ -1,6 +1,8 @@ package nu.marginalia.converting.sideload.warc; +import com.google.inject.Inject; import nu.marginalia.converting.sideload.SideloadSource; +import nu.marginalia.converting.sideload.SideloaderProcessing; import java.io.IOException; import java.nio.file.Files; @@ -11,6 +13,13 @@ import java.util.List; public class WarcSideloadFactory { + private final SideloaderProcessing processing; + + @Inject + public WarcSideloadFactory(SideloaderProcessing processing) { + this.processing = processing; + } + public Collection createSideloaders(Path pathToWarcFiles) throws IOException { final List files = new ArrayList<>(); @@ -21,8 +30,14 @@ public class WarcSideloadFactory { .forEach(files::add); } - // stub - return null; + + List sources = new ArrayList<>(); + + for (Path file : files) { + sources.add(new WarcSideloader(file, processing)); + } + + return sources; } private boolean isWarcFile(Path path) { diff --git a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java b/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java index 9cd11fb4..6e97b7e6 100644 --- a/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java +++ b/code/services-core/control-service/src/main/java/nu/marginalia/control/node/svc/ControlNodeActionsService.java @@ -46,6 +46,9 @@ public class ControlNodeActionsService { Spark.post("/public/nodes/:node/actions/sideload-dirtree", this::sideloadDirtree, redirectControl.renderRedirectAcknowledgement("Sideloading", "..") ); + Spark.post("/public/nodes/:node/actions/sideload-warc", this::sideloadWarc, + redirectControl.renderRedirectAcknowledgement("Sideloading", "..") + ); Spark.post("/public/nodes/:node/actions/sideload-stackexchange", this::sideloadStackexchange, redirectControl.renderRedirectAcknowledgement("Sideloading", "..") ); @@ -86,6 +89,22 @@ public class ControlNodeActionsService { return ""; } + public Object sideloadWarc(Request request, Response response) throws Exception { + + Path sourcePath = Path.of(request.queryParams("source")); + if (!Files.exists(sourcePath)) { + Spark.halt(404); + return "No such file " + sourcePath; + } + + final int nodeId = Integer.parseInt(request.params("node")); + + eventLog.logEvent("USER-ACTION", "SIDELOAD WARC " + nodeId); + + executorClient.sideloadWarc(Context.fromRequest(request), nodeId, sourcePath); + + return ""; + } public Object sideloadStackexchange(Request request, Response response) throws Exception { Path sourcePath = Path.of(request.queryParams("source")); diff --git a/code/services-core/control-service/src/main/resources/templates/control/node/node-actions.hdb b/code/services-core/control-service/src/main/resources/templates/control/node/node-actions.hdb index f83b8034..bb886f03 100644 --- a/code/services-core/control-service/src/main/resources/templates/control/node/node-actions.hdb +++ b/code/services-core/control-service/src/main/resources/templates/control/node/node-actions.hdb @@ -269,12 +269,13 @@ Sideload Dirtree +
This will load HTML from a directory structure as specified by a YAML file.
- +
@@ -287,8 +288,41 @@
+
+
+

+ +

+ +
+ This will load HTML from a directory structure containing WARC files, one per domain. + +
+
+ + +
+
+ +
+
+ +
+
+
+
+
+ +

diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorSvc.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorSvc.java index cc8269c4..7817c2eb 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorSvc.java +++ b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/ExecutorSvc.java @@ -63,6 +63,7 @@ public class ExecutorSvc extends Service { Spark.post("/process/adjacency-calculation", processingService::startAdjacencyCalculation); Spark.post("/sideload/dirtree", sideloadService::sideloadDirtree); + Spark.post("/sideload/warc", sideloadService::sideloadWarc); Spark.post("/sideload/stackexchange", sideloadService::sideloadStackexchange); Spark.post("/sideload/encyclopedia", sideloadService::sideloadEncyclopedia); diff --git a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/SideloadService.java b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/SideloadService.java index a74e70ee..3ef3337b 100644 --- a/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/SideloadService.java +++ b/code/services-core/executor-service/src/main/java/nu/marginalia/executor/svc/SideloadService.java @@ -19,7 +19,10 @@ public class SideloadService { actorControlService.startFrom(ExecutorActor.CONVERT, new ConvertActor.ConvertDirtree(request.queryParams("path"))); return ""; } - + public Object sideloadWarc(Request request, Response response) throws Exception { + actorControlService.startFrom(ExecutorActor.CONVERT, new ConvertActor.ConvertWarc(request.queryParams("path"))); + return ""; + } public Object sideloadEncyclopedia(Request request, Response response) throws Exception { actorControlService.startFrom(ExecutorActor.CONVERT, new ConvertActor.ConvertEncyclopedia(