(control) Separate [Process] and [Process and Load] actions for crawl data; all SLOW data is deletable.

Viktor Lofgren 2023-08-13 13:39:59 +02:00
parent 8210e49b4e
commit c56ee10185
7 changed files with 32 additions and 4 deletions

@@ -145,6 +145,7 @@ public class ControlService extends Service {
Spark.post("/public/storage/:fid/crawl", controlActorService::triggerCrawling, redirectToActors);
Spark.post("/public/storage/:fid/recrawl", controlActorService::triggerRecrawling, redirectToActors);
Spark.post("/public/storage/:fid/process", controlActorService::triggerProcessing, redirectToActors);
Spark.post("/public/storage/:fid/process-and-load", controlActorService::triggerProcessingWithLoad, redirectToActors);
Spark.post("/public/storage/:fid/load", controlActorService::loadProcessedData, redirectToActors);
Spark.post("/public/storage/specs", controlActorService::createCrawlSpecification, redirectToStorage);

@@ -14,7 +14,6 @@ public enum Actor {
CRAWL_JOB_EXTRACTOR,
EXPORT_DATA,
TRUNCATE_LINK_DATABASE,
CONVERT;
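The hunk above is truncated and does not show the full enum. Pieced together with the references in ControlActorService further down, it presumably reads roughly as follows; the elided entries and their exact order are assumptions:

```java
public enum Actor {
    // ... earlier entries elided; CONVERT_AND_LOAD is among them,
    // since ControlActorService (below) starts it by that name ...
    CRAWL_JOB_EXTRACTOR,
    EXPORT_DATA,
    TRUNCATE_LINK_DATABASE,
    CONVERT; // backs the standalone [Process] action
}
```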

@@ -31,6 +31,7 @@ public class ConvertActor extends AbstractStateGraph {
// STATES
+ public static final String INITIAL = "INITIAL";
public static final String CONVERT = "CONVERT";
public static final String CONVERT_ENCYCLOPEDIA = "CONVERT_ENCYCLOPEDIA";
public static final String CONVERT_STACKEXCHANGE = "CONVERT_STACKEXCHANGE";
@@ -72,6 +73,12 @@ public class ConvertActor extends AbstractStateGraph {
this.gson = gson;
}
+ @GraphState(name= INITIAL, resume = ResumeBehavior.ERROR,
+ description = "Pro forma initial state")
+ public void initial(Integer unused) {
+ error("This actor does not support the initial state");
+ }
@GraphState(name = CONVERT,
next = CONVERT_WAIT,
resume = ResumeBehavior.ERROR,

@@ -1,6 +1,7 @@
package nu.marginalia.control.model;
import nu.marginalia.db.storage.model.FileStorage;
+ import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageType;
public record FileStorageWithActions(FileStorage storage) {
@@ -18,7 +19,6 @@ public record FileStorageWithActions(FileStorage storage) {
return storage.type() == FileStorageType.CRAWL_DATA;
}
public boolean isDeletable() {
- return storage.type() == FileStorageType.PROCESSED_DATA
- || storage.type() == FileStorageType.BACKUP;
+ return storage.base().type() == FileStorageBaseType.SLOW;
}
}
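The effect of the isDeletable() change: deletability is now decided by where the data lives (its storage base) rather than what kind of data it is, which is what makes all SLOW data, crawl data included, deletable. Side by side, using only the accessors visible in the hunk:

```java
// Before this commit: only processed data and backups could be deleted.
static boolean deletableBefore(FileStorage storage) {
    return storage.type() == FileStorageType.PROCESSED_DATA
        || storage.type() == FileStorageType.BACKUP;
}

// After: anything stored on a SLOW base is deletable, regardless of type.
static boolean deletableAfter(FileStorage storage) {
    return storage.base().type() == FileStorageBaseType.SLOW;
}
```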

@@ -3,6 +3,7 @@ package nu.marginalia.control.svc;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import nu.marginalia.control.actor.ControlActors;
+ import nu.marginalia.control.actor.task.ConvertActor;
import nu.marginalia.control.actor.task.CrawlJobExtractorActor;
import nu.marginalia.control.actor.task.ConvertAndLoadActor;
import nu.marginalia.control.actor.task.RecrawlActor;
@ -65,7 +66,18 @@ public class ControlActorService {
);
return "";
}
public Object triggerProcessing(Request request, Response response) throws Exception {
controlActors.startFrom(
Actor.CONVERT,
ConvertActor.CONVERT,
FileStorageId.parse(request.params("fid"))
);
return "";
}
public Object triggerProcessingWithLoad(Request request, Response response) throws Exception {
controlActors.start(
Actor.CONVERT_AND_LOAD,
FileStorageId.parse(request.params("fid"))
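The hunk is cut off before the end of triggerProcessingWithLoad; presumably it closes the same way as triggerProcessing above it. The pair likely reads as follows in full; only the tail of the second method is assumed:

```java
public Object triggerProcessing(Request request, Response response) throws Exception {
    // Convert only: starts the CONVERT actor directly in its CONVERT state,
    // skipping the pro forma INITIAL state seen in ConvertActor above.
    controlActors.startFrom(
            Actor.CONVERT,
            ConvertActor.CONVERT,
            FileStorageId.parse(request.params("fid"))
    );
    return "";
}

public Object triggerProcessingWithLoad(Request request, Response response) throws Exception {
    // Convert and load: starts the combined pipeline from its initial state.
    controlActors.start(
            Actor.CONVERT_AND_LOAD,
            FileStorageId.parse(request.params("fid"))
    );
    return "";
}
```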

@@ -74,7 +74,16 @@
<td><button type="submit">Process</button></td>
</tr>
</form>
<form method="post" action="/storage/{{storage.id}}/process-and-load" onsubmit="return confirm('Confirm processing and loading of {{storage.path}}')">
<tr>
<td>Process and load this data into index<br>
then automatically load it into the index and db</td>
<td><button type="submit">Process and load</button></td>
</tr>
</form>
{{/if}}
{{#if isRecrawlable}}
<form method="post" action="/storage/{{storage.id}}/recrawl" onsubmit="return confirm('Confirm re-crawling of {{storage.path}}')">
<tr>

@@ -88,7 +88,7 @@ GUI (see step 5).
* Go to `Storage`
* Go to `Crawl Data`
* Find the data set you want to process and click `[Info]`
- * Click `[Process]`
+ * Click `[Process and load]`
This will take anywhere from a few minutes to a few hours depending on which
data set you downloaded. You can monitor the progress from the `Overview` tab