From d6b07e4d01d756331936b00552363312cde7e831 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Fri, 21 Jul 2023 19:56:16 +0200 Subject: [PATCH] (controller) Improve the storage interface --- code/libraries/big-string/readme.md | 4 + .../nu/marginalia/control/ControlService.java | 26 +++++- .../model/FileStorageWithRelatedEntries.java | 10 +++ .../svc/ControlFileStorageService.java | 71 +++++++++++++--- .../main/resources/static/control/style.css | 13 ++- .../resources/templates/control/index.hdb | 2 +- .../control/partials/storage-table.hdb | 34 ++++++++ .../control/partials/storage-types.hdb | 6 ++ .../templates/control/storage-crawls.hdb | 28 +++++++ .../templates/control/storage-details.hdb | 81 +++++++++++++++++++ .../templates/control/storage-overview.hdb | 54 +++++++++++++ .../templates/control/storage-processed.hdb | 26 ++++++ .../templates/control/storage-specs.hdb | 64 +++++++++++++++ .../resources/templates/control/storage.hdb | 78 ------------------ 14 files changed, 406 insertions(+), 91 deletions(-) create mode 100644 code/services-satellite/control-service/src/main/java/nu/marginalia/control/model/FileStorageWithRelatedEntries.java create mode 100644 code/services-satellite/control-service/src/main/resources/templates/control/partials/storage-table.hdb create mode 100644 code/services-satellite/control-service/src/main/resources/templates/control/partials/storage-types.hdb create mode 100644 code/services-satellite/control-service/src/main/resources/templates/control/storage-crawls.hdb create mode 100644 code/services-satellite/control-service/src/main/resources/templates/control/storage-details.hdb create mode 100644 code/services-satellite/control-service/src/main/resources/templates/control/storage-overview.hdb create mode 100644 code/services-satellite/control-service/src/main/resources/templates/control/storage-processed.hdb create mode 100644 code/services-satellite/control-service/src/main/resources/templates/control/storage-specs.hdb delete mode 100644 code/services-satellite/control-service/src/main/resources/templates/control/storage.hdb diff --git a/code/libraries/big-string/readme.md b/code/libraries/big-string/readme.md index 84fab2a2..f03c64ad 100644 --- a/code/libraries/big-string/readme.md +++ b/code/libraries/big-string/readme.md @@ -4,6 +4,10 @@ Microlibrary that offers string compression. This is useful when having to load of HTML documents in memory during conversion. XML has been described as the opposite of a compression scheme, and as a result, HTML compresses ridiculously well. +## Configuration + +If the Java property 'bigstring.disabled' is set to true, the BigString class will not compress strings. + ## Demo ```java diff --git a/code/services-satellite/control-service/src/main/java/nu/marginalia/control/ControlService.java b/code/services-satellite/control-service/src/main/java/nu/marginalia/control/ControlService.java index 82869816..eb43f9cb 100644 --- a/code/services-satellite/control-service/src/main/java/nu/marginalia/control/ControlService.java +++ b/code/services-satellite/control-service/src/main/java/nu/marginalia/control/ControlService.java @@ -5,6 +5,8 @@ import com.google.inject.Inject; import nu.marginalia.client.ServiceMonitors; import nu.marginalia.control.model.Actor; import nu.marginalia.control.svc.*; +import nu.marginalia.db.storage.model.FileStorageId; +import nu.marginalia.db.storage.model.FileStorageType; import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.renderer.RendererFactory; import nu.marginalia.service.server.*; @@ -15,6 +17,7 @@ import spark.Response; import spark.Spark; import java.io.IOException; +import java.sql.SQLException; import java.util.Map; public class ControlService extends Service { @@ -53,7 +56,11 @@ public class ControlService extends Service { var serviceByIdRenderer = rendererFactory.renderer("control/service-by-id"); var actorsRenderer = rendererFactory.renderer("control/actors"); var actorDetailsRenderer = rendererFactory.renderer("control/actor-details"); - var storageRenderer = rendererFactory.renderer("control/storage"); + var storageRenderer = rendererFactory.renderer("control/storage-overview"); + var storageSpecsRenderer = rendererFactory.renderer("control/storage-specs"); + var storageCrawlsRenderer = rendererFactory.renderer("control/storage-crawls"); + var storageProcessedRenderer = rendererFactory.renderer("control/storage-processed"); + var storageDetailsRenderer = rendererFactory.renderer("control/storage-details"); this.controlActorService = controlActorService; @@ -74,6 +81,11 @@ public class ControlService extends Service { Spark.get("/public/actors", this::processesModel, actorsRenderer::render); Spark.get("/public/actors/:fsm", this::actorDetailsModel, actorDetailsRenderer::render); Spark.get("/public/storage", this::storageModel, storageRenderer::render); + Spark.get("/public/storage/specs", this::storageModelSpecs, storageSpecsRenderer::render); + Spark.get("/public/storage/crawls", this::storageModelCrawls, storageCrawlsRenderer::render); + Spark.get("/public/storage/processed", this::storageModelProcessed, storageProcessedRenderer::render); + Spark.get("/public/storage/:id", this::storageDetailsModel, storageDetailsRenderer::render); + final HtmlRedirect redirectToServices = new HtmlRedirect("/services"); final HtmlRedirect redirectToProcesses = new HtmlRedirect("/actors"); @@ -118,6 +130,18 @@ public class ControlService extends Service { return Map.of("storage", controlFileStorageService.getStorageList()); } + private Object storageDetailsModel(Request request, Response response) throws SQLException { + return Map.of("storage", controlFileStorageService.getFileStorageWithRelatedEntries(FileStorageId.parse(request.params("id")))); + } + private Object storageModelSpecs(Request request, Response response) { + return Map.of("storage", controlFileStorageService.getStorageList(FileStorageType.CRAWL_SPEC)); + } + private Object storageModelCrawls(Request request, Response response) { + return Map.of("storage", controlFileStorageService.getStorageList(FileStorageType.CRAWL_DATA)); + } + private Object storageModelProcessed(Request request, Response response) { + return Map.of("storage", controlFileStorageService.getStorageList(FileStorageType.PROCESSED_DATA)); + } private Object servicesModel(Request request, Response response) { return Map.of("services", heartbeatService.getServiceHeartbeats(), "events", eventLogService.getLastEntries(20)); diff --git a/code/services-satellite/control-service/src/main/java/nu/marginalia/control/model/FileStorageWithRelatedEntries.java b/code/services-satellite/control-service/src/main/java/nu/marginalia/control/model/FileStorageWithRelatedEntries.java new file mode 100644 index 00000000..28afba5d --- /dev/null +++ b/code/services-satellite/control-service/src/main/java/nu/marginalia/control/model/FileStorageWithRelatedEntries.java @@ -0,0 +1,10 @@ +package nu.marginalia.control.model; + +import nu.marginalia.db.storage.model.FileStorage; +import nu.marginalia.db.storage.model.FileStorageType; + +import java.util.List; + +public record FileStorageWithRelatedEntries(FileStorageWithActions self, List related) { + +} diff --git a/code/services-satellite/control-service/src/main/java/nu/marginalia/control/svc/ControlFileStorageService.java b/code/services-satellite/control-service/src/main/java/nu/marginalia/control/svc/ControlFileStorageService.java index 982c42e0..db122a7c 100644 --- a/code/services-satellite/control-service/src/main/java/nu/marginalia/control/svc/ControlFileStorageService.java +++ b/code/services-satellite/control-service/src/main/java/nu/marginalia/control/svc/ControlFileStorageService.java @@ -4,15 +4,9 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import com.zaxxer.hikari.HikariDataSource; import lombok.SneakyThrows; -import nu.marginalia.control.model.FileStorageBaseWithStorage; -import nu.marginalia.control.model.FileStorageWithActions; -import nu.marginalia.control.model.ProcessHeartbeat; -import nu.marginalia.control.model.ServiceHeartbeat; +import nu.marginalia.control.model.*; import nu.marginalia.db.storage.FileStorageService; -import nu.marginalia.db.storage.model.FileStorage; -import nu.marginalia.db.storage.model.FileStorageBase; -import nu.marginalia.db.storage.model.FileStorageBaseId; -import nu.marginalia.db.storage.model.FileStorageId; +import nu.marginalia.db.storage.model.*; import spark.Request; import spark.Response; @@ -49,9 +43,17 @@ public class ControlFileStorageService { @SneakyThrows public List getStorageList() { - Map fileStorageBaseByBaseId = new HashMap<>(); - Map> fileStoragByBaseId = new HashMap<>(); + var storageIds = getFileStorageIds(); + return makeFileStorageBaseWithStorage(storageIds); + } + @SneakyThrows + public List getStorageList(FileStorageType type) { + var storageIds = getFileStorageIds(type); + return makeFileStorageBaseWithStorage(storageIds); + } + + private List getFileStorageIds() throws SQLException { List storageIds = new ArrayList<>(); try (var conn = dataSource.getConnection(); @@ -62,6 +64,29 @@ public class ControlFileStorageService { } } + return storageIds; + } + + private List getFileStorageIds(FileStorageType type) throws SQLException { + List storageIds = new ArrayList<>(); + + try (var conn = dataSource.getConnection(); + var storageByIdStmt = conn.prepareStatement("SELECT ID FROM FILE_STORAGE WHERE TYPE = ?")) { + storageByIdStmt.setString(1, type.name()); + var rs = storageByIdStmt.executeQuery(); + while (rs.next()) { + storageIds.add(new FileStorageId(rs.getLong("ID"))); + } + } + + return storageIds; + } + + private List makeFileStorageBaseWithStorage(List storageIds) throws SQLException { + + Map fileStorageBaseByBaseId = new HashMap<>(); + Map> fileStoragByBaseId = new HashMap<>(); + for (var id : storageIds) { var storage = fileStorageService.getStorage(id); fileStorageBaseByBaseId.computeIfAbsent(storage.base().id(), k -> storage.base()); @@ -79,5 +104,31 @@ public class ControlFileStorageService { return result; } + public FileStorageWithRelatedEntries getFileStorageWithRelatedEntries(FileStorageId id) throws SQLException { + var storage = fileStorageService.getStorage(id); + var related = getRelatedEntries(id); + return new FileStorageWithRelatedEntries(new FileStorageWithActions(storage), related); + } + private List getRelatedEntries(FileStorageId id) { + List ret = new ArrayList<>(); + try (var conn = dataSource.getConnection(); + var relatedIds = conn.prepareStatement(""" + (SELECT SOURCE_ID AS ID FROM FILE_STORAGE_RELATION WHERE TARGET_ID = ?) + UNION + (SELECT TARGET_ID AS ID FROM FILE_STORAGE_RELATION WHERE SOURCE_ID = ?) + """)) + { + + relatedIds.setLong(1, id.id()); + relatedIds.setLong(2, id.id()); + var rs = relatedIds.executeQuery(); + while (rs.next()) { + ret.add(fileStorageService.getStorage(new FileStorageId(rs.getLong("ID")))); + } + } catch (SQLException throwables) { + throwables.printStackTrace(); + } + return ret; + } } diff --git a/code/services-satellite/control-service/src/main/resources/static/control/style.css b/code/services-satellite/control-service/src/main/resources/static/control/style.css index e4be767f..4056c91e 100644 --- a/code/services-satellite/control-service/src/main/resources/static/control/style.css +++ b/code/services-satellite/control-service/src/main/resources/static/control/style.css @@ -8,6 +8,17 @@ body { grid-template-areas: "left right"; } +section nav.tabs > a { + color: #000; + text-decoration: none; + background-color: #ccc; + padding: 0.5ch; + border-radius: .5ch; +} +section nav.tabs a.selected { + background-color: #eee; +} + .toggle-switch-off { border-left: 5px solid #f00; width: 8ch; @@ -37,7 +48,7 @@ table { } th { text-align: left; } td,th { padding-right: 1ch; border: 1px solid #ccc; } -tr:nth-last-of-type(2n) { +tr:nth-of-type(2n) { background-color: #eee; } body > nav { diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/index.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/index.hdb index b1034529..5e72a451 100644 --- a/code/services-satellite/control-service/src/main/resources/templates/control/index.hdb +++ b/code/services-satellite/control-service/src/main/resources/templates/control/index.hdb @@ -11,5 +11,5 @@

Overview

- + diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/partials/storage-table.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/partials/storage-table.hdb new file mode 100644 index 00000000..9be012e5 --- /dev/null +++ b/code/services-satellite/control-service/src/main/resources/templates/control/partials/storage-table.hdb @@ -0,0 +1,34 @@ + + {{#each storage}} + + + + + + + + + + + + + + + + + + + + + {{#each storage}} + + + + + + + {{/each}} + {{/each}} +
TypeNamePathMust CleanPermit Temp
{{base.type}}{{base.name}}{{base.path}}{{base.mustClean}}{{base.permitTemp}}
TypePathDescription
+ Info + {{storage.type}}{{storage.path}}{{storage.description}}
\ No newline at end of file diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/partials/storage-types.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/partials/storage-types.hdb new file mode 100644 index 00000000..575797f9 --- /dev/null +++ b/code/services-satellite/control-service/src/main/resources/templates/control/partials/storage-types.hdb @@ -0,0 +1,6 @@ + \ No newline at end of file diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/storage-crawls.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/storage-crawls.hdb new file mode 100644 index 00000000..627072a3 --- /dev/null +++ b/code/services-satellite/control-service/src/main/resources/templates/control/storage-crawls.hdb @@ -0,0 +1,28 @@ + + + + Control Service + + + + + {{> control/partials/nav}} +
+ {{> control/partials/storage-types}} +

Crawl Data

+ {{> control/partials/storage-table}} + +

About

+

Crawl data is the content of websites that have been downloaded by the crawler.

+

Crawl data can be turned into processed data, and loaded into the index to make + it searchable.

+
+ + + + + \ No newline at end of file diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/storage-details.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/storage-details.hdb new file mode 100644 index 00000000..9038d510 --- /dev/null +++ b/code/services-satellite/control-service/src/main/resources/templates/control/storage-details.hdb @@ -0,0 +1,81 @@ + + + + Control Service + + + + + {{> control/partials/nav}} +
+ {{> control/partials/storage-types}} +

Storage Details

+ {{#with storage.self.storage}} + + + + + + + + + + + +
TypePathDetails
{{type}}{{path}}{{description}}
+ {{/with}} +

Actions

+ {{#with storage.self}} + {{#if isCrawlable}} +
+ Perform a full re-crawl of this data:
+
+ {{/if}} + {{#if isLoadable}} +
+ Load this data into index:
+
+ {{/if}} + {{#if isConvertible}} +
+ Process and load this data into index:
+
+ {{/if}} + {{#if isRecrawlable}} +
+ Perform a re-crawl of this data:
+
+ {{/if}} + {{#if isDeletable}} +
+ Delete this data:
+
+ {{/if}} + {{/with}} + {{#if storage.related}} +

Related

+ + + + + + + {{#each storage.related}} + + + + + + {{/each}} +
TypePathDetails
{{type}}{{path}}{{description}}
+ {{/if}} +
+ + + + + \ No newline at end of file diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/storage-overview.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/storage-overview.hdb new file mode 100644 index 00000000..7d978fb9 --- /dev/null +++ b/code/services-satellite/control-service/src/main/resources/templates/control/storage-overview.hdb @@ -0,0 +1,54 @@ + + + + Control Service + + + + + {{> control/partials/nav}} +
+ {{> control/partials/storage-types}} +

Storage

+ + {{#each storage}} + + + + + + + + + + + + + + + + + + + + + {{#each storage}} + + + + + + + {{/each}} + {{/each}} +
TypeNamePathMust CleanPermit Temp
{{base.type}}{{base.name}}{{base.path}}{{base.mustClean}}{{base.permitTemp}}
TypePathDescription
+ {{storage.type}}{{storage.path}}{{storage.description}}
+
+ + + + \ No newline at end of file diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/storage-processed.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/storage-processed.hdb new file mode 100644 index 00000000..9a0da6c7 --- /dev/null +++ b/code/services-satellite/control-service/src/main/resources/templates/control/storage-processed.hdb @@ -0,0 +1,26 @@ + + + + Control Service + + + + + {{> control/partials/nav}} +
+ {{> control/partials/storage-types}} +

Processed Data

+ {{> control/partials/storage-table}} + +

About

+

Processed data is crawl data that has been analyzed, and had its keywords extracted, + and is ready to be loaded into the index.

+
+ + + + \ No newline at end of file diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/storage-specs.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/storage-specs.hdb new file mode 100644 index 00000000..c1e64963 --- /dev/null +++ b/code/services-satellite/control-service/src/main/resources/templates/control/storage-specs.hdb @@ -0,0 +1,64 @@ + + + + Control Service + + + + + {{> control/partials/nav}} +
+ {{> control/partials/storage-types}} + +

Crawl Specifications

+ {{> control/partials/storage-table}} + +

About

+ +

Crawling specifications are a work order for the crawler, in essence a list of domains that are to be crawled, + combined with a list of known URLs for each domain, and instructions on how deep to crawl. The crawler requires + a specification in order to understand what to do. +

+

+ A crawling specification can either be generated from the links in the database, or from a list of domains + provided via a URL that links to a text file. +

+

Create New Specification

+ +

To create a new specification fill out the form below.

+
+
+
+
+

(This is how you'll be able to find the + specification later so give it a good and descriptive name)

+ +

Source

+
+
+ +
+ +
+
+ +
+ + + + \ No newline at end of file diff --git a/code/services-satellite/control-service/src/main/resources/templates/control/storage.hdb b/code/services-satellite/control-service/src/main/resources/templates/control/storage.hdb deleted file mode 100644 index 7f748489..00000000 --- a/code/services-satellite/control-service/src/main/resources/templates/control/storage.hdb +++ /dev/null @@ -1,78 +0,0 @@ - - - - Control Service - - - - - {{> control/partials/nav}} -
-

Storage

- - {{#each storage}} - - - - - - - - - - - - - - - - - - - - - {{#each storage}} - - - - - - - {{/each}} - {{/each}} -
TypeNamePathMust CleanPermit Temp
{{base.type}}{{base.name}}{{base.path}}{{base.mustClean}}{{base.permitTemp}}
TypePathDescription
- {{#if isCrawlable}} -
- -
- {{/if}} - {{#if isLoadable}} -
- -
- {{/if}} - {{#if isConvertible}} -
- -
- {{/if}} - {{#if isRecrawlable}} -
- -
- {{/if}} - {{#if isDeletable}} -
- -
- {{/if}} -
{{storage.type}}{{storage.path}}{{storage.description}}
-
- - - - \ No newline at end of file