(controller) Improve the storage interface

This commit is contained in:
Viktor Lofgren 2023-07-21 19:56:16 +02:00
parent 995657c6ce
commit d6b07e4d01
14 changed files with 406 additions and 91 deletions

View File

@ -4,6 +4,10 @@ Microlibrary that offers string compression. This is useful when having to load
of HTML documents in memory during conversion. XML has been described as the opposite of a compression scheme, of HTML documents in memory during conversion. XML has been described as the opposite of a compression scheme,
and as a result, HTML compresses ridiculously well. and as a result, HTML compresses ridiculously well.
## Configuration
If the Java property 'bigstring.disabled' is set to true, the BigString class will not compress strings.
## Demo ## Demo
```java ```java

View File

@ -5,6 +5,8 @@ import com.google.inject.Inject;
import nu.marginalia.client.ServiceMonitors; import nu.marginalia.client.ServiceMonitors;
import nu.marginalia.control.model.Actor; import nu.marginalia.control.model.Actor;
import nu.marginalia.control.svc.*; import nu.marginalia.control.svc.*;
import nu.marginalia.db.storage.model.FileStorageId;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.model.gson.GsonFactory; import nu.marginalia.model.gson.GsonFactory;
import nu.marginalia.renderer.RendererFactory; import nu.marginalia.renderer.RendererFactory;
import nu.marginalia.service.server.*; import nu.marginalia.service.server.*;
@ -15,6 +17,7 @@ import spark.Response;
import spark.Spark; import spark.Spark;
import java.io.IOException; import java.io.IOException;
import java.sql.SQLException;
import java.util.Map; import java.util.Map;
public class ControlService extends Service { public class ControlService extends Service {
@ -53,7 +56,11 @@ public class ControlService extends Service {
var serviceByIdRenderer = rendererFactory.renderer("control/service-by-id"); var serviceByIdRenderer = rendererFactory.renderer("control/service-by-id");
var actorsRenderer = rendererFactory.renderer("control/actors"); var actorsRenderer = rendererFactory.renderer("control/actors");
var actorDetailsRenderer = rendererFactory.renderer("control/actor-details"); var actorDetailsRenderer = rendererFactory.renderer("control/actor-details");
var storageRenderer = rendererFactory.renderer("control/storage"); var storageRenderer = rendererFactory.renderer("control/storage-overview");
var storageSpecsRenderer = rendererFactory.renderer("control/storage-specs");
var storageCrawlsRenderer = rendererFactory.renderer("control/storage-crawls");
var storageProcessedRenderer = rendererFactory.renderer("control/storage-processed");
var storageDetailsRenderer = rendererFactory.renderer("control/storage-details");
this.controlActorService = controlActorService; this.controlActorService = controlActorService;
@ -74,6 +81,11 @@ public class ControlService extends Service {
Spark.get("/public/actors", this::processesModel, actorsRenderer::render); Spark.get("/public/actors", this::processesModel, actorsRenderer::render);
Spark.get("/public/actors/:fsm", this::actorDetailsModel, actorDetailsRenderer::render); Spark.get("/public/actors/:fsm", this::actorDetailsModel, actorDetailsRenderer::render);
Spark.get("/public/storage", this::storageModel, storageRenderer::render); Spark.get("/public/storage", this::storageModel, storageRenderer::render);
Spark.get("/public/storage/specs", this::storageModelSpecs, storageSpecsRenderer::render);
Spark.get("/public/storage/crawls", this::storageModelCrawls, storageCrawlsRenderer::render);
Spark.get("/public/storage/processed", this::storageModelProcessed, storageProcessedRenderer::render);
Spark.get("/public/storage/:id", this::storageDetailsModel, storageDetailsRenderer::render);
final HtmlRedirect redirectToServices = new HtmlRedirect("/services"); final HtmlRedirect redirectToServices = new HtmlRedirect("/services");
final HtmlRedirect redirectToProcesses = new HtmlRedirect("/actors"); final HtmlRedirect redirectToProcesses = new HtmlRedirect("/actors");
@ -118,6 +130,18 @@ public class ControlService extends Service {
return Map.of("storage", controlFileStorageService.getStorageList()); return Map.of("storage", controlFileStorageService.getStorageList());
} }
/**
 * Builds the template model for the storage details page.
 *
 * The {@code id} path parameter of the request is converted with
 * {@link FileStorageId#parse} and used to look up the storage together
 * with its related entries.
 *
 * @return a single-entry map exposing the lookup result as {@code "storage"}
 * @throws SQLException if the lookup in the file storage service fails
 */
private Object storageDetailsModel(Request request, Response response) throws SQLException {
    var storageId = FileStorageId.parse(request.params("id"));
    var details = controlFileStorageService.getFileStorageWithRelatedEntries(storageId);

    return Map.of("storage", details);
}
/**
 * Builds the template model for the crawl specifications listing.
 *
 * @return a single-entry map exposing the {@code CRAWL_SPEC} storage list as {@code "storage"}
 */
private Object storageModelSpecs(Request request, Response response) {
    var specs = controlFileStorageService.getStorageList(FileStorageType.CRAWL_SPEC);

    return Map.of("storage", specs);
}
/**
 * Builds the template model for the crawl data listing.
 *
 * @return a single-entry map exposing the {@code CRAWL_DATA} storage list as {@code "storage"}
 */
private Object storageModelCrawls(Request request, Response response) {
    var crawls = controlFileStorageService.getStorageList(FileStorageType.CRAWL_DATA);

    return Map.of("storage", crawls);
}
/**
 * Builds the template model for the processed data listing.
 *
 * @return a single-entry map exposing the {@code PROCESSED_DATA} storage list as {@code "storage"}
 */
private Object storageModelProcessed(Request request, Response response) {
    var processed = controlFileStorageService.getStorageList(FileStorageType.PROCESSED_DATA);

    return Map.of("storage", processed);
}
private Object servicesModel(Request request, Response response) { private Object servicesModel(Request request, Response response) {
return Map.of("services", heartbeatService.getServiceHeartbeats(), return Map.of("services", heartbeatService.getServiceHeartbeats(),
"events", eventLogService.getLastEntries(20)); "events", eventLogService.getLastEntries(20));

View File

@ -0,0 +1,10 @@
package nu.marginalia.control.model;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageType;
import java.util.List;
/**
 * Pairs a single file storage with the storages it is related to.
 *
 * @param self    the storage being described, wrapped as a {@link FileStorageWithActions}
 * @param related the storages related to {@code self}
 */
public record FileStorageWithRelatedEntries(
        FileStorageWithActions self,
        List<FileStorage> related
) {
}

View File

@ -4,15 +4,9 @@ import com.google.inject.Inject;
import com.google.inject.Singleton; import com.google.inject.Singleton;
import com.zaxxer.hikari.HikariDataSource; import com.zaxxer.hikari.HikariDataSource;
import lombok.SneakyThrows; import lombok.SneakyThrows;
import nu.marginalia.control.model.FileStorageBaseWithStorage; import nu.marginalia.control.model.*;
import nu.marginalia.control.model.FileStorageWithActions;
import nu.marginalia.control.model.ProcessHeartbeat;
import nu.marginalia.control.model.ServiceHeartbeat;
import nu.marginalia.db.storage.FileStorageService; import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorage; import nu.marginalia.db.storage.model.*;
import nu.marginalia.db.storage.model.FileStorageBase;
import nu.marginalia.db.storage.model.FileStorageBaseId;
import nu.marginalia.db.storage.model.FileStorageId;
import spark.Request; import spark.Request;
import spark.Response; import spark.Response;
@ -49,9 +43,17 @@ public class ControlFileStorageService {
@SneakyThrows @SneakyThrows
public List<FileStorageBaseWithStorage> getStorageList() { public List<FileStorageBaseWithStorage> getStorageList() {
Map<FileStorageBaseId, FileStorageBase> fileStorageBaseByBaseId = new HashMap<>(); var storageIds = getFileStorageIds();
Map<FileStorageBaseId, List<FileStorageWithActions>> fileStoragByBaseId = new HashMap<>(); return makeFileStorageBaseWithStorage(storageIds);
}
/**
 * Lists the file storages of the given type.
 *
 * The ids are selected by {@code getFileStorageIds(type)} and then handed to
 * {@code makeFileStorageBaseWithStorage} to assemble the result. Checked
 * exceptions from those helpers are rethrown undeclared via {@code @SneakyThrows}.
 *
 * @param type the storage type to select
 */
@SneakyThrows
public List<FileStorageBaseWithStorage> getStorageList(FileStorageType type) {
    return makeFileStorageBaseWithStorage(getFileStorageIds(type));
}
private List<FileStorageId> getFileStorageIds() throws SQLException {
List<FileStorageId> storageIds = new ArrayList<>(); List<FileStorageId> storageIds = new ArrayList<>();
try (var conn = dataSource.getConnection(); try (var conn = dataSource.getConnection();
@ -62,6 +64,29 @@ public class ControlFileStorageService {
} }
} }
return storageIds;
}
/**
 * Fetches the ids of all FILE_STORAGE rows whose TYPE column equals the
 * name of the given storage type.
 *
 * @param type the storage type to filter on; its enum name is bound to the query
 * @return the matching ids, in the order the database returned them
 * @throws SQLException if the connection cannot be obtained or the query fails
 */
private List<FileStorageId> getFileStorageIds(FileStorageType type) throws SQLException {
    final List<FileStorageId> ids = new ArrayList<>();

    try (var conn = dataSource.getConnection();
         var idsByTypeStmt = conn.prepareStatement("SELECT ID FROM FILE_STORAGE WHERE TYPE = ?")) {

        idsByTypeStmt.setString(1, type.name());

        for (var rows = idsByTypeStmt.executeQuery(); rows.next(); ) {
            ids.add(new FileStorageId(rows.getLong("ID")));
        }
    }

    return ids;
}
private List<FileStorageBaseWithStorage> makeFileStorageBaseWithStorage(List<FileStorageId> storageIds) throws SQLException {
Map<FileStorageBaseId, FileStorageBase> fileStorageBaseByBaseId = new HashMap<>();
Map<FileStorageBaseId, List<FileStorageWithActions>> fileStoragByBaseId = new HashMap<>();
for (var id : storageIds) { for (var id : storageIds) {
var storage = fileStorageService.getStorage(id); var storage = fileStorageService.getStorage(id);
fileStorageBaseByBaseId.computeIfAbsent(storage.base().id(), k -> storage.base()); fileStorageBaseByBaseId.computeIfAbsent(storage.base().id(), k -> storage.base());
@ -79,5 +104,31 @@ public class ControlFileStorageService {
return result; return result;
} }
/**
 * Looks up a storage and the storages related to it.
 *
 * @param id the id of the storage to look up
 * @return the storage wrapped in a {@link FileStorageWithActions}, together
 *         with its related entries
 * @throws SQLException if the storage lookup fails
 */
public FileStorageWithRelatedEntries getFileStorageWithRelatedEntries(FileStorageId id) throws SQLException {
    var storage = fileStorageService.getStorage(id);
    List<FileStorage> relatedEntries = getRelatedEntries(id);

    var self = new FileStorageWithActions(storage);
    return new FileStorageWithRelatedEntries(self, relatedEntries);
}
/**
 * Finds every storage linked to the given one through FILE_STORAGE_RELATION,
 * in either direction (as source or as target of a relation).
 *
 * Database errors are propagated to the caller rather than being printed and
 * swallowed, so a failed lookup can no longer be mistaken for "no related
 * entries" or yield a silently truncated list.
 *
 * @param id the storage whose relations are looked up
 * @return the related storages; empty if there are none
 * @throws SQLException if the relation query or a storage lookup fails
 */
private List<FileStorage> getRelatedEntries(FileStorageId id) throws SQLException {
    List<FileStorageId> relatedIds = new ArrayList<>();

    try (var conn = dataSource.getConnection();
         var relatedIdsStmt = conn.prepareStatement("""
                 (SELECT SOURCE_ID AS ID FROM FILE_STORAGE_RELATION WHERE TARGET_ID = ?)
                 UNION
                 (SELECT TARGET_ID AS ID FROM FILE_STORAGE_RELATION WHERE SOURCE_ID = ?)
                 """))
    {
        relatedIdsStmt.setLong(1, id.id());
        relatedIdsStmt.setLong(2, id.id());

        try (var rs = relatedIdsStmt.executeQuery()) {
            while (rs.next()) {
                relatedIds.add(new FileStorageId(rs.getLong("ID")));
            }
        }
    }

    // Resolve the storages only after the connection has been released, the
    // same collect-ids-then-resolve order the storage listing uses. This
    // avoids keeping a connection and an open result set around while
    // fileStorageService does its own lookups.
    List<FileStorage> ret = new ArrayList<>(relatedIds.size());
    for (var relatedId : relatedIds) {
        ret.add(fileStorageService.getStorage(relatedId));
    }
    return ret;
}
} }

View File

@ -8,6 +8,17 @@ body {
grid-template-areas: grid-template-areas:
"left right"; "left right";
} }
/* Links directly inside a section's tab bar are drawn as rounded grey tabs. */
section nav.tabs > a {
    background-color: #ccc;
    border-radius: .5ch;
    color: #000;
    padding: 0.5ch;
    text-decoration: none;
}

/* A tab link carrying the "selected" class gets a lighter background. */
section nav.tabs a.selected {
    background-color: #eee;
}
.toggle-switch-off { .toggle-switch-off {
border-left: 5px solid #f00; border-left: 5px solid #f00;
width: 8ch; width: 8ch;
@ -37,7 +48,7 @@ table {
} }
th { text-align: left; } th { text-align: left; }
td,th { padding-right: 1ch; border: 1px solid #ccc; } td,th { padding-right: 1ch; border: 1px solid #ccc; }
tr:nth-last-of-type(2n) { tr:nth-of-type(2n) {
background-color: #eee; background-color: #eee;
} }
body > nav { body > nav {

View File

@ -11,5 +11,5 @@
<h1>Overview</h1> <h1>Overview</h1>
</section> </section>
</body> </body>
<script src="/refresh.js" /></script> <script src="/refresh.js"></script>
</html> </html>

View File

@ -0,0 +1,34 @@
{{!--
  Storage table partial.
  The outer loop walks the model's "storage" list; each entry contributes a
  header/value row pair for its base, followed by one row per storage held
  in that entry's own "storage" list, linking to the storage's details page.
--}}
<table>
    {{#each storage}}
        <tr>
            <th>Type</th>
            <th>Name</th>
            <th>Path</th>
            <th>Must Clean</th>
            <th>Permit Temp</th>
        </tr>
        <tr>
            <td>{{base.type}}</td>
            <td>{{base.name}}</td>
            <td>{{base.path}}</td>
            <td>{{base.mustClean}}</td>
            <td>{{base.permitTemp}}</td>
        </tr>
        <tr>
            <th></th>
            <th>Type</th>
            <th colspan="2">Path</th>
            <th>Description</th>
        </tr>
        {{#each storage}}
            <tr>
                <td>
                    <a href="/storage/{{storage.id}}">Info</a>
                </td>
                <td>{{storage.type}}</td>
                <td colspan="2">{{storage.path}}</td>
                <td>{{storage.description}}</td>
            </tr>
        {{/each}}
    {{/each}}
</table>

View File

@ -0,0 +1,6 @@
{{!--
  Tab bar linking the storage views: overview, specifications, crawl data
  and processed data.
  NOTE(review): no link here is given the "selected" class, so the current
  tab is not highlighted - confirm whether that is intended.
--}}
<nav class="tabs">
    <a href="/storage">Overview</a>
    <a href="/storage/specs">Specifications</a>
    <a href="/storage/crawls">Crawl Data</a>
    <a href="/storage/processed">Processed Data</a>
</nav>

View File

@ -0,0 +1,28 @@
<!DOCTYPE html>
<html>
<head>
    <title>Control Service</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="/style.css" />
</head>
<body>
    {{> control/partials/nav}}

    <section>
        {{> control/partials/storage-types}}

        <h1>Crawl Data</h1>
        {{> control/partials/storage-table}}

        <h2>About</h2>
        <p>Crawl data is the content of websites that have been downloaded by the crawler.</p>
        <p>Crawl data can be turned into processed data, and loaded into the index to make
        it searchable.</p>
    </section>

    {{!-- Scripts are kept inside body: HTML permits no elements after the closing body tag. --}}
    <script src="/refresh.js"></script>
    <script>
        // Calls refresh(["storage"]) every 30 seconds.
        window.setInterval(() => {
            refresh(["storage"]);
        }, 30000);
    </script>
</body>
</html>

View File

@ -0,0 +1,81 @@
<!DOCTYPE html>
<html>
<head>
    <title>Control Service</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="/style.css" />
</head>
<body>
    {{> control/partials/nav}}

    <section>
        {{> control/partials/storage-types}}

        <h1>Storage Details</h1>
        {{#with storage.self.storage}}
            <table>
                <tr>
                    <th>Type</th>
                    <th>Path</th>
                    <th>Details</th>
                </tr>
                <tr>
                    <td>{{type}}</td>
                    <td>{{path}}</td>
                    <td>{{description}}</td>
                </tr>
            </table>
        {{/with}}

        <h2>Actions</h2>
        {{!-- Each action form is rendered only when the matching flag on storage.self is set. --}}
        {{#with storage.self}}
            {{#if isCrawlable}}
                <form method="post" action="/storage/{{storage.id}}/crawl">
                    Perform a full re-crawl of this data: <button type="submit">Crawl</button> <br>
                </form>
            {{/if}}
            {{#if isLoadable}}
                <form method="post" action="/storage/{{storage.id}}/load">
                    Load this data into index: <button type="submit">Load</button> <br>
                </form>
            {{/if}}
            {{#if isConvertible}}
                <form method="post" action="/storage/{{storage.id}}/process">
                    Process and load this data into index: <button type="submit">Process</button> <br>
                </form>
            {{/if}}
            {{#if isRecrawlable}}
                <form method="post" action="/storage/{{storage.id}}/recrawl">
                    Perform a re-crawl of this data: <button type="submit">Recrawl</button><br>
                </form>
            {{/if}}
            {{#if isDeletable}}
                {{!--
                  The path is passed through a data attribute instead of being spliced into
                  the JavaScript string literal, so a path containing an apostrophe or a
                  backslash cannot break the confirmation handler.
                --}}
                <form method="post" action="/storage/{{storage.id}}/delete"
                      data-path="{{storage.path}}"
                      onsubmit="return confirm('Confirm deletion of ' + this.dataset.path)">
                    Delete this data: <button type="submit">Delete</button><br>
                </form>
            {{/if}}
        {{/with}}

        {{#if storage.related}}
            <h2>Related</h2>
            <table>
                <tr>
                    <th>Type</th>
                    <th>Path</th>
                    <th>Details</th>
                </tr>
                {{#each storage.related}}
                    <tr>
                        <td>{{type}}</td>
                        <td><a href="/storage/{{id}}">{{path}}</a></td>
                        <td>{{description}}</td>
                    </tr>
                {{/each}}
            </table>
        {{/if}}
    </section>

    {{!-- Scripts are kept inside body: HTML permits no elements after the closing body tag. --}}
    <script src="/refresh.js"></script>
    <script>
        // Calls refresh(["storage"]) every 30 seconds.
        window.setInterval(() => {
            refresh(["storage"]);
        }, 30000);
    </script>
</body>
</html>

View File

@ -0,0 +1,54 @@
<!DOCTYPE html>
<html>
<head>
    <title>Control Service</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="/style.css" />
</head>
<body>
    {{> control/partials/nav}}

    <section>
        {{> control/partials/storage-types}}

        <h1>Storage</h1>
        {{!--
          Uses the shared storage table partial, like the other storage listings,
          instead of an inline copy of the same markup. The partial also fills the
          first column with an "Info" link to each storage's details page, which
          the inline copy left empty.
        --}}
        {{> control/partials/storage-table}}
    </section>

    {{!-- Scripts are kept inside body: HTML permits no elements after the closing body tag. --}}
    <script src="/refresh.js"></script>
    <script>
        // Calls refresh(["storage"]) every 30 seconds.
        window.setInterval(() => {
            refresh(["storage"]);
        }, 30000);
    </script>
</body>
</html>

View File

@ -0,0 +1,26 @@
<!DOCTYPE html>
<html>
<head>
    <title>Control Service</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="/style.css" />
</head>
<body>
    {{> control/partials/nav}}

    <section>
        {{> control/partials/storage-types}}

        <h1>Processed Data</h1>
        {{> control/partials/storage-table}}

        <h2>About</h2>
        <p>Processed data is crawl data that has been analyzed, and had its keywords extracted,
        and is ready to be loaded into the index.</p>
    </section>

    {{!-- Scripts are kept inside body: HTML permits no elements after the closing body tag. --}}
    <script src="/refresh.js"></script>
    <script>
        // Calls refresh(["storage"]) every 30 seconds.
        window.setInterval(() => {
            refresh(["storage"]);
        }, 30000);
    </script>
</body>
</html>

View File

@ -0,0 +1,64 @@
<!DOCTYPE html>
<html>
<head>
    <title>Control Service</title>
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <link rel="stylesheet" href="/style.css" />
</head>
<body>
    {{> control/partials/nav}}

    <section>
        {{> control/partials/storage-types}}

        <h1>Crawl Specifications</h1>
        {{> control/partials/storage-table}}

        <h2>About</h2>
        <p>Crawling specifications are a work order for the crawler, in essence a list of domains that are to be crawled,
        combined with a list of known URLs for each domain, and instructions on how deep to crawl. The crawler requires
        a specification in order to understand what to do.
        </p>
        <p>
        A crawling specification can either be generated from the links in the database, or from a list of domains
        provided via a URL that links to a text file.
        </p>

        <h2>Create New Specification</h2>
        <p>To create a new specification fill out the form below. </p>
        <form method="post" action="/storage/specs">
            <div class="form">
                <label for="description">Description</label><br>
                <input type="text" name="description" id="description" maxlength="255"><br>
                <p>(This is how you'll be able to find the
                specification later so give it a good and descriptive name)</p>

                <p>Source</p>
                {{!-- Choosing a source shows or hides the URL field below. --}}
                <input type="radio" name="source"
                       value="db" id="db"
                       checked
                       onclick="document.getElementById('spec-url-options').style.display = 'none';"
                > <label for="db">Use links in database</label><br>
                <input type="radio" name="source"
                       value="download" id="download"
                       onclick="document.getElementById('spec-url-options').style.display = 'block';"
                > <label for="download">Download a list of domains from a URL</label><br>

                <div id="spec-url-options" style="display: none">
                    <label for="url">URL to list of domains</label><br>
                    <input type="text" name="url" id="url" /><br>
                    <br>
                </div>
                <br>
                <input type="submit">
            </div>
        </form>
    </section>

    {{!-- Scripts are kept inside body: HTML permits no elements after the closing body tag. --}}
    <script src="/refresh.js"></script>
    <script>
        // Calls refresh(["storage"]) every 30 seconds.
        window.setInterval(() => {
            refresh(["storage"]);
        }, 30000);
    </script>
</body>
</html>

View File

@ -1,78 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>Control Service</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<link rel="stylesheet" href="/style.css" />
</head>
<body>
{{> control/partials/nav}}
<section>
<h1>Storage</h1>
<table>
{{#each storage}}
<tr>
<th>Type</th>
<th>Name</th>
<th>Path</th>
<th>Must Clean</th>
<th>Permit Temp</th>
</tr>
<tr>
<td>{{base.type}}</td>
<td>{{base.name}}</td>
<td>{{base.path}}</td>
<td>{{base.mustClean}}</td>
<td>{{base.permitTemp}}</td>
</tr>
<tr>
<th></th>
<th>Type</th>
<th colspan="2">Path</th>
<th>Description</th>
</tr>
{{#each storage}}
<tr>
<td>
{{#if isCrawlable}}
<form method="post" action="/storage/{{storage.id}}/crawl">
<button type="submit">Crawl</button>
</form>
{{/if}}
{{#if isLoadable}}
<form method="post" action="/storage/{{storage.id}}/load">
<button type="submit">Load</button>
</form>
{{/if}}
{{#if isConvertible}}
<form method="post" action="/storage/{{storage.id}}/process">
<button type="submit">Process</button>
</form>
{{/if}}
{{#if isRecrawlable}}
<form method="post" action="/storage/{{storage.id}}/recrawl">
<button type="submit">Recrawl</button>
</form>
{{/if}}
{{#if isDeletable}}
<form method="post" action="/storage/{{storage.id}}/delete" onsubmit="return confirm('Confirm deletion of {{storage.path}}')">
<button type="submit">Delete</button>
</form>
{{/if}}
</td>
<td>{{storage.type}}</td>
<td colspan="2">{{storage.path}}</td>
<td>{{storage.description}}</td>
</tr>
{{/each}}
{{/each}}
</table>
</section>
</body>
<script src="/refresh.js"></script>
<script>
window.setInterval(() => {
refresh(["storage"]);
}, 30000);
</script>
</html>