(control) Add filter functionality for crawl data inspector

This commit is contained in:
Viktor Lofgren 2024-05-21 19:05:44 +02:00
parent 9539fdb53c
commit 197c82acd4
2 changed files with 50 additions and 30 deletions

View File

@ -55,6 +55,9 @@ public class ControlCrawlDataService {
String path = request.queryParams("path"); String path = request.queryParams("path");
int after = Integer.parseInt(request.queryParamOrDefault("page", "0")); int after = Integer.parseInt(request.queryParamOrDefault("page", "0"));
String urlGlob = request.queryParamOrDefault("urlGlob", "");
String selectedContentType = request.queryParamOrDefault("contentType", "ALL");
String selectedHttpStatus = request.queryParamOrDefault("httpStatus", "ALL");
var url = executorClient.remoteFileURL(fileStorageService.getStorage(fsid), path).toString(); var url = executorClient.remoteFileURL(fileStorageService.getStorage(fsid), path).toString();
@ -65,8 +68,7 @@ public class ControlCrawlDataService {
String domain; String domain;
try (var conn = DriverManager.getConnection("jdbc:duckdb:"); try (var conn = DriverManager.getConnection("jdbc:duckdb:");
var stmt = conn.createStatement()) var stmt = conn.createStatement()) {
{
ResultSet rs; ResultSet rs;
rs = stmt.executeQuery(DUCKDB."SELECT domain FROM \{url} LIMIT 1"); rs = stmt.executeQuery(DUCKDB."SELECT domain FROM \{url} LIMIT 1");
@ -78,7 +80,7 @@ public class ControlCrawlDataService {
ORDER BY httpStatus ORDER BY httpStatus
"""); """);
while (rs.next()) { while (rs.next()) {
byStatusCode.add(new SummaryStatusCode(rs.getInt(1), rs.getInt(2))); byStatusCode.add(new SummaryStatusCode(rs.getInt(1), rs.getInt(2), selectedHttpStatus.equals(rs.getString(1))));
} }
rs = stmt.executeQuery(DUCKDB.""" rs = stmt.executeQuery(DUCKDB."""
@ -88,13 +90,20 @@ public class ControlCrawlDataService {
ORDER BY contentType ORDER BY contentType
"""); """);
while (rs.next()) { while (rs.next()) {
byContentType.add(new SummaryContentType(rs.getString(1), rs.getInt(2))); byContentType.add(new SummaryContentType(rs.getString(1), rs.getInt(2), selectedContentType.equals(rs.getString(1))));
} }
rs = stmt.executeQuery(DUCKDB."""
SELECT url, contentType, httpStatus, body != '', etagHeader, lastModifiedHeader var query = DUCKDB."SELECT url, contentType, httpStatus, body != '', etagHeader, lastModifiedHeader FROM \{url} WHERE 1=1";
FROM \{url} LIMIT 10 OFFSET \{after} if (!urlGlob.isBlank())
"""); query += DUCKDB." AND url LIKE \{urlGlob.replace('*', '%')}";
if (!selectedContentType.equals("ALL"))
query += DUCKDB." AND contentType = \{selectedContentType}";
if (!selectedHttpStatus.equals("ALL"))
query += DUCKDB." AND httpStatus = \{selectedHttpStatus}";
query += DUCKDB." LIMIT 10 OFFSET \{after}";
rs = stmt.executeQuery(query);
while (rs.next()) { while (rs.next()) {
records.add(new CrawlDataRecordSummary(rs.getString(1), rs.getString(2), rs.getInt(3), rs.getBoolean(4), rs.getString(5), rs.getString(6))); records.add(new CrawlDataRecordSummary(rs.getString(1), rs.getString(2), rs.getInt(3), rs.getBoolean(4), rs.getString(5), rs.getString(6)));
} }
@ -104,6 +113,9 @@ public class ControlCrawlDataService {
ret.put("tab", Map.of("storage", true)); ret.put("tab", Map.of("storage", true));
ret.put("view", Map.of("crawl", true)); ret.put("view", Map.of("crawl", true));
ret.put("selectedContentType", Map.of(selectedContentType, true));
ret.put("selectedHttpStatus", Map.of(selectedHttpStatus, true));
ret.put("urlGlob", urlGlob);
ret.put("pagination", new Pagination(after + 10, after - 10)); ret.put("pagination", new Pagination(after + 10, after - 10));
@ -152,9 +164,9 @@ public class ControlCrawlDataService {
afterDomain); afterDomain);
} }
public record SummaryContentType(String contentType, int count) {} public record SummaryContentType(String contentType, int count, boolean filtered) {}
public record SummaryStatusCode(int statusCode, int count) {} public record SummaryStatusCode(int statusCode, int count, boolean filtered) {}
public record Pagination(int next, int prev) { public record Pagination(int next, int prev) {
public boolean isPrevPage() { public boolean isPrevPage() {
return prev >= 0; return prev >= 0;

View File

@ -30,26 +30,6 @@
<td> <td>
<a class="btn btn-primary" href="/nodes/{{node.id}}/storage/{{storage.id}}/transfer?path={{{path}}}">Download Parquet</a> <a class="btn btn-primary" href="/nodes/{{node.id}}/storage/{{storage.id}}/transfer?path={{{path}}}">Download Parquet</a>
</td> </td>
<tr>
<th>HTTP Status</th>
<th>Count</th>
</tr>
{{#each byStatusCode}}
<tr>
<td>{{statusCode}}</td>
<td>{{count}}</td>
</tr>
{{/each}}
<tr>
<th>Content Type</th>
<th>Count</th>
</tr>
{{#each byContentType}}
<tr>
<td>{{contentType}}</td>
<td>{{count}}</td>
</tr>
{{/each}}
</table> </table>
<h2>Contents</h2> <h2>Contents</h2>
@ -62,6 +42,33 @@
<th>ETag</th> <th>ETag</th>
<th>Last Modified</th> <th>Last Modified</th>
</tr> </tr>
<form>
<input type="hidden" name="fid" value="{{storage.id}}">
<input type="hidden" name="path" value="{{path}}">
<tr>
<td>
<input type="text" class="" id="urlGlob" name="urlGlob" value="{{urlGlob}}">
</td>
<td>
<select name="contentType" id="contentType">
<option value="ALL">All</option>
{{#each byContentType}} <option value="{{contentType}}" {{#if filtered}}selected{{/if}}>{{contentType}}</option> {{/each}}
</select>
</td>
<td>
<select name="httpStatus" id="httpStatus">
<option value="ALL">All</option>
{{#each byStatusCode}} <option value="{{statusCode}}" {{#if filtered}}selected{{/if}}>{{statusCode}}</option> {{/each}}
</select>
</td>
<td colspan="2"></td>
<td>
<button type="submit" class="btn btn-primary">Filter</button>
</td>
</tr>
</form>
{{#each records}} {{#each records}}
<tr> <tr>
<td> <td>
@ -89,6 +96,7 @@
</tr> </tr>
{{/each}} {{/each}}
<tr> <tr>
{{#with pagination}} {{#with pagination}}
<td> <td>