From 5c2f2d558f6d0130dde971471d35d0cd7b5d6331 Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Tue, 31 May 2022 14:38:13 +0200
Subject: [PATCH] Update index.html for search engine (#25)
Co-authored-by: vlofgren
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/25
---
README.md | 18 ++++----
.../configuration/server/RateLimiter.java | 2 +-
.../wmsa/edge/index/EdgeIndexService.java | 42 ++++---------------
.../src/main/resources/static/edge/index.html | 17 ++++----
4 files changed, 30 insertions(+), 49 deletions(-)
diff --git a/README.md b/README.md
index 2fa76c4c..cfe88bc9 100644
--- a/README.md
+++ b/README.md
@@ -3,14 +3,18 @@
This is the source code for marginalia.nu, including the [search engine](https://search.marginalia.nu),
the [MEMEX/gemini server](https://memex.marginalia.nu), the and the [encyclopedia service](https://encyclopedia.marginalia.nu).
-The canonical git server for this project is [https://git.marginalia.nu](https://git.marginalia.nu),
-it is fine to mirror it on other hosts, but if you have issues or questions
-that is where you want to go.
+The aim of the project is to develop new and alternative discovery methods for the Internet.
+It's an experimental workshop as much as it is a public service; the overarching goal is to
+elevate the more human, non-commercial sides of the Internet.
-As it stands now, the project is a bit of a mess as it wasn't developed
-with the intention of going open source, a lot of tests and so on make
-assumptions about the directory structure, much configuration is hard coded
-and so on. Please stand by. A lot of the mess is fairly superficial.
+The canonical git server for this project is [https://git.marginalia.nu](https://git.marginalia.nu).
+It is fine to mirror it on other hosts, but if you have issues or questions
+git.marginalia.nu is where you want to go.
+
+As it stands now, the project is still being set up and is a bit of a mess, as
+it wasn't developed with the intention of going open source. A lot of tests
+and so on make assumptions about the directory structure, much configuration
+is hard coded, and so on. Please stand by. A lot of the mess is fairly superficial.
## Contributing
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/RateLimiter.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/RateLimiter.java
index 4dc4c8da..06a6131a 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/RateLimiter.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/RateLimiter.java
@@ -66,6 +66,6 @@ public class RateLimiter {
private Bucket createBucket() {
var refill = Refill.greedy(1, Duration.ofSeconds(refillRate));
var bw = Bandwidth.classic(capacity, refill);
- return Bucket4j.builder().addLimit(bw).build();
+ return Bucket.builder().addLimit(bw).build();
}
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java
index 81d57139..a04a4c83 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeIndexService.java
@@ -331,8 +331,6 @@ public class EdgeIndexService extends Service {
final Map> results = new HashMap<>();
final DomainResultCountFilter localFilter = new DomainResultCountFilter(specs.limitByDomain);
- boolean debug = sq.searchTermsExclude.contains("special:debug");
-
for (int i : specBuckets) {
int foundResultsCount = results.values().stream().mapToInt(List::size).sum();
@@ -341,37 +339,15 @@ public class EdgeIndexService extends Service {
List resultsForBucket = new ArrayList<>(specs.limitByBucket);
- if (debug) {
- getQuery(i, budget, sq.block, lv -> localFilter.filterRawValue(i, lv), searchTerms)
- .peek(l -> logger.info("Considering {}", Long.toHexString(l)))
- .mapToObj(id -> new EdgeSearchResultItem(i, sq.termSize(), id))
- .filter(ri -> {
- if (seenResults.contains(ri.url.getId())) {
- logger.info("Seen before: {}", Integer.toHexString(ri.url.getId()));
- return false;
- }
- else if (!localFilter.test(i, domainCountFilter, ri)) {
- logger.info("DCF: {} - {}:{}", ri.blockId, Integer.toHexString(ri.domain.getId()), Integer.toHexString(ri.url.getId()));
- return false;
- }
- return true;
- })
- .limit(specs.limitTotal * 3L)
- .distinct()
- .limit(Math.min(specs.limitByBucket
- - results.values().stream().mapToInt(Collection::size).sum(), limit - foundResultsCount))
- .forEach(resultsForBucket::add);
- }
- else {
- getQuery(i, budget, sq.block, lv -> localFilter.filterRawValue(i, lv), searchTerms)
- .mapToObj(id -> new EdgeSearchResultItem(i, sq.termSize(), id))
- .filter(ri -> !seenResults.contains(ri.url.getId()) && localFilter.test(i, domainCountFilter, ri))
- .limit(specs.limitTotal * 3L)
- .distinct()
- .limit(Math.min(specs.limitByBucket
- - results.values().stream().mapToInt(Collection::size).sum(), limit - foundResultsCount))
- .forEach(resultsForBucket::add);
- }
+ getQuery(i, budget, sq.block, lv -> localFilter.filterRawValue(i, lv), searchTerms)
+ .mapToObj(id -> new EdgeSearchResultItem(i, sq.termSize(), id))
+ .filter(ri -> !seenResults.contains(ri.url.getId()) && localFilter.test(i, domainCountFilter, ri))
+ .limit(specs.limitTotal * 3L)
+ .distinct()
+ .limit(Math.min(specs.limitByBucket
+ - results.values().stream().mapToInt(Collection::size).sum(), limit - foundResultsCount))
+ .forEach(resultsForBucket::add);
+
for (var result : resultsForBucket) {
seenResults.add(result.url.getId());
diff --git a/marginalia_nu/src/main/resources/static/edge/index.html b/marginalia_nu/src/main/resources/static/edge/index.html
index 13044a6c..166e67b8 100644
--- a/marginalia_nu/src/main/resources/static/edge/index.html
+++ b/marginalia_nu/src/main/resources/static/edge/index.html
@@ -61,8 +61,12 @@
existed.
The software for this search engine is all custom-built, and all crawling and indexing is
- done in-house.
+ done in-house. The project is open source. Feel free to poke about in the source code or contribute
+ to the development!
+ Consider supporting the
+ project!
Read More
@@ -98,11 +102,6 @@
Updates
-
☛ The web design of the search engine has been completely overhauled. For the most part, this should
- result in even smaller page loads, and better accessibility and easier navigation, but it may still
- be a bit rough in some browsers, if you do find any bugs or accessibility problems, please let me
- know. You can reach me at kontakt@marginalia.nu.
-
☛ The Random Mode has been overhauled, and is
quite entertaining. I encourage you to give it a spin.
☛ A simple public API is now available.
@@ -116,6 +115,8 @@
Publicity, Discussion and Events