From a3a6b40cc3e67e6273f939dfea73322868b4926d Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Wed, 15 Jun 2022 16:54:27 +0200
Subject: [PATCH 1/3] Changes to crawler (#28)
Co-authored-by: vlofgren
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/28
---
README.md | 9 ++-
.../nu/marginalia/wmsa/edge/E2ETestBase.java | 2 +-
marginalia_nu/src/e2e/resources/crawl.sh | 2 +
marginalia_nu/src/e2e/resources/init.sh | 33 ++++++----
.../wmsa/configuration/UserAgent.java | 5 ++
.../wmsa/configuration/WebsiteUrl.java | 7 +++
.../wmsa/configuration/WmsaHome.java | 60 +++++++++++++++----
.../module/ConfigurationModule.java | 30 ++--------
.../module/HostnameProvider.java | 36 -----------
.../configuration/module/PortProvider.java | 46 --------------
.../edge/assistant/EdgeAssistantModule.java | 13 ++--
.../wmsa/edge/converting/ConverterModule.java | 10 +---
.../processor/DocumentProcessor.java | 17 +++---
.../processor/logic/LinkParser.java | 34 +++++++++--
.../wmsa/edge/crawling/CrawlerMain.java | 6 +-
.../crawling/retreival/CrawlerRetreiver.java | 14 +++--
.../wmsa/edge/index/EdgeTablesModule.java | 9 +--
.../wmsa/edge/model/EdgeDomain.java | 1 +
.../marginalia/wmsa/edge/model/EdgeUrl.java | 9 +--
.../wmsa/edge/search/EdgeSearchModule.java | 15 ++---
.../wmsa/edge/search/EdgeSearchService.java | 12 ++--
.../resource_store/ResourceStoreModule.java | 1 -
.../wmsa/edge/crawling/LinkParserTest.java | 34 ++++++++++-
23 files changed, 203 insertions(+), 202 deletions(-)
create mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/UserAgent.java
create mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/WebsiteUrl.java
delete mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/HostnameProvider.java
delete mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java
diff --git a/README.md b/README.md
index cfe88bc9..927fff6f 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,8 @@ the [MEMEX/gemini server](https://memex.marginalia.nu), the and the [encyclopedi
The aim of the project is to develop new and alternative discovery methods for the Internet.
It's an experimental workshop as much as it is a public service, the overarching goal is to
-elevate the more human, non-commercial sides of the Internet.
+elevate the more human, non-commercial sides of the Internet. A side-goal is to do this without
+requiring datacenters or expensive enterprise hardware, running the operation on affordable hardware instead.
The canonical git server for this project is [https://git.marginalia.nu](https://git.marginalia.nu).
It is fine to mirror it on other hosts, but if you have issues or questions
@@ -16,6 +17,10 @@ it wasn't developed with the intention of going open source, a lot of tests
and so on make assumptions about the directory structure, much configuration
is hard coded and so on. Please stand by. A lot of the mess is fairly superficial.
+## Documentation
+
+Documentation is a work in progress. See the [wiki](https://git.marginalia.nu/marginalia/marginalia.nu/wiki).
+
## Contributing
The project is still being set up, but if you are interested in contributing, please contact me.
@@ -26,4 +31,4 @@ Consider [supporting this project](https://memex.marginalia.nu/projects/edge/sup
## Contact
-You can email with any questions or feedback.
\ No newline at end of file
+You can email with any questions or feedback.
diff --git a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
index 769eca40..0c329a79 100644
--- a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
+++ b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
@@ -33,7 +33,7 @@ public abstract class E2ETestBase {
.withCopyFileToContainer(jarFile(), "/WMSA.jar")
.withCopyFileToContainer(MountableFile.forClasspathResource("init.sh"), "/init.sh")
.withExposedPorts(service.port)
- .withFileSystemBind(modelsPath(), "/var/lib/wmsa/model", BindMode.READ_ONLY)
+ .withFileSystemBind(modelsPath(), "/wmsa/model", BindMode.READ_ONLY)
.withNetwork(network)
.withNetworkAliases(service.name)
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger(service.name)))
diff --git a/marginalia_nu/src/e2e/resources/crawl.sh b/marginalia_nu/src/e2e/resources/crawl.sh
index 3a0e4b01..16d43fab 100644
--- a/marginalia_nu/src/e2e/resources/crawl.sh
+++ b/marginalia_nu/src/e2e/resources/crawl.sh
@@ -3,6 +3,8 @@
mkdir -p /var/lib/wmsa/conf/
mkdir -p /var/lib/wmsa/data/
+echo "search.marginalia.nu" > /var/lib/wmsa/conf/user-agent
+
cat > /var/lib/wmsa/conf/db.properties < /var/lib/wmsa/suggestions.txt < ${HOME}/suggestions.txt < /var/lib/wmsa/conf/disks.properties < ${HOME}/conf/disks.properties < /var/lib/wmsa/conf/db.properties < ${HOME}/conf/db.properties < /var/lib/wmsa/conf/ranking-settings.yaml < ${HOME}/conf/ranking-settings.yaml < /var/lib/wmsa/conf/hosts < ${HOME}/conf/hosts < {
- private static final String DEFAULT_HOSTNAME = "127.0.0.1";
- private final int monitorPort;
- private final String monitorHost;
- private final int timeout;
- private final Logger logger = LoggerFactory.getLogger(getClass());
-
- @Inject
- public HostnameProvider(@Named("monitor-port") Integer monitorPort,
- @Named("monitor-host") String monitorHost,
- @Named("monitor-boot-timeout") Integer timeout
- ) {
- this.monitorHost = monitorHost;
- this.monitorPort = monitorPort;
- this.timeout = timeout;
- }
-
- @Override
- public String get() {
- var override = System.getProperty("service-host");
- if (null != override) {
- return override;
- }
- return DEFAULT_HOSTNAME;
- }
-
-}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java
deleted file mode 100644
index 7286aa68..00000000
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package nu.marginalia.wmsa.configuration.module;
-
-import com.google.inject.name.Named;
-import io.reactivex.rxjava3.core.Flowable;
-import nu.marginalia.wmsa.configuration.ServiceDescriptor;
-import org.apache.http.HttpResponse;
-import org.reactivestreams.Publisher;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import javax.inject.Inject;
-import javax.inject.Provider;
-import java.io.IOException;
-import java.util.concurrent.TimeUnit;
-
-public class PortProvider implements Provider {
- private static final Integer DEFAULT_PORT = 5000;
- private final int monitorPort;
- private final String monitorHost;
- private final Logger logger = LoggerFactory.getLogger(getClass());
- private final int timeout = 10;
- @Inject
- public PortProvider(@Named("monitor-port") Integer monitorPort,
- @Named("monitor-host") String monitorHost,
- @Named("monitor-boot-timeout") Integer timeout) {
- this.monitorHost = monitorHost;
- this.monitorPort = monitorPort;
- }
-
- @Override
- public Integer get() {
- return ServiceDescriptor.byName(System.getProperty("service-name")).port;
- }
-
- private Publisher> repeatDelay(Flowable error) {
- return error.delay(1, TimeUnit.SECONDS);
- }
-
- private String accept200(HttpResponse rsp) throws IOException {
- if (rsp.getStatusLine().getStatusCode() != 200) {
- throw new RuntimeException("Monitor responded unexpected status "
- + rsp.getStatusLine().getStatusCode());
- }
- return new String(rsp.getEntity().getContent().readAllBytes());
- }
-}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java
index cc5c3fe6..dcc8d90d 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java
@@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.assistant;
import com.google.inject.AbstractModule;
import nu.marginalia.util.language.conf.LanguageModels;
+import nu.marginalia.wmsa.configuration.WmsaHome;
import java.nio.file.Path;
@@ -9,14 +10,8 @@ import static com.google.inject.name.Names.named;
public class EdgeAssistantModule extends AbstractModule {
public void configure() {
- bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(Path.of("/var/lib/wmsa/suggestions.txt"));
- bind(LanguageModels.class).toInstance(new LanguageModels(
- Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"),
- Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"),
- Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"),
- Path.of("/var/lib/wmsa/model/English.RDR"),
- Path.of("/var/lib/wmsa/model/English.DICT"),
- Path.of("/var/lib/wmsa/model/opennlp-tok.bin")
- ));
+ bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(WmsaHome.getHomePath().resolve("suggestions.txt"));
+
+ bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
}
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterModule.java
index 6f03632f..4bf6eaea 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterModule.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterModule.java
@@ -5,6 +5,7 @@ import com.google.inject.AbstractModule;
import com.google.inject.name.Names;
import marcono1234.gson.recordadapter.RecordTypeAdapterFactory;
import nu.marginalia.util.language.conf.LanguageModels;
+import nu.marginalia.wmsa.configuration.WmsaHome;
import nu.marginalia.wmsa.edge.model.EdgeCrawlPlan;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
@@ -30,14 +31,7 @@ public class ConverterModule extends AbstractModule {
bind(Integer.class).annotatedWith(Names.named("max-title-length")).toInstance(128);
bind(Integer.class).annotatedWith(Names.named("max-summary-length")).toInstance(255);
- bind(LanguageModels.class).toInstance(new LanguageModels(
- Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"),
- Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"),
- Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"),
- Path.of("/var/lib/wmsa/model/English.RDR"),
- Path.of("/var/lib/wmsa/model/English.DICT"),
- Path.of("/var/lib/wmsa/model/opennlp-tok.bin")
- ));
+ bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
}
private Gson createGson() {
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java
index ce6393f2..b205cdea 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java
@@ -185,26 +185,25 @@ public class DocumentProcessor {
}
private void getLinks(EdgeUrl baseUrl, ProcessedDocumentDetails ret, Document doc, EdgePageWordSet words) {
- var links = doc.getElementsByTag("a");
- var frames = doc.getElementsByTag("frame");
- var feeds = doc.select("link[rel=alternate]");
- LinkProcessor lp = new LinkProcessor(ret, baseUrl);
+ final LinkProcessor lp = new LinkProcessor(ret, baseUrl);
- for (var atag : links) {
+ baseUrl = linkParser.getBaseLink(doc, baseUrl);
+
+ for (var atag : doc.getElementsByTag("a")) {
linkParser.parseLink(baseUrl, atag).ifPresent(lp::accept);
}
- for (var frame : frames) {
+ for (var frame : doc.getElementsByTag("frame")) {
linkParser.parseFrame(baseUrl, frame).ifPresent(lp::accept);
}
- for (var link : feeds) {
+ for (var link : doc.select("link[rel=alternate]")) {
feedExtractor
- .getFeedFromAlternateTag(baseUrl, link)
+ .getFeedFromAlternateTag(baseUrl, link)
.ifPresent(lp::acceptFeed);
}
- Set linkTerms = new HashSet<>();
+ final Set linkTerms = new HashSet<>();
for (var domain : lp.getForeignDomains()) {
linkTerms.add("links:"+domain.toString().toLowerCase());
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkParser.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkParser.java
index aedaf0f7..378182f2 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkParser.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkParser.java
@@ -1,9 +1,12 @@
package nu.marginalia.wmsa.edge.converting.processor.logic;
import com.google.common.base.CharMatcher;
+import com.google.common.base.Strings;
import lombok.SneakyThrows;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
import org.jetbrains.annotations.Contract;
+import org.jetbrains.annotations.Nullable;
+import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -26,11 +29,11 @@ public class LinkParser {
".gz", ".asc", ".md5", ".asf", ".mov", ".sig", ".pub", ".iso");
@Contract(pure=true)
- public Optional parseLink(EdgeUrl baseUrl, Element l) {
+ public Optional parseLink(EdgeUrl relativeBaseUrl, Element l) {
return Optional.of(l)
.filter(this::shouldIndexLink)
.map(this::getUrl)
- .map(link -> resolveUrl(baseUrl, link))
+ .map(link -> resolveUrl(relativeBaseUrl, link))
.flatMap(this::createURI)
.map(URI::normalize)
.map(this::renormalize)
@@ -100,6 +103,8 @@ public class LinkParser {
}
private static final Pattern paramRegex = Pattern.compile("\\?.*$");
+ private static final Pattern spaceRegex = Pattern.compile(" ");
+
@SneakyThrows
private String resolveUrl(EdgeUrl baseUrl, String s) {
s = paramRegex.matcher(s).replaceAll("");
@@ -111,10 +116,12 @@ public class LinkParser {
// url looks like /my-page
if (s.startsWith("/")) {
- return baseUrl.sibling(s).toString();
+ return baseUrl.withPath(s).toString();
}
- return baseUrl.sibling(relativeNavigation(baseUrl) + s.replaceAll(" ", "%20")).toString();
+ final String partFromNewLink = spaceRegex.matcher(s).replaceAll("%20");
+
+ return baseUrl.withPath(relativeNavigation(baseUrl) + partFromNewLink).toString();
}
// for a relative url that looks like /foo or /foo/bar; return / or /foo
@@ -162,4 +169,23 @@ public class LinkParser {
}
return true;
}
+
+ @Nullable
+ public EdgeUrl getBaseLink(Document parsed, EdgeUrl documentUrl) {
+ var baseTags = parsed.getElementsByTag("base");
+
+ try {
+ for (var tag : baseTags) {
+ String href = tag.attr("href");
+ if (!Strings.isNullOrEmpty(href)) {
+ return new EdgeUrl(resolveUrl(documentUrl, href));
+ }
+ }
+ }
+ catch (Exception ex) {
+ logger.warn("Failed to parse , falling back to document url");
+ }
+
+ return documentUrl;
+ }
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java
index d81e348b..7238dce0 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java
@@ -2,6 +2,8 @@ package nu.marginalia.wmsa.edge.crawling;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
+import nu.marginalia.wmsa.configuration.UserAgent;
+import nu.marginalia.wmsa.configuration.WmsaHome;
import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain;
import nu.marginalia.wmsa.edge.crawling.model.CrawlingSpecification;
import nu.marginalia.wmsa.edge.crawling.retreival.CrawlerRetreiver;
@@ -34,10 +36,12 @@ public class CrawlerMain implements AutoCloseable {
private final Dispatcher dispatcher = new Dispatcher(new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5, TimeUnit.SECONDS,
new SynchronousQueue<>(), Util.threadFactory("OkHttp Dispatcher", true)));
+ private final UserAgent userAgent;
public CrawlerMain(EdgeCrawlPlan plan) throws Exception {
this.inputSpec = plan.getJobSpec();
this.numberOfThreads = 512;
+ this.userAgent = WmsaHome.getUserAgent();
workLog = new WorkLog(plan.crawl.getLogFile());
domainWriter = new CrawledDomainWriter(plan.crawl.getDir());
@@ -88,7 +92,7 @@ public class CrawlerMain implements AutoCloseable {
if (workLog.isJobFinished(specification.id))
return null;
- var fetcher = new HttpFetcher("search.marginalia.nu", dispatcher);
+ var fetcher = new HttpFetcher(userAgent.uaString(), dispatcher);
try {
var retreiver = new CrawlerRetreiver(fetcher, specification);
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java
index a7c08a24..2b27ed4d 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java
@@ -202,10 +202,11 @@ public class CrawlerRetreiver {
return domain.equals(url.domain.toString().toLowerCase());
}
- private void findLinks(EdgeUrl url, Document parsed) {
+ private void findLinks(EdgeUrl baseUrl, Document parsed) {
+ baseUrl = linkParser.getBaseLink(parsed, baseUrl);
for (var link : parsed.getElementsByTag("a")) {
- linkParser.parseLink(url, link)
+ linkParser.parseLink(baseUrl, link)
.filter(this::isSameDomain)
.filter(u -> !urlBlocklist.isUrlBlocked(u))
.filter(u -> !urlBlocklist.isForumLink(u))
@@ -213,7 +214,7 @@ public class CrawlerRetreiver {
.ifPresent(queue::addLast);
}
for (var link : parsed.getElementsByTag("frame")) {
- linkParser.parseFrame(url, link)
+ linkParser.parseFrame(baseUrl, link)
.filter(this::isSameDomain)
.filter(u -> !urlBlocklist.isUrlBlocked(u))
.filter(u -> !urlBlocklist.isForumLink(u))
@@ -221,7 +222,7 @@ public class CrawlerRetreiver {
.ifPresent(queue::addLast);
}
for (var link : parsed.getElementsByTag("iframe")) {
- linkParser.parseFrame(url, link)
+ linkParser.parseFrame(baseUrl, link)
.filter(this::isSameDomain)
.filter(u -> !urlBlocklist.isUrlBlocked(u))
.filter(u -> !urlBlocklist.isForumLink(u))
@@ -230,10 +231,11 @@ public class CrawlerRetreiver {
}
}
- private Optional findCanonicalUrl(EdgeUrl url, Document parsed) {
+ private Optional findCanonicalUrl(EdgeUrl baseUrl, Document parsed) {
+ baseUrl = baseUrl.withPath("/");
for (var link : parsed.select("link[rel=canonical]")) {
- return linkParser.parseLink(url, link);
+ return linkParser.parseLink(baseUrl, link);
}
return Optional.empty();
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java
index bc9c2f44..4650b15b 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java
@@ -2,17 +2,18 @@ package nu.marginalia.wmsa.edge.index;
import com.google.inject.AbstractModule;
import com.google.inject.name.Names;
+import nu.marginalia.wmsa.configuration.WmsaHome;
import java.nio.file.Path;
public class EdgeTablesModule extends AbstractModule {
public void configure() {
- bind(Path.class).annotatedWith(Names.named("partition-root-slow")).toInstance(Path.of("/var/lib/wmsa/index/write"));
- bind(Path.class).annotatedWith(Names.named("partition-root-slow-tmp")).toInstance(Path.of("/backup/work/index-tmp/"));
+ bind(Path.class).annotatedWith(Names.named("partition-root-slow")).toInstance(WmsaHome.getDisk("index-write"));
+ bind(Path.class).annotatedWith(Names.named("partition-root-fast")).toInstance(WmsaHome.getDisk("index-read"));
- bind(Path.class).annotatedWith(Names.named("partition-root-fast")).toInstance(Path.of("/var/lib/wmsa/index/read"));
- bind(Path.class).annotatedWith(Names.named("tmp-file-dir")).toInstance(Path.of("/var/lib/wmsa/index/read"));
+ bind(Path.class).annotatedWith(Names.named("partition-root-slow-tmp")).toInstance(WmsaHome.getDisk("tmp-slow"));
+ bind(Path.class).annotatedWith(Names.named("tmp-file-dir")).toInstance(WmsaHome.getDisk("tmp-fast"));
bind(String.class).annotatedWith(Names.named("edge-writer-page-index-file")).toInstance("page-index.dat");
bind(String.class).annotatedWith(Names.named("edge-writer-dictionary-file")).toInstance("dictionary.dat");
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java
index cb778947..53740c95 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java
@@ -21,6 +21,7 @@ public class EdgeDomain implements WideHashable {
@SneakyThrows
public EdgeDomain(String host) {
+ Objects.requireNonNull(host, "domain name must not be null");
var dot = host.lastIndexOf('.');
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java
index 39bc475b..e82d4b7c 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java
@@ -79,11 +79,6 @@ public class EdgeUrl implements WideHashable {
this.port = port(URI.getPort(), proto);
}
- public EdgeUrl sibling(String newPath) {
- return new EdgeUrl(proto, domain, port, newPath);
- }
-
-
private static Integer port(Integer port, String protocol) {
if (null == port || port < 1) {
return null;
@@ -120,5 +115,7 @@ public class EdgeUrl implements WideHashable {
return (int) path.chars().filter(c -> c=='/').count();
}
-
+ public EdgeUrl withPath(String s) {
+ return new EdgeUrl(proto, domain, port, s);
+ }
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java
index 9e1df8d5..9db18272 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java
@@ -2,21 +2,14 @@ package nu.marginalia.wmsa.edge.search;
import com.google.inject.AbstractModule;
import nu.marginalia.util.language.conf.LanguageModels;
-
-import java.nio.file.Path;
+import nu.marginalia.wmsa.configuration.WebsiteUrl;
+import nu.marginalia.wmsa.configuration.WmsaHome;
public class EdgeSearchModule extends AbstractModule {
public void configure() {
-
- bind(LanguageModels.class).toInstance(new LanguageModels(
- Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"),
- Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"),
- Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"),
- Path.of("/var/lib/wmsa/model/English.RDR"),
- Path.of("/var/lib/wmsa/model/English.DICT"),
- Path.of("/var/lib/wmsa/model/opennlp-tok.bin")
- ));
+ bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
+ bind(WebsiteUrl.class).toInstance(new WebsiteUrl(System.getProperty("website-url", "https://search.marginalia.nu/")));
}
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java
index 329322a2..fa2d06e0 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java
@@ -8,6 +8,7 @@ import com.google.inject.name.Named;
import lombok.SneakyThrows;
import nu.marginalia.wmsa.api.model.ApiSearchResult;
import nu.marginalia.wmsa.api.model.ApiSearchResults;
+import nu.marginalia.wmsa.configuration.WebsiteUrl;
import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.configuration.server.Initialization;
import nu.marginalia.wmsa.configuration.server.MetricsServer;
@@ -34,7 +35,7 @@ public class EdgeSearchService extends Service {
private final EdgeIndexClient indexClient;
private final EdgeSearchOperator searchOperator;
private final CommandEvaluator searchCommandEvaulator;
-
+ private final WebsiteUrl websiteUrl;
private static final Logger logger = LoggerFactory.getLogger(EdgeSearchService.class);
@SneakyThrows
@@ -45,13 +46,14 @@ public class EdgeSearchService extends Service {
Initialization initialization,
MetricsServer metricsServer,
EdgeSearchOperator searchOperator,
- CommandEvaluator searchCommandEvaulator
- ) {
+ CommandEvaluator searchCommandEvaulator,
+ WebsiteUrl websiteUrl) {
super(ip, port, initialization, metricsServer);
this.indexClient = indexClient;
this.searchOperator = searchOperator;
this.searchCommandEvaulator = searchCommandEvaulator;
+ this.websiteUrl = websiteUrl;
Spark.staticFiles.expireTime(600);
@@ -79,7 +81,7 @@ public class EdgeSearchService extends Service {
final String query = URLEncoder.encode(String.format("%s site:%s", queryRaw, site), StandardCharsets.UTF_8);
final String profile = request.queryParamOrDefault("profile", "yolo");
- response.redirect("https://search.marginalia.nu/search?query="+query+"&profile="+profile);
+ response.redirect(websiteUrl.withPath("search?query="+query+"&profile="+profile));
return null;
}
@@ -141,7 +143,7 @@ public class EdgeSearchService extends Service {
final String queryParam = request.queryParams("query");
if (null == queryParam || queryParam.isBlank()) {
- response.redirect("https://search.marginalia.nu/");
+ response.redirect(websiteUrl.url());
return null;
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java
index 2de9e931..30bac9d3 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java
@@ -7,7 +7,6 @@ import java.nio.file.Path;
public class ResourceStoreModule extends AbstractModule {
public void configure() {
- bind(String.class).annotatedWith(Names.named("external-url")).toInstance("https://reddit.marginalia.nu/");
bind(Path.class).annotatedWith(Names.named("data-path")).toInstance(Path.of("/var/lib/wmsa/archive.fast/resources"));
}
diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/LinkParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/LinkParserTest.java
index 80c62153..d4a7e428 100644
--- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/LinkParserTest.java
+++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/LinkParserTest.java
@@ -11,9 +11,8 @@ import static org.junit.jupiter.api.Assertions.*;
class LinkParserTest {
- private String parseLink(String href, String base) throws URISyntaxException {
- var url = new EdgeUrl("http://www.marginalia.nu/" + base);
- var domain = url.domain;
+ private String parseLink(String href, String relBase) throws URISyntaxException {
+ var url = new EdgeUrl("http://www.marginalia.nu/" + relBase);
var parser = new LinkParser();
var stuff = Jsoup.parseBodyFragment("test");
var lnk = parser.parseLink(
@@ -43,6 +42,7 @@ class LinkParserTest {
void testAnchor() throws URISyntaxException {
assertNull(parseLink("#test", "/"));
}
+
@Test
void testRelative() throws URISyntaxException {
assertEquals("http://www.marginalia.nu/test", parseLink("../test", "/"));
@@ -51,4 +51,32 @@ class LinkParserTest {
assertEquals("http://www.marginalia.nu/test", parseLink("../test", "/foo/index.html"));
assertEquals("http://www.marginalia.nu/test", parseLink("/test", "/foo/index.html"));
}
+
+    private EdgeUrl getBaseUrl(String href, EdgeUrl documentUrl) {
+        LinkParser lp = new LinkParser();
+        // Wrap href in a <base> tag so getBaseLink has something to resolve against documentUrl
+        return lp.getBaseLink(Jsoup.parse("<base href=\"" + href + "\" />"), documentUrl);
+    }
+
+ @Test
+ public void getBaseUrlTest() throws URISyntaxException {
+ assertEquals(new EdgeUrl("https://www.marginalia.nu/base"),
+ getBaseUrl("/base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar")));
+
+ assertEquals(new EdgeUrl("https://memex.marginalia.nu/base"),
+ getBaseUrl("https://memex.marginalia.nu/base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar")));
+
+ assertEquals(new EdgeUrl("https://www.marginalia.nu/test/base"),
+ getBaseUrl("base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar")));
+ }
+
+    @Test
+    public void testParseBadBaseLink() throws URISyntaxException {
+        LinkParser lp = new LinkParser();
+        var url = new EdgeUrl("https://memex.marginalia.nu/");
+        // <base> tags whose href is missing or empty must leave the document url as the base
+        assertEquals(url, lp.getBaseLink(Jsoup.parse("<base href/>"), url));
+        assertEquals(url, lp.getBaseLink(Jsoup.parse("<base target=\"foo\"/>"), url));
+        assertEquals(url, lp.getBaseLink(Jsoup.parse("<base href=\"\"/>"), url));
+    }
}
\ No newline at end of file
From 8df48d1c6d112c5542238b81e32c69347e00c792 Mon Sep 17 00:00:00 2001
From: Viktor Lofgren
Date: Thu, 16 Jun 2022 14:15:54 +0200
Subject: [PATCH 2/3] Fix front page typo (#29)
Co-authored-by: vlofgren
Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/29
---
marginalia_nu/src/main/resources/static/edge/index.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/marginalia_nu/src/main/resources/static/edge/index.html b/marginalia_nu/src/main/resources/static/edge/index.html
index 166e67b8..47d6e314 100644
--- a/marginalia_nu/src/main/resources/static/edge/index.html
+++ b/marginalia_nu/src/main/resources/static/edge/index.html
@@ -88,7 +88,7 @@
theology,
the occult,
knitting,
- compter science,
+ computer science,
or art.
From 93c274f1d4e17a26969b05df50807235a2b27c5e Mon Sep 17 00:00:00 2001
From: vlofgren
Date: Fri, 8 Jul 2022 12:34:05 +0200
Subject: [PATCH 3/3] E2E-test for memex
---
.../nu/marginalia/wmsa/edge/E2ETestBase.java | 16 ++
.../nu/marginalia/wmsa/edge/MemexE2ETest.java | 95 ++++++++++
marginalia_nu/src/e2e/resources/init.sh | 1 +
marginalia_nu/src/e2e/resources/memex.sh | 39 +++++
.../src/e2e/resources/memex/index.gmi | 6 +
.../src/e2e/resources/memex/log/a.gmi | 7 +
.../src/e2e/resources/memex/log/b.gmi | 6 +
.../src/e2e/resources/memex/log/index.gmi | 7 +
.../src/e2e/resources/nginx/memex.conf | 27 +++
.../nu/marginalia/gemini/GeminiService.java | 163 +----------------
.../marginalia/gemini/GeminiServiceDummy.java | 10 ++
.../marginalia/gemini/GeminiServiceImpl.java | 164 ++++++++++++++++++
.../gemini/plugins/BareStaticPagePlugin.java | 7 +-
.../nu/marginalia/wmsa/auth/AuthService.java | 23 ++-
.../wmsa/configuration/ServiceDescriptor.java | 5 +-
.../configuration/command/StartCommand.java | 1 -
.../wmsa/configuration/server/Service.java | 48 +++--
.../java/nu/marginalia/wmsa/memex/Memex.java | 4 +-
.../wmsa/memex/MemexConfigurationModule.java | 44 ++++-
.../nu/marginalia/wmsa/memex/MemexMain.java | 2 +-
.../marginalia/wmsa/memex/MemexService.java | 17 +-
.../wmsa/memex/client/MemexApiClient.java | 2 +-
.../memex/system/MemexSourceFileSystem.java | 6 +-
.../wmsa/memex/system/git/MemexGitRepo.java | 15 ++
.../memex/system/git/MemexGitRepoDummy.java | 36 ++++
.../MemexGitRepoImpl.java} | 16 +-
.../wmsa/memex/change/GemtextChangeTest.java | 14 +-
.../memex/change/GemtextTaskUpdateTest.java | 14 +-
.../GemtextTombstoneUpdateCaclulatorTest.java | 14 +-
29 files changed, 579 insertions(+), 230 deletions(-)
create mode 100644 marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/MemexE2ETest.java
create mode 100644 marginalia_nu/src/e2e/resources/memex.sh
create mode 100644 marginalia_nu/src/e2e/resources/memex/index.gmi
create mode 100644 marginalia_nu/src/e2e/resources/memex/log/a.gmi
create mode 100644 marginalia_nu/src/e2e/resources/memex/log/b.gmi
create mode 100644 marginalia_nu/src/e2e/resources/memex/log/index.gmi
create mode 100644 marginalia_nu/src/e2e/resources/nginx/memex.conf
create mode 100644 marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceDummy.java
create mode 100644 marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceImpl.java
create mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepo.java
create mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoDummy.java
rename marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/{MemexGitRepo.java => git/MemexGitRepoImpl.java} (90%)
diff --git a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
index 0c329a79..da40a7fc 100644
--- a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
+++ b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
@@ -43,6 +43,22 @@ public abstract class E2ETestBase {
.withReadTimeout(Duration.ofSeconds(15)))
;
}
+ /** Builds an openjdk:17-alpine container for {@code service}, started by running the classpath resource
+  *  {@code setupScript} (copied to "/" + setupScript) via {@code sh} with the service name as its only argument. */
+ public static GenericContainer<?> forService(ServiceDescriptor service, GenericContainer<?> mariaDB, String setupScript) {
+     return new GenericContainer<>("openjdk:17-alpine")
+             .dependsOn(mariaDB)
+             .withCopyFileToContainer(jarFile(), "/WMSA.jar")
+             .withCopyFileToContainer(MountableFile.forClasspathResource(setupScript), "/" + setupScript)
+             .withExposedPorts(service.port)
+             .withFileSystemBind(modelsPath(), "/wmsa/model", BindMode.READ_ONLY)
+             .withNetwork(network)
+             .withNetworkAliases(service.name)
+             .withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger(service.name)))
+             .withCommand("sh", setupScript, service.name)
+             // Wait strategy: HTTP request to the service's ping endpoint on its own port
+             .waitingFor(Wait.forHttp("/internal/ping").forPort(service.port).withReadTimeout(Duration.ofSeconds(15)));
+ }
public static MountableFile jarFile() {
Path cwd = Path.of(System.getProperty("user.dir"));
diff --git a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/MemexE2ETest.java b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/MemexE2ETest.java
new file mode 100644
index 00000000..7410b3b3
--- /dev/null
+++ b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/MemexE2ETest.java
@@ -0,0 +1,111 @@
+package nu.marginalia.wmsa.edge;
+
+
+import com.google.gson.Gson;
+import com.google.gson.GsonBuilder;
+import okhttp3.OkHttpClient;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.mariadb.jdbc.Driver;
+import org.openqa.selenium.OutputType;
+import org.openqa.selenium.chrome.ChromeOptions;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.containers.*;
+import org.testcontainers.containers.output.Slf4jLogConsumer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+import org.testcontainers.utility.MountableFile;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.LocalDateTime;
+import java.util.concurrent.TimeUnit;
+
+import static nu.marginalia.wmsa.configuration.ServiceDescriptor.AUTH;
+import static nu.marginalia.wmsa.configuration.ServiceDescriptor.MEMEX;
+
+/**
+ * End-to-end smoke test for the memex service: starts MariaDB, the auth service, the memex
+ * service (set up by {@code memex.sh}, with the classpath directory {@code /memex} mounted
+ * read-only), an nginx proxy and a Chrome WebDriver container, then loads a few pages through
+ * the proxy and stores a screenshot of each.
+ */
+@Tag("e2e")
+@Testcontainers
+public class MemexE2ETest extends E2ETestBase {
+    @Container
+    public MariaDBContainer<?> mariaDB = getMariaDBContainer();
+
+    @Container
+    public GenericContainer<?> auth = forService(AUTH, mariaDB);
+
+    @Container
+    public GenericContainer<?> memexContainer = forService(MEMEX, mariaDB, "memex.sh")
+            .withClasspathResourceMapping("/memex", "/memex", BindMode.READ_ONLY);
+
+    /** nginx configured from nginx/memex.conf, started after auth and memex; the browser reaches it via the network alias "proxyNginx". */
+    @Container
+    public NginxContainer<?> proxyNginx = new NginxContainer<>("nginx:stable")
+            .dependsOn(auth)
+            .dependsOn(memexContainer)
+            .withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("nginx")))
+            .withCopyFileToContainer(MountableFile.forClasspathResource("nginx/memex.conf"), "/etc/nginx/conf.d/default.conf")
+            .withNetwork(network)
+            .withNetworkAliases("proxyNginx");
+
+    @Container
+    public BrowserWebDriverContainer<?> chrome = new BrowserWebDriverContainer<>()
+            .withNetwork(network)
+            .withCapabilities(new ChromeOptions());
+
+    private Gson gson = new GsonBuilder().create();
+    private OkHttpClient httpClient = new OkHttpClient.Builder()
+            .connectTimeout(100, TimeUnit.MILLISECONDS)
+            .readTimeout(6000, TimeUnit.SECONDS)
+            .retryOnConnectionFailure(true)
+            .followRedirects(true)
+            .build();
+
+    /**
+     * Visits the front page, the log index and two log entries via the proxy, saving a screenshot
+     * of each under {@code build/test/e2e/}. There are no assertions: the test fails only if a
+     * container does not start or one of the steps throws.
+     */
+    @Test
+    public void run() throws IOException, InterruptedException {
+        // NOTE(review): fixed 10 s pause, presumably to let the services settle after start-up — confirm
+        Thread.sleep(10_000);
+        // NOTE(review): the instance is discarded; presumably this only forces the MariaDB driver class to load — confirm
+        new Driver();
+
+        var driver = chrome.getWebDriver();
+
+        driver.get("http://proxyNginx/");
+        Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("frontpage"));
+
+        driver.get("http://proxyNginx/log/");
+        Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("log"));
+
+        driver.get("http://proxyNginx/log/a.gmi");
+        Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("log-a.gmi"));
+
+        driver.get("http://proxyNginx/log/b.gmi");
+        Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("log-b.gmi"));
+    }
+
+    /**
+     * Returns {@code build/test/e2e/test-OPERATION-TIMESTAMP.png} below the JVM's {@code user.dir},
+     * creating the directory if needed. TIMESTAMP is {@link LocalDateTime#now()} in its default string form.
+     */
+    private static Path screenshotFilename(String operation) throws IOException {
+        var path = Path.of(System.getProperty("user.dir")).resolve("build/test/e2e/");
+        Files.createDirectories(path);
+
+        String name = String.format("test-%s-%s.png", operation, LocalDateTime.now());
+        path = path.resolve(name);
+
+        System.out.println("Screenshot in " + path);
+        return path;
+    }
+}
diff --git a/marginalia_nu/src/e2e/resources/init.sh b/marginalia_nu/src/e2e/resources/init.sh
index 5409f787..2f9fa103 100644
--- a/marginalia_nu/src/e2e/resources/init.sh
+++ b/marginalia_nu/src/e2e/resources/init.sh
@@ -69,4 +69,5 @@ memex memex
dating dating
EOF
+echo "*** Starting $1"
WMSA_HOME=${HOME} java -Dsmall-ram=TRUE -Dservice-host=0.0.0.0 -jar /WMSA.jar start $1
\ No newline at end of file
diff --git a/marginalia_nu/src/e2e/resources/memex.sh b/marginalia_nu/src/e2e/resources/memex.sh
new file mode 100644
index 00000000..6ce801b5
--- /dev/null
+++ b/marginalia_nu/src/e2e/resources/memex.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+HOME=/wmsa
+
+mkdir -p ${HOME}/conf
+
+cat > ${HOME}/conf/db.properties < ${HOME}/conf/hosts < serve(connection));
- }
- }
- }
- catch (IOException ex) {
- logger.error("IO Exception in gemini server", ex);
- }
- }
-
- private void serve(SSLSocket socket) {
- final GeminiConnection connection;
- try {
- connection = new GeminiConnection(socket);
- }
- catch (IOException ex) {
- logger.error("Failed to create connection object", ex);
- return;
- }
-
- try {
- handleRequest(connection);
- }
- catch (GeminiUserException ex) {
- errorResponse(connection, ex.getMessage());
- }
- catch (SSLException ex) {
- logger.error(connection.getAddress() + " SSL error");
- connection.close();
- }
- catch (Exception ex) {
- errorResponse(connection, "Error");
- logger.error(connection.getAddress(), ex);
- }
- finally {
- connection.close();
- }
- }
-
- private void errorResponse(GeminiConnection connection, String message) {
- if (connection.isConnected()) {
- try {
- logger.error("=> " + connection.getAddress(), message);
- connection.writeStatusLine(GeminiStatusCode.ERROR_PERMANENT, message);
- }
- catch (IOException ex) {
- logger.error("Exception while sending error", ex);
- }
- }
- }
-
- private void handleRequest(GeminiConnection connection) throws Exception {
-
- final String address = connection.getAddress();
- logger.info("Connect: " + address);
-
- final Optional maybeUri = connection.readUrl();
- if (maybeUri.isEmpty()) {
- logger.info("Done: {}", address);
- return;
- }
-
- final URI uri = maybeUri.get();
- logger.info("Request {}", uri);
-
- if (!uri.getScheme().equals("gemini")) {
- throw new GeminiUserException("Unsupported protocol");
- }
-
- servePage(connection, uri);
- logger.info("Done: {}", address);
- }
-
- private void servePage(GeminiConnection connection, URI url) throws IOException {
- String path = url.getPath();
-
- for (Plugin p : plugins) {
- if (p.serve(url, connection)) {
- return;
- }
- }
-
- logger.error("FileNotFound {}", path);
- connection.writeStatusLine(GeminiStatusCode.ERROR_TEMPORARY, "No such file");
- }
-
+public interface GeminiService { // Gemini server entry point; implemented by GeminiServiceImpl (real server) and GeminiServiceDummy (no-op)
+ String DEFAULT_FILENAME = "index.gmi"; // file BareStaticPagePlugin resolves to when the requested path is a directory containing it
+ void run(); // starts serving; GeminiServiceImpl loops accepting connections here, GeminiServiceDummy returns at once
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceDummy.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceDummy.java
new file mode 100644
index 00000000..81586f31
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceDummy.java
@@ -0,0 +1,10 @@
+package nu.marginalia.gemini;
+
+import com.google.inject.Singleton;
+/** No-op {@link GeminiService}; MemexConfigurationModule binds it instead of GeminiServiceImpl when the memex-disable-gemini system property is true. */
+@Singleton
+public class GeminiServiceDummy implements GeminiService {
+ @Override
+ public void run() {
+ }
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceImpl.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceImpl.java
new file mode 100644
index 00000000..0381be48
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceImpl.java
@@ -0,0 +1,223 @@
+package nu.marginalia.gemini;
+
+import com.google.inject.Inject;
+import com.google.inject.Singleton;
+import com.google.inject.name.Named;
+import nu.marginalia.gemini.io.GeminiConnection;
+import nu.marginalia.gemini.io.GeminiSSLSetUp;
+import nu.marginalia.gemini.io.GeminiStatusCode;
+import nu.marginalia.gemini.io.GeminiUserException;
+import nu.marginalia.gemini.plugins.BareStaticPagePlugin;
+import nu.marginalia.gemini.plugins.Plugin;
+import nu.marginalia.gemini.plugins.SearchPlugin;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.net.ssl.SSLException;
+import javax.net.ssl.SSLServerSocket;
+import javax.net.ssl.SSLServerSocketFactory;
+import javax.net.ssl.SSLSocket;
+import java.io.IOException;
+import java.net.URI;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+import java.util.concurrent.Executor;
+import java.util.concurrent.Executors;
+
+/**
+ * TLS server for the Gemini protocol. Each accepted connection is served on a fixed pool of 32
+ * threads: the request URL is read, checked to use the {@code gemini} scheme and offered to the
+ * {@link Plugin}s in order until one of them reports that it served the request.
+ */
+@Singleton
+public class GeminiServiceImpl implements GeminiService {
+
+    public final Path serverRoot;
+
+    private final Logger logger = LoggerFactory.getLogger(getClass().getSimpleName());
+    private final Executor pool = Executors.newFixedThreadPool(32);
+    private final SSLServerSocket serverSocket;
+
+    private final Plugin[] plugins;
+    private final BadBotList badBotList = BadBotList.INSTANCE;
+
+    /**
+     * Opens the TLS server socket and verifies that the server root exists.
+     *
+     * @param serverRoot directory the server is rooted at; the JVM exits with status 255 if it does not exist
+     * @param port TCP port to listen on
+     * @param sslSetUp supplies the TLS server socket factory
+     * @param pagePlugin first plugin offered every request
+     * @param searchPlugin offered the request when {@code pagePlugin} declines it
+     * @throws Exception if the socket factory or the server socket cannot be set up
+     */
+    @Inject
+    public GeminiServiceImpl(@Named("gemini-server-root") Path serverRoot,
+                             @Named("gemini-server-port") Integer port,
+                             GeminiSSLSetUp sslSetUp,
+                             BareStaticPagePlugin pagePlugin,
+                             SearchPlugin searchPlugin) throws Exception {
+        this.serverRoot = serverRoot;
+        logger.info("Setting up crypto");
+        final SSLServerSocketFactory socketFactory = sslSetUp.getServerSocketFactory();
+
+        serverSocket = (SSLServerSocket) socketFactory.createServerSocket(port /* 1965 */);
+        serverSocket.setEnabledCipherSuites(socketFactory.getSupportedCipherSuites());
+        serverSocket.setEnabledProtocols(new String[] {"TLSv1.3", "TLSv1.2"});
+
+        logger.info("Verifying setup");
+        if (!Files.exists(this.serverRoot)) {
+            logger.error("Could not find SERVER_ROOT {}", this.serverRoot);
+            System.exit(255);
+        }
+
+        plugins = new Plugin[] {
+                pagePlugin,
+                searchPlugin
+        };
+    }
+
+    /**
+     * Accepts connections until {@code accept()} itself fails, at which point the error is logged
+     * and the method returns. Problems with an individual connection are handled in
+     * {@link #dispatch(SSLSocket)} and do not stop the loop.
+     */
+    @Override
+    public void run() {
+        logger.info("Awaiting connections");
+
+        try {
+            for (;;) {
+                dispatch((SSLSocket) serverSocket.accept());
+            }
+        }
+        catch (IOException ex) {
+            logger.error("IO Exception in gemini server", ex);
+        }
+    }
+
+    /**
+     * Applies the 10 second read timeout to a freshly accepted socket, then either closes it
+     * (address rejected by the bad-bot list) or queues it for {@link #serve(SSLSocket)}. An I/O
+     * failure here is logged and costs only this one connection.
+     */
+    private void dispatch(SSLSocket connection) {
+        try {
+            connection.setSoTimeout(10_000);
+
+            if (!badBotList.isAllowed(connection.getInetAddress())) {
+                connection.close();
+            } else {
+                pool.execute(() -> serve(connection));
+            }
+        }
+        catch (IOException ex) {
+            logger.error("Failed to set up connection", ex);
+            closeQuietly(connection);
+        }
+    }
+
+    /** Closes a raw socket whose set-up failed; a failure to close is deliberately ignored. */
+    private void closeQuietly(SSLSocket socket) {
+        try {
+            socket.close();
+        }
+        catch (IOException ignored) {
+            // best effort: the socket is already unusable and there is nobody left to notify
+        }
+    }
+
+    /**
+     * Serves one connection on a pool thread and always closes it afterwards. User errors are
+     * reported to the client with their own message, any other failure with a generic "Error";
+     * after an SSL failure no response is attempted.
+     */
+    private void serve(SSLSocket socket) {
+        final GeminiConnection connection;
+        try {
+            connection = new GeminiConnection(socket);
+        }
+        catch (IOException ex) {
+            logger.error("Failed to create connection object", ex);
+            closeQuietly(socket);
+            return;
+        }
+
+        try {
+            handleRequest(connection);
+        }
+        catch (GeminiUserException ex) {
+            errorResponse(connection, ex.getMessage());
+        }
+        catch (SSLException ex) {
+            // the finally block closes the connection; no second close is needed here
+            logger.error(connection.getAddress() + " SSL error");
+        }
+        catch (Exception ex) {
+            errorResponse(connection, "Error");
+            logger.error(connection.getAddress(), ex);
+        }
+        finally {
+            connection.close();
+        }
+    }
+
+    /** Logs {@code message} together with the peer address and, if still connected, sends it as an ERROR_PERMANENT status line. */
+    private void errorResponse(GeminiConnection connection, String message) {
+        if (connection.isConnected()) {
+            try {
+                logger.error("=> {}: {}", connection.getAddress(), message);
+                connection.writeStatusLine(GeminiStatusCode.ERROR_PERMANENT, message);
+            }
+            catch (IOException ex) {
+                logger.error("Exception while sending error", ex);
+            }
+        }
+    }
+
+    /**
+     * Reads the request URL and serves it; returns quietly when the connection yields no URL.
+     * A URL with a missing or non-gemini scheme is rejected.
+     *
+     * @throws GeminiUserException if the URL scheme is not {@code gemini}
+     */
+    private void handleRequest(GeminiConnection connection) throws Exception {
+        final String address = connection.getAddress();
+        logger.info("Connect: " + address);
+
+        final Optional<URI> maybeUri = connection.readUrl();
+        if (maybeUri.isEmpty()) {
+            logger.info("Done: {}", address);
+            return;
+        }
+
+        final URI uri = maybeUri.get();
+        logger.info("Request {}", uri);
+
+        // constant-first: getScheme() is null for a URL without a scheme
+        if (!"gemini".equals(uri.getScheme())) {
+            throw new GeminiUserException("Unsupported protocol");
+        }
+
+        servePage(connection, uri);
+        logger.info("Done: {}", address);
+    }
+
+    /**
+     * Offers the URL to each plugin in turn and stops at the first one that serves it; if none
+     * does, logs the path and answers with ERROR_TEMPORARY "No such file".
+     */
+    private void servePage(GeminiConnection connection, URI url) throws IOException {
+        String path = url.getPath();
+
+        for (Plugin p : plugins) {
+            if (p.serve(url, connection)) {
+                return;
+            }
+        }
+
+        logger.error("FileNotFound {}", path);
+        connection.writeStatusLine(GeminiStatusCode.ERROR_TEMPORARY, "No such file");
+    }
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java
index fbfb502b..46bdfb7d 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java
@@ -2,6 +2,7 @@ package nu.marginalia.gemini.plugins;
import com.google.inject.Inject;
import com.google.inject.name.Named;
+import nu.marginalia.gemini.GeminiService;
import nu.marginalia.gemini.io.GeminiConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -11,8 +12,6 @@ import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
-import static nu.marginalia.gemini.GeminiService.DEFAULT_FILENAME;
-
public class BareStaticPagePlugin implements Plugin {
private final Logger logger = LoggerFactory.getLogger(getClass());
@@ -43,8 +42,8 @@ public class BareStaticPagePlugin implements Plugin {
private Path getServerPath(String requestPath) {
final Path serverPath = Path.of(geminiServerRoot + requestPath);
- if (Files.isDirectory(serverPath) && Files.isRegularFile(serverPath.resolve(DEFAULT_FILENAME))) {
- return serverPath.resolve(DEFAULT_FILENAME);
+ if (Files.isDirectory(serverPath) && Files.isRegularFile(serverPath.resolve(GeminiService.DEFAULT_FILENAME))) {
+ return serverPath.resolve(GeminiService.DEFAULT_FILENAME);
}
return serverPath;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java
index 60c22b9b..4c93db95 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java
@@ -1,6 +1,5 @@
package nu.marginalia.wmsa.auth;
-import com.github.jknack.handlebars.internal.Files;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import nu.marginalia.wmsa.auth.model.LoginFormModel;
@@ -14,11 +13,12 @@ import spark.Request;
import spark.Response;
import spark.Spark;
-import java.io.FileReader;
import java.io.IOException;
+import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import java.util.Optional;
+import java.util.UUID;
import static spark.Spark.*;
@@ -40,11 +40,8 @@ public class AuthService extends Service {
super(ip, port, initialization, metricsServer);
- try (var is = new FileReader(topSecretPasswordFile.toFile())) {
- password = Files.read(is);
- } catch (IOException e) {
- logger.error("Could not read password from file " + topSecretPasswordFile, e);
- }
+ password = initPassword(topSecretPasswordFile);
+
loginFormRenderer = rendererFactory.renderer("auth/login");
Spark.path("public/api", () -> {
@@ -60,6 +57,18 @@ public class AuthService extends Service {
});
}
+ private String initPassword(Path topSecretPasswordFile) { // the file's content if it can be read, otherwise a fresh random UUID
+     try {
+         if (Files.exists(topSecretPasswordFile)) {
+             return Files.readString(topSecretPasswordFile);
+         }
+     } catch (IOException e) {
+         logger.error("Could not read password from file " + topSecretPasswordFile, e);
+     }
+     logger.error("Setting random password"); // reached when the file is absent or could not be read
+     return UUID.randomUUID().toString();
+ }
+
private Object loginForm(Request request, Response response) {
String redir = Objects.requireNonNull(request.queryParams("redirect"));
String service = Objects.requireNonNull(request.queryParams("service"));
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java
index e0aff247..c0f7dde2 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java
@@ -1,7 +1,7 @@
package nu.marginalia.wmsa.configuration;
-import nu.marginalia.wmsa.auth.AuthMain;
import nu.marginalia.wmsa.api.ApiMain;
+import nu.marginalia.wmsa.auth.AuthMain;
import nu.marginalia.wmsa.configuration.command.Command;
import nu.marginalia.wmsa.configuration.command.ListCommand;
import nu.marginalia.wmsa.configuration.command.StartCommand;
@@ -35,7 +35,7 @@ public enum ServiceDescriptor {
EDGE_SEARCH("edge-search", 5023, EdgeSearchMain.class),
EDGE_ASSISTANT("edge-assistant", 5025, EdgeAssistantMain.class),
- EDGE_MEMEX("memex", 5030, MemexMain.class),
+ MEMEX("memex", 5030, MemexMain.class),
ENCYCLOPEDIA("encyclopedia", 5040, EncyclopediaMain.class),
@@ -79,7 +79,6 @@ public enum ServiceDescriptor {
}
public static void main(String... args) {
-
MainMapLookup.setMainArguments(args);
Map functions = Stream.of(new ListCommand(),
new StartCommand(),
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java
index 55d46813..cb63d749 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java
@@ -16,7 +16,6 @@ public class StartCommand extends Command {
System.err.println("Usage: start service-descriptor");
System.exit(255);
}
-
var mainMethod = getKind(args[1]).mainClass.getMethod("main", String[].class);
String[] args2 = Arrays.copyOfRange(args, 2, args.length);
mainMethod.invoke(null, (Object) args2);
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java
index c9f618da..9674611f 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java
@@ -37,7 +37,7 @@ public class Service {
private static volatile boolean initialized = false;
- public Service(String ip, int port, Initialization initialization, MetricsServer metricsServer) {
+ public Service(String ip, int port, Initialization initialization, MetricsServer metricsServer, Runnable configureStaticFiles) {
this.initialization = initialization;
serviceName = System.getProperty("service-name");
@@ -51,8 +51,7 @@ public class Service {
logger.info("{} Listening to {}:{}", getClass().getSimpleName(), ip == null ? "" : ip, port);
- Spark.staticFiles.expireTime(3600);
- Spark.staticFiles.header("Cache-control", "public");
+ configureStaticFiles.run();
Spark.before(this::filterPublicRequests);
Spark.before(this::auditRequestIn);
@@ -66,24 +65,35 @@ public class Service {
}
}
+ public Service(String ip, int port, Initialization initialization, MetricsServer metricsServer) {
+ this(ip, port, initialization, metricsServer, () -> {
+ // Default static-file setup (expireTime 3600, public Cache-control header). It is passed in as a
+ // Runnable rather than being an overridable method because a subclass's version may depend on
+ // that subclass's constructor parameters, and the super-constructor has to run first.
+ Spark.staticFiles.expireTime(3600);
+ Spark.staticFiles.header("Cache-control", "public");
+ });
+ }
+
private void filterPublicRequests(Request request, Response response) {
- if (null != request.headers("X-Public")) {
-
- String context = Optional
- .ofNullable(request.headers("X-Context"))
- .orElseGet(request::ip);
-
- if (!request.pathInfo().startsWith("/public/")) {
- logger.warn(httpMarker, "External connection to internal API: {} -> {} {}", context, request.requestMethod(), request.pathInfo());
- Spark.halt(HttpStatus.SC_FORBIDDEN);
- }
-
- String url = request.pathInfo();
- if (request.queryString() != null) {
- url = url + "?" + request.queryString();
- }
- logger.info(httpMarker, "PUBLIC {}: {} {}", Context.fromRequest(request).getIpHash().orElse("?"), request.requestMethod(), url);
+ if (null == request.headers("X-Public")) {
+ return; // requests without an X-Public header are neither filtered nor logged here
}
+ // Identify the caller for the warning below: the X-Context header if present, otherwise the request IP.
+ String context = Optional
+ .ofNullable(request.headers("X-Context"))
+ .orElseGet(request::ip);
+ // An X-Public request may only reach paths under /public/; anything else is logged and halted with 403.
+ if (!request.pathInfo().startsWith("/public/")) {
+ logger.warn(httpMarker, "External connection to internal API: {} -> {} {}", context, request.requestMethod(), request.pathInfo());
+ Spark.halt(HttpStatus.SC_FORBIDDEN);
+ }
+ // Log the accepted public request, with its query string appended when there is one.
+ String url = request.pathInfo();
+ if (request.queryString() != null) {
+ url = url + "?" + request.queryString();
+ }
+ logger.info(httpMarker, "PUBLIC {}: {} {}", Context.fromRequest(request).getIpHash().orElse("?"), request.requestMethod(), url);
}
private Object isInitialized(Request request, Response response) {
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java
index 5b5ac2f7..febdc5af 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java
@@ -6,9 +6,9 @@ import com.google.inject.name.Named;
import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.gemini.GeminiService;
import nu.marginalia.gemini.gmi.GemtextDatabase;
+import nu.marginalia.gemini.gmi.GemtextDocument;
import nu.marginalia.util.graphics.dithering.FloydSteinbergDither;
import nu.marginalia.util.graphics.dithering.Palettes;
-import nu.marginalia.gemini.gmi.GemtextDocument;
import nu.marginalia.wmsa.memex.change.GemtextTombstoneUpdateCaclulator;
import nu.marginalia.wmsa.memex.model.MemexImage;
import nu.marginalia.wmsa.memex.model.MemexNode;
@@ -16,7 +16,7 @@ import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
import nu.marginalia.wmsa.memex.renderer.MemexRendererers;
import nu.marginalia.wmsa.memex.system.MemexFileSystemMonitor;
import nu.marginalia.wmsa.memex.system.MemexFileWriter;
-import nu.marginalia.wmsa.memex.system.MemexGitRepo;
+import nu.marginalia.wmsa.memex.system.git.MemexGitRepo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java
index 676ebc05..2533a9d1 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java
@@ -5,23 +5,59 @@ import com.google.inject.Inject;
import com.google.inject.Provider;
import com.google.inject.name.Named;
import com.google.inject.name.Names;
+import lombok.SneakyThrows;
+import nu.marginalia.gemini.GeminiService;
+import nu.marginalia.gemini.GeminiServiceDummy;
+import nu.marginalia.gemini.GeminiServiceImpl;
import nu.marginalia.wmsa.memex.system.MemexFileWriter;
+import nu.marginalia.wmsa.memex.system.git.MemexGitRepo;
+import nu.marginalia.wmsa.memex.system.git.MemexGitRepoDummy;
+import nu.marginalia.wmsa.memex.system.git.MemexGitRepoImpl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.nio.file.Path;
public class MemexConfigurationModule extends AbstractModule {
+ private static final Logger logger = LoggerFactory.getLogger(MemexConfigurationModule.class);
+ // Locations are read from JVM system properties when this class is loaded, falling back to the /var/lib/wmsa defaults.
+ private static final String MEMEX_ROOT_PROPERTY = System.getProperty("memex-root", "/var/lib/wmsa/memex");
+ private static final String MEMEX_HTML_PROPERTY = System.getProperty("memex-html-resources", "/var/lib/wmsa/memex-html");
+ private static final String MEMEX_GMI_PROPERTY = System.getProperty("memex-gmi-resources", "/var/lib/wmsa/memex-gmi");
+ // Boolean.getBoolean is true only when the property is set to "true" (ignoring case); configure() then binds the dummy implementation.
+ private static final boolean MEMEX_DISABLE_GIT = Boolean.getBoolean("memex-disable-git");
+ private static final boolean MEMEX_DISABLE_GEMINI = Boolean.getBoolean("memex-disable-gemini");
+
+ @SneakyThrows
+ public MemexConfigurationModule() {
+ Thread.sleep(100); // NOTE(review): the reason for this fixed 100 ms pause is not visible here — confirm it is still needed
+ }
+
public void configure() {
- bind(Path.class).annotatedWith(Names.named("memex-root")).toInstance(Path.of("/var/lib/wmsa/memex"));
- bind(Path.class).annotatedWith(Names.named("memex-html-resources")).toInstance(Path.of("/var/lib/wmsa/memex-html"));
- bind(Path.class).annotatedWith(Names.named("memex-gmi-resources")).toInstance(Path.of("/var/lib/wmsa/memex-gmi"));
+ bind(Path.class).annotatedWith(Names.named("memex-root")).toInstance(Path.of(MEMEX_ROOT_PROPERTY));
+ bind(Path.class).annotatedWith(Names.named("memex-html-resources")).toInstance(Path.of(MEMEX_HTML_PROPERTY));
+ bind(Path.class).annotatedWith(Names.named("memex-gmi-resources")).toInstance(Path.of(MEMEX_GMI_PROPERTY));
+
bind(String.class).annotatedWith(Names.named("tombestone-special-file")).toInstance("/special/tombstone.gmi");
bind(String.class).annotatedWith(Names.named("redirects-special-file")).toInstance("/special/redirect.gmi");
+ switchImpl(MemexGitRepo.class, MEMEX_DISABLE_GIT, MemexGitRepoDummy.class, MemexGitRepoImpl.class); // memex-disable-git=true -> log-only dummy, otherwise the real repo
+ switchImpl(GeminiService.class, MEMEX_DISABLE_GEMINI, GeminiServiceDummy.class, GeminiServiceImpl.class); // memex-disable-gemini=true -> no-op dummy, otherwise the real server
+
bind(MemexFileWriter.class).annotatedWith(Names.named("html")).toProvider(MemexHtmlWriterProvider.class);
bind(MemexFileWriter.class).annotatedWith(Names.named("gmi")).toProvider(MemexGmiWriterProvider.class);
}
-
+ /**
+  * Eagerly binds {@code impl} to {@code ifEnabled} when {@code param} is true and to
+  * {@code ifDisabled} otherwise. Beware the naming at the call sites in configure(): the flag
+  * passed there is a memex-disable-* property, so {@code ifEnabled} is the dummy that is used
+  * when the feature is switched off.
+  */
+ <T> void switchImpl(Class<T> impl, boolean param, Class<? extends T> ifEnabled, Class<? extends T> ifDisabled) {
+     final Class<? extends T> choice = param ? ifEnabled : ifDisabled;
+     bind(impl).to(choice).asEagerSingleton();
+ }
public static class MemexHtmlWriterProvider implements Provider {
private final Path path;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java
index e58848d2..f46ce4d1 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java
@@ -18,7 +18,7 @@ public class MemexMain extends MainClass {
}
public static void main(String... args) {
- init(ServiceDescriptor.EDGE_MEMEX, args);
+ init(ServiceDescriptor.MEMEX, args);
Injector injector = Guice.createInjector(
new MemexConfigurationModule(),
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java
index 4d22f1af..16440960 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java
@@ -3,6 +3,7 @@ package nu.marginalia.wmsa.memex;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import lombok.SneakyThrows;
+import nu.marginalia.gemini.gmi.GemtextDocument;
import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory;
import nu.marginalia.wmsa.auth.client.AuthClient;
import nu.marginalia.wmsa.configuration.server.Context;
@@ -10,12 +11,11 @@ import nu.marginalia.wmsa.configuration.server.Initialization;
import nu.marginalia.wmsa.configuration.server.MetricsServer;
import nu.marginalia.wmsa.configuration.server.Service;
import nu.marginalia.wmsa.memex.change.GemtextMutation;
-import nu.marginalia.gemini.gmi.GemtextDocument;
import nu.marginalia.wmsa.memex.change.update.GemtextDocumentUpdateCalculator;
-import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer;
import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId;
import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
import nu.marginalia.wmsa.memex.model.render.*;
+import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -49,9 +49,18 @@ public class MemexService extends Service {
MemexHtmlRenderer renderer,
AuthClient authClient,
Initialization initialization,
- MetricsServer metricsServer) {
+ MetricsServer metricsServer,
+ @Named("memex-html-resources") Path memexHtmlDir
+ ) {
- super(ip, port, initialization, metricsServer);
+ super(ip, port, initialization, metricsServer, () -> {
+ staticFiles.externalLocation(memexHtmlDir.toString());
+ staticFiles.disableMimeTypeGuessing();
+ staticFiles.registerMimeType("gmi", "text/html");
+ staticFiles.registerMimeType("png", "text/html");
+ staticFiles.expireTime(60);
+ staticFiles.header("Cache-control", "public,proxy-revalidate");
+ });
this.updateCalculator = updateCalculator;
this.memex = memex;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java
index b98b34c7..b038637d 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java
@@ -8,7 +8,7 @@ import nu.marginalia.wmsa.configuration.ServiceDescriptor;
public class MemexApiClient extends AbstractDynamicClient {
@Inject
public MemexApiClient() {
- super(ServiceDescriptor.EDGE_MEMEX);
+ super(ServiceDescriptor.MEMEX);
}
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java
index c72e2383..9d165272 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java
@@ -4,11 +4,15 @@ import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
+import nu.marginalia.wmsa.memex.system.git.MemexGitRepo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
-import java.nio.file.*;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.nio.file.StandardOpenOption;
@Singleton
public class MemexSourceFileSystem {
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepo.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepo.java
new file mode 100644
index 00000000..d4e55491
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepo.java
@@ -0,0 +1,15 @@
+package nu.marginalia.wmsa.memex.system.git;
+
+import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
+/** Git operations the memex applies to its source files; implemented by MemexGitRepoImpl (JGit) and the log-only MemexGitRepoDummy. */
+public interface MemexGitRepo {
+ void pull(); // runs a git pull (git.pull().call() in MemexGitRepoImpl)
+ /** Removes the node at {@code url} from the repository (git rm in MemexGitRepoImpl). */
+ void remove(MemexNodeUrl url);
+ /** Adds the node at {@code url} to the repository (git add in MemexGitRepoImpl). */
+ void add(MemexNodeUrl url);
+ /** Registers changed content for the node at {@code url} (also git add in MemexGitRepoImpl). */
+ void update(MemexNodeUrl url);
+ /** Moves the node from {@code src} to {@code dst}; MemexGitRepoImpl begins with a git rm of {@code src}. */
+ void rename(MemexNodeUrl src, MemexNodeUrl dst);
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoDummy.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoDummy.java
new file mode 100644
index 00000000..4d5116ff
--- /dev/null
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoDummy.java
@@ -0,0 +1,36 @@
+package nu.marginalia.wmsa.memex.system.git;
+
+import com.google.inject.Singleton;
+import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+/** Stand-in {@link MemexGitRepo} that performs no git operations and only logs what would have been done; bound when the memex-disable-git system property is true. */
+@Singleton
+public class MemexGitRepoDummy implements MemexGitRepo {
+ private static final Logger logger = LoggerFactory.getLogger(MemexGitRepoDummy.class);
+
+ @Override
+ public void pull() {
+ logger.info("Would perform a pull here");
+ }
+
+ @Override
+ public void remove(MemexNodeUrl url) {
+ logger.info("Would perform a remove here");
+ }
+
+ @Override
+ public void add(MemexNodeUrl url) {
+ logger.info("Would perform an add here");
+ }
+
+ @Override
+ public void update(MemexNodeUrl url) {
+ logger.info("Would perform an update here");
+ }
+
+ @Override
+ public void rename(MemexNodeUrl src, MemexNodeUrl dst) {
+ logger.info("Would perform a rename here");
+ }
+}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoImpl.java
similarity index 90%
rename from marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java
rename to marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoImpl.java
index 05ca6603..10c72060 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoImpl.java
@@ -1,4 +1,4 @@
-package nu.marginalia.wmsa.memex.system;
+package nu.marginalia.wmsa.memex.system.git;
import com.google.inject.Inject;
import com.google.inject.Singleton;
@@ -10,7 +10,8 @@ import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.storage.file.FileRepositoryBuilder;
-import org.eclipse.jgit.transport.*;
+import org.eclipse.jgit.transport.JschConfigSessionFactory;
+import org.eclipse.jgit.transport.SshSessionFactory;
import org.eclipse.jgit.util.FS;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -19,13 +20,13 @@ import java.io.IOException;
import java.nio.file.Path;
@Singleton
-public class MemexGitRepo {
+public class MemexGitRepoImpl implements MemexGitRepo {
private final Git git;
- private final Logger logger = LoggerFactory.getLogger(MemexGitRepo.class);
+ private final Logger logger = LoggerFactory.getLogger(MemexGitRepoImpl.class);
@Inject
- public MemexGitRepo(@Named("memex-root") Path root) throws IOException {
+ public MemexGitRepoImpl(@Named("memex-root") Path root) throws IOException {
FileRepositoryBuilder repositoryBuilder = new FileRepositoryBuilder();
@@ -49,6 +50,7 @@ public class MemexGitRepo {
pull();
}
+ @Override
public void pull() {
try {
git.pull().call();
@@ -58,6 +60,7 @@ public class MemexGitRepo {
}
}
+ @Override
public void remove(MemexNodeUrl url) {
try {
git.rm()
@@ -72,6 +75,7 @@ public class MemexGitRepo {
}
}
+ @Override
public void add(MemexNodeUrl url) {
try {
git.add()
@@ -87,6 +91,7 @@ public class MemexGitRepo {
logger.error("Git operation failed", ex);
}
}
+ @Override
public void update(MemexNodeUrl url) {
try {
git.add()
@@ -105,6 +110,7 @@ public class MemexGitRepo {
}
+ @Override
public void rename(MemexNodeUrl src, MemexNodeUrl dst) {
try {
git.rm().addFilepattern(filePattern(src)).call();
diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java
index 9699bcf9..e3e670c7 100644
--- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java
+++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java
@@ -2,16 +2,18 @@ package nu.marginalia.wmsa.memex.change;
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
import lombok.SneakyThrows;
-import nu.marginalia.gemini.GeminiService;
+import nu.marginalia.gemini.GeminiServiceImpl;
import nu.marginalia.util.test.TestUtil;
-import nu.marginalia.wmsa.memex.*;
+import nu.marginalia.wmsa.memex.Memex;
+import nu.marginalia.wmsa.memex.MemexData;
+import nu.marginalia.wmsa.memex.MemexLoader;
import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId;
import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
import nu.marginalia.wmsa.memex.renderer.MemexRendererers;
import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes;
import nu.marginalia.wmsa.memex.system.MemexFileWriter;
-import nu.marginalia.wmsa.memex.system.MemexGitRepo;
import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem;
+import nu.marginalia.wmsa.memex.system.git.MemexGitRepoImpl;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
@@ -61,13 +63,13 @@ class GemtextChangeTest {
var data = new MemexData();
memex = new Memex(data, null,
- Mockito.mock(MemexGitRepo.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(),
- new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)),
+ Mockito.mock(MemexGitRepoImpl.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(),
+ new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepoImpl.class)),
tempDir, tombstonePath, redirectPath),
Mockito.mock(MemexFileWriter.class),
null,
Mockito.mock(MemexRendererers.class),
- Mockito.mock(GeminiService.class));
+ Mockito.mock(GeminiServiceImpl.class));
}
@SneakyThrows
diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java
index 8aefc613..d80d32eb 100644
--- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java
+++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java
@@ -2,18 +2,20 @@ package nu.marginalia.wmsa.memex.change;
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
import lombok.SneakyThrows;
-import nu.marginalia.gemini.GeminiService;
+import nu.marginalia.gemini.GeminiServiceImpl;
import nu.marginalia.gemini.gmi.GemtextDocument;
import nu.marginalia.util.test.TestUtil;
-import nu.marginalia.wmsa.memex.*;
+import nu.marginalia.wmsa.memex.Memex;
+import nu.marginalia.wmsa.memex.MemexData;
+import nu.marginalia.wmsa.memex.MemexLoader;
import nu.marginalia.wmsa.memex.change.update.GemtextDocumentUpdateCalculator;
import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId;
import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
import nu.marginalia.wmsa.memex.renderer.MemexRendererers;
import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes;
import nu.marginalia.wmsa.memex.system.MemexFileWriter;
-import nu.marginalia.wmsa.memex.system.MemexGitRepo;
import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem;
+import nu.marginalia.wmsa.memex.system.git.MemexGitRepoImpl;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
@@ -67,12 +69,12 @@ class GemtextTaskUpdateTest {
Files.createDirectory(tempDir.resolve("special"));
var data = new MemexData();
- memex = new Memex(data, null, Mockito.mock(MemexGitRepo.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(),
- new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)), tempDir, tombstonePath, redirectPath),
+ memex = new Memex(data, null, Mockito.mock(MemexGitRepoImpl.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(),
+ new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepoImpl.class)), tempDir, tombstonePath, redirectPath),
Mockito.mock(MemexFileWriter.class),
null,
Mockito.mock(MemexRendererers.class),
- Mockito.mock(GeminiService.class));
+ Mockito.mock(GeminiServiceImpl.class));
}
@SneakyThrows
diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java
index bfe3b104..51120654 100644
--- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java
+++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java
@@ -2,15 +2,17 @@ package nu.marginalia.wmsa.memex.change;
import io.reactivex.rxjava3.plugins.RxJavaPlugins;
import lombok.SneakyThrows;
-import nu.marginalia.gemini.GeminiService;
+import nu.marginalia.gemini.GeminiServiceImpl;
import nu.marginalia.util.test.TestUtil;
-import nu.marginalia.wmsa.memex.*;
+import nu.marginalia.wmsa.memex.Memex;
+import nu.marginalia.wmsa.memex.MemexData;
+import nu.marginalia.wmsa.memex.MemexLoader;
import nu.marginalia.wmsa.memex.model.MemexNodeUrl;
import nu.marginalia.wmsa.memex.renderer.MemexRendererers;
import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes;
import nu.marginalia.wmsa.memex.system.MemexFileWriter;
-import nu.marginalia.wmsa.memex.system.MemexGitRepo;
import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem;
+import nu.marginalia.wmsa.memex.system.git.MemexGitRepoImpl;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.BeforeEach;
@@ -64,13 +66,13 @@ class GemtextTombstoneUpdateCaclulatorTest {
var data = new MemexData();
memex = new Memex(data, null,
- Mockito.mock(MemexGitRepo.class),
+ Mockito.mock(MemexGitRepoImpl.class),
new MemexLoader(data, new MemexFileSystemModifiedTimes(),
- new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)), tempDir, tombstonePath, redirectPath),
+ new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepoImpl.class)), tempDir, tombstonePath, redirectPath),
Mockito.mock(MemexFileWriter.class),
updateCaclulator,
Mockito.mock(MemexRendererers.class),
- Mockito.mock(GeminiService.class));
+ Mockito.mock(GeminiServiceImpl.class));
}
@SneakyThrows