From a3a6b40cc3e67e6273f939dfea73322868b4926d Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Wed, 15 Jun 2022 16:54:27 +0200 Subject: [PATCH 1/3] Changes to crawler (#28) Co-authored-by: vlofgren Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/28 --- README.md | 9 ++- .../nu/marginalia/wmsa/edge/E2ETestBase.java | 2 +- marginalia_nu/src/e2e/resources/crawl.sh | 2 + marginalia_nu/src/e2e/resources/init.sh | 33 ++++++---- .../wmsa/configuration/UserAgent.java | 5 ++ .../wmsa/configuration/WebsiteUrl.java | 7 +++ .../wmsa/configuration/WmsaHome.java | 60 +++++++++++++++---- .../module/ConfigurationModule.java | 30 ++-------- .../module/HostnameProvider.java | 36 ----------- .../configuration/module/PortProvider.java | 46 -------------- .../edge/assistant/EdgeAssistantModule.java | 13 ++-- .../wmsa/edge/converting/ConverterModule.java | 10 +--- .../processor/DocumentProcessor.java | 17 +++--- .../processor/logic/LinkParser.java | 34 +++++++++-- .../wmsa/edge/crawling/CrawlerMain.java | 6 +- .../crawling/retreival/CrawlerRetreiver.java | 14 +++-- .../wmsa/edge/index/EdgeTablesModule.java | 9 +-- .../wmsa/edge/model/EdgeDomain.java | 1 + .../marginalia/wmsa/edge/model/EdgeUrl.java | 9 +-- .../wmsa/edge/search/EdgeSearchModule.java | 15 ++--- .../wmsa/edge/search/EdgeSearchService.java | 12 ++-- .../resource_store/ResourceStoreModule.java | 1 - .../wmsa/edge/crawling/LinkParserTest.java | 34 ++++++++++- 23 files changed, 203 insertions(+), 202 deletions(-) create mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/UserAgent.java create mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/WebsiteUrl.java delete mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/HostnameProvider.java delete mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java diff --git a/README.md b/README.md index cfe88bc9..927fff6f 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,8 @@ the [MEMEX/gemini server](https://memex.marginalia.nu), the and the [encyclopedi The aim of the project is to develop new and alternative discovery methods for the Internet. It's an experimental workshop as much as it is a public service, the overarching goal is to -elevate the more human, non-commercial sides of the Internet. +elevate the more human, non-commercial sides of the Internet. A side-goal is to do this without +requiring datacenters and expensive enterprise hardware, to run this operation on affordable hardware. The canonical git server for this project is [https://git.marginalia.nu](https://git.marginalia.nu). It is fine to mirror it on other hosts, but if you have issues or questions @@ -16,6 +17,10 @@ it wasn't developed with the intention of going open source, a lot of tests and so on make assumptions about the directory structure, much configuration is hard coded and so on. Please stand by. A lot of the mess is fairly superficial. +## Documentation + +Documentation is a work in progress. See the [wiki](https://git.marginalia.nu/marginalia/marginalia.nu/wiki). + ## Contributing The project is still being set up, but if you are interested in contributing, please contact me. @@ -26,4 +31,4 @@ Consider [supporting this project](https://memex.marginalia.nu/projects/edge/sup ## Contact -You can email with any questions or feedback. \ No newline at end of file +You can email with any questions or feedback. diff --git a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java index 769eca40..0c329a79 100644 --- a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java +++ b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java @@ -33,7 +33,7 @@ public abstract class E2ETestBase { .withCopyFileToContainer(jarFile(), "/WMSA.jar") .withCopyFileToContainer(MountableFile.forClasspathResource("init.sh"), "/init.sh") .withExposedPorts(service.port) - .withFileSystemBind(modelsPath(), "/var/lib/wmsa/model", BindMode.READ_ONLY) + .withFileSystemBind(modelsPath(), "/wmsa/model", BindMode.READ_ONLY) .withNetwork(network) .withNetworkAliases(service.name) .withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger(service.name))) diff --git a/marginalia_nu/src/e2e/resources/crawl.sh b/marginalia_nu/src/e2e/resources/crawl.sh index 3a0e4b01..16d43fab 100644 --- a/marginalia_nu/src/e2e/resources/crawl.sh +++ b/marginalia_nu/src/e2e/resources/crawl.sh @@ -3,6 +3,8 @@ mkdir -p /var/lib/wmsa/conf/ mkdir -p /var/lib/wmsa/data/ +echo "search.marginalia.nu" > /var/lib/wmsa/conf/user-agent + cat > /var/lib/wmsa/conf/db.properties < /var/lib/wmsa/suggestions.txt < ${HOME}/suggestions.txt < /var/lib/wmsa/conf/disks.properties < ${HOME}/conf/disks.properties < /var/lib/wmsa/conf/db.properties < ${HOME}/conf/db.properties < /var/lib/wmsa/conf/ranking-settings.yaml < ${HOME}/conf/ranking-settings.yaml < /var/lib/wmsa/conf/hosts < ${HOME}/conf/hosts < { - private static final String DEFAULT_HOSTNAME = "127.0.0.1"; - private final int monitorPort; - private final String monitorHost; - private final int timeout; - private final Logger logger = LoggerFactory.getLogger(getClass()); - - @Inject - public HostnameProvider(@Named("monitor-port") Integer monitorPort, - @Named("monitor-host") String monitorHost, - @Named("monitor-boot-timeout") Integer timeout - ) { - this.monitorHost = monitorHost; - this.monitorPort = monitorPort; - this.timeout = timeout; - } - - @Override - public String get() { - var override = System.getProperty("service-host"); - if (null != override) { - return override; - } - return DEFAULT_HOSTNAME; - } - -} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java deleted file mode 100644 index 7286aa68..00000000 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/module/PortProvider.java +++ /dev/null @@ -1,46 +0,0 @@ -package nu.marginalia.wmsa.configuration.module; - -import com.google.inject.name.Named; -import io.reactivex.rxjava3.core.Flowable; -import nu.marginalia.wmsa.configuration.ServiceDescriptor; -import org.apache.http.HttpResponse; -import org.reactivestreams.Publisher; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.inject.Inject; -import javax.inject.Provider; -import java.io.IOException; -import java.util.concurrent.TimeUnit; - -public class PortProvider implements Provider { - private static final Integer DEFAULT_PORT = 5000; - private final int monitorPort; - private final String monitorHost; - private final Logger logger = LoggerFactory.getLogger(getClass()); - private final int timeout = 10; - @Inject - public PortProvider(@Named("monitor-port") Integer monitorPort, - @Named("monitor-host") String monitorHost, - @Named("monitor-boot-timeout") Integer timeout) { - this.monitorHost = monitorHost; - this.monitorPort = monitorPort; - } - - @Override - public Integer get() { - return ServiceDescriptor.byName(System.getProperty("service-name")).port; - } - - private Publisher repeatDelay(Flowable error) { - return error.delay(1, TimeUnit.SECONDS); - } - - private String accept200(HttpResponse rsp) throws IOException { - if (rsp.getStatusLine().getStatusCode() != 200) { - throw new RuntimeException("Monitor responded unexpected status " - + rsp.getStatusLine().getStatusCode()); - } - return new String(rsp.getEntity().getContent().readAllBytes()); - } -} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java index cc5c3fe6..dcc8d90d 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/assistant/EdgeAssistantModule.java @@ -2,6 +2,7 @@ package nu.marginalia.wmsa.edge.assistant; import com.google.inject.AbstractModule; import nu.marginalia.util.language.conf.LanguageModels; +import nu.marginalia.wmsa.configuration.WmsaHome; import java.nio.file.Path; @@ -9,14 +10,8 @@ import static com.google.inject.name.Names.named; public class EdgeAssistantModule extends AbstractModule { public void configure() { - bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(Path.of("/var/lib/wmsa/suggestions.txt")); - bind(LanguageModels.class).toInstance(new LanguageModels( - Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), - Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), - Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), - Path.of("/var/lib/wmsa/model/English.RDR"), - Path.of("/var/lib/wmsa/model/English.DICT"), - Path.of("/var/lib/wmsa/model/opennlp-tok.bin") - )); + bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(WmsaHome.getHomePath().resolve("suggestions.txt")); + + bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels()); } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterModule.java index 6f03632f..4bf6eaea 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterModule.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/ConverterModule.java @@ -5,6 +5,7 @@ import com.google.inject.AbstractModule; import com.google.inject.name.Names; import marcono1234.gson.recordadapter.RecordTypeAdapterFactory; import nu.marginalia.util.language.conf.LanguageModels; +import nu.marginalia.wmsa.configuration.WmsaHome; import nu.marginalia.wmsa.edge.model.EdgeCrawlPlan; import nu.marginalia.wmsa.edge.model.EdgeDomain; import nu.marginalia.wmsa.edge.model.EdgeUrl; @@ -30,14 +31,7 @@ public class ConverterModule extends AbstractModule { bind(Integer.class).annotatedWith(Names.named("max-title-length")).toInstance(128); bind(Integer.class).annotatedWith(Names.named("max-summary-length")).toInstance(255); - bind(LanguageModels.class).toInstance(new LanguageModels( - Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), - Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), - Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), - Path.of("/var/lib/wmsa/model/English.RDR"), - Path.of("/var/lib/wmsa/model/English.DICT"), - Path.of("/var/lib/wmsa/model/opennlp-tok.bin") - )); + bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels()); } private Gson createGson() { diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java index ce6393f2..b205cdea 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/DocumentProcessor.java @@ -185,26 +185,25 @@ public class DocumentProcessor { } private void getLinks(EdgeUrl baseUrl, ProcessedDocumentDetails ret, Document doc, EdgePageWordSet words) { - var links = doc.getElementsByTag("a"); - var frames = doc.getElementsByTag("frame"); - var feeds = doc.select("link[rel=alternate]"); - LinkProcessor lp = new LinkProcessor(ret, baseUrl); + final LinkProcessor lp = new LinkProcessor(ret, baseUrl); - for (var atag : links) { + baseUrl = linkParser.getBaseLink(doc, baseUrl); + + for (var atag : doc.getElementsByTag("a")) { linkParser.parseLink(baseUrl, atag).ifPresent(lp::accept); } - for (var frame : frames) { + for (var frame : doc.getElementsByTag("frame")) { linkParser.parseFrame(baseUrl, frame).ifPresent(lp::accept); } - for (var link : feeds) { + for (var link : doc.select("link[rel=alternate]")) { feedExtractor - .getFeedFromAlternateTag(baseUrl, link) + .getFeedFromAlternateTag(baseUrl, link) .ifPresent(lp::acceptFeed); } - Set linkTerms = new HashSet<>(); + final Set linkTerms = new HashSet<>(); for (var domain : lp.getForeignDomains()) { linkTerms.add("links:"+domain.toString().toLowerCase()); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkParser.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkParser.java index aedaf0f7..378182f2 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkParser.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/converting/processor/logic/LinkParser.java @@ -1,9 +1,12 @@ package nu.marginalia.wmsa.edge.converting.processor.logic; import com.google.common.base.CharMatcher; +import com.google.common.base.Strings; import lombok.SneakyThrows; import nu.marginalia.wmsa.edge.model.EdgeUrl; import org.jetbrains.annotations.Contract; +import org.jetbrains.annotations.Nullable; +import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -26,11 +29,11 @@ public class LinkParser { ".gz", ".asc", ".md5", ".asf", ".mov", ".sig", ".pub", ".iso"); @Contract(pure=true) - public Optional parseLink(EdgeUrl baseUrl, Element l) { + public Optional parseLink(EdgeUrl relativeBaseUrl, Element l) { return Optional.of(l) .filter(this::shouldIndexLink) .map(this::getUrl) - .map(link -> resolveUrl(baseUrl, link)) + .map(link -> resolveUrl(relativeBaseUrl, link)) .flatMap(this::createURI) .map(URI::normalize) .map(this::renormalize) @@ -100,6 +103,8 @@ public class LinkParser { } private static final Pattern paramRegex = Pattern.compile("\\?.*$"); + private static final Pattern spaceRegex = Pattern.compile(" "); + @SneakyThrows private String resolveUrl(EdgeUrl baseUrl, String s) { s = paramRegex.matcher(s).replaceAll(""); @@ -111,10 +116,12 @@ public class LinkParser { // url looks like /my-page if (s.startsWith("/")) { - return baseUrl.sibling(s).toString(); + return baseUrl.withPath(s).toString(); } - return baseUrl.sibling(relativeNavigation(baseUrl) + s.replaceAll(" ", "%20")).toString(); + final String partFromNewLink = spaceRegex.matcher(s).replaceAll("%20"); + + return baseUrl.withPath(relativeNavigation(baseUrl) + partFromNewLink).toString(); } // for a relative url that looks like /foo or /foo/bar; return / or /foo @@ -162,4 +169,23 @@ public class LinkParser { } return true; } + + @Nullable + public EdgeUrl getBaseLink(Document parsed, EdgeUrl documentUrl) { + var baseTags = parsed.getElementsByTag("base"); + + try { + for (var tag : baseTags) { + String href = tag.attr("href"); + if (!Strings.isNullOrEmpty(href)) { + return new EdgeUrl(resolveUrl(documentUrl, href)); + } + } + } + catch (Exception ex) { + logger.warn("Failed to parse , falling back to document url"); + } + + return documentUrl; + } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java index d81e348b..7238dce0 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/CrawlerMain.java @@ -2,6 +2,8 @@ package nu.marginalia.wmsa.edge.crawling; import com.google.gson.Gson; import com.google.gson.GsonBuilder; +import nu.marginalia.wmsa.configuration.UserAgent; +import nu.marginalia.wmsa.configuration.WmsaHome; import nu.marginalia.wmsa.edge.crawling.model.CrawledDomain; import nu.marginalia.wmsa.edge.crawling.model.CrawlingSpecification; import nu.marginalia.wmsa.edge.crawling.retreival.CrawlerRetreiver; @@ -34,10 +36,12 @@ public class CrawlerMain implements AutoCloseable { private final Dispatcher dispatcher = new Dispatcher(new ThreadPoolExecutor(0, Integer.MAX_VALUE, 5, TimeUnit.SECONDS, new SynchronousQueue<>(), Util.threadFactory("OkHttp Dispatcher", true))); + private final UserAgent userAgent; public CrawlerMain(EdgeCrawlPlan plan) throws Exception { this.inputSpec = plan.getJobSpec(); this.numberOfThreads = 512; + this.userAgent = WmsaHome.getUserAgent(); workLog = new WorkLog(plan.crawl.getLogFile()); domainWriter = new CrawledDomainWriter(plan.crawl.getDir()); @@ -88,7 +92,7 @@ public class CrawlerMain implements AutoCloseable { if (workLog.isJobFinished(specification.id)) return null; - var fetcher = new HttpFetcher("search.marginalia.nu", dispatcher); + var fetcher = new HttpFetcher(userAgent.uaString(), dispatcher); try { var retreiver = new CrawlerRetreiver(fetcher, specification); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java index a7c08a24..2b27ed4d 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/crawling/retreival/CrawlerRetreiver.java @@ -202,10 +202,11 @@ public class CrawlerRetreiver { return domain.equals(url.domain.toString().toLowerCase()); } - private void findLinks(EdgeUrl url, Document parsed) { + private void findLinks(EdgeUrl baseUrl, Document parsed) { + baseUrl = linkParser.getBaseLink(parsed, baseUrl); for (var link : parsed.getElementsByTag("a")) { - linkParser.parseLink(url, link) + linkParser.parseLink(baseUrl, link) .filter(this::isSameDomain) .filter(u -> !urlBlocklist.isUrlBlocked(u)) .filter(u -> !urlBlocklist.isForumLink(u)) @@ -213,7 +214,7 @@ public class CrawlerRetreiver { .ifPresent(queue::addLast); } for (var link : parsed.getElementsByTag("frame")) { - linkParser.parseFrame(url, link) + linkParser.parseFrame(baseUrl, link) .filter(this::isSameDomain) .filter(u -> !urlBlocklist.isUrlBlocked(u)) .filter(u -> !urlBlocklist.isForumLink(u)) @@ -221,7 +222,7 @@ public class CrawlerRetreiver { .ifPresent(queue::addLast); } for (var link : parsed.getElementsByTag("iframe")) { - linkParser.parseFrame(url, link) + linkParser.parseFrame(baseUrl, link) .filter(this::isSameDomain) .filter(u -> !urlBlocklist.isUrlBlocked(u)) .filter(u -> !urlBlocklist.isForumLink(u)) @@ -230,10 +231,11 @@ public class CrawlerRetreiver { } } - private Optional findCanonicalUrl(EdgeUrl url, Document parsed) { + private Optional findCanonicalUrl(EdgeUrl baseUrl, Document parsed) { + baseUrl = baseUrl.withPath("/"); for (var link : parsed.select("link[rel=canonical]")) { - return linkParser.parseLink(url, link); + return linkParser.parseLink(baseUrl, link); } return Optional.empty(); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java index bc9c2f44..4650b15b 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/index/EdgeTablesModule.java @@ -2,17 +2,18 @@ package nu.marginalia.wmsa.edge.index; import com.google.inject.AbstractModule; import com.google.inject.name.Names; +import nu.marginalia.wmsa.configuration.WmsaHome; import java.nio.file.Path; public class EdgeTablesModule extends AbstractModule { public void configure() { - bind(Path.class).annotatedWith(Names.named("partition-root-slow")).toInstance(Path.of("/var/lib/wmsa/index/write")); - bind(Path.class).annotatedWith(Names.named("partition-root-slow-tmp")).toInstance(Path.of("/backup/work/index-tmp/")); + bind(Path.class).annotatedWith(Names.named("partition-root-slow")).toInstance(WmsaHome.getDisk("index-write")); + bind(Path.class).annotatedWith(Names.named("partition-root-fast")).toInstance(WmsaHome.getDisk("index-read")); - bind(Path.class).annotatedWith(Names.named("partition-root-fast")).toInstance(Path.of("/var/lib/wmsa/index/read")); - bind(Path.class).annotatedWith(Names.named("tmp-file-dir")).toInstance(Path.of("/var/lib/wmsa/index/read")); + bind(Path.class).annotatedWith(Names.named("partition-root-slow-tmp")).toInstance(WmsaHome.getDisk("tmp-slow")); + bind(Path.class).annotatedWith(Names.named("tmp-file-dir")).toInstance(WmsaHome.getDisk("tmp-fast")); bind(String.class).annotatedWith(Names.named("edge-writer-page-index-file")).toInstance("page-index.dat"); bind(String.class).annotatedWith(Names.named("edge-writer-dictionary-file")).toInstance("dictionary.dat"); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java index cb778947..53740c95 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeDomain.java @@ -21,6 +21,7 @@ public class EdgeDomain implements WideHashable { @SneakyThrows public EdgeDomain(String host) { + Objects.requireNonNull(host, "domain name must not be null"); var dot = host.lastIndexOf('.'); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java index 39bc475b..e82d4b7c 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/EdgeUrl.java @@ -79,11 +79,6 @@ public class EdgeUrl implements WideHashable { this.port = port(URI.getPort(), proto); } - public EdgeUrl sibling(String newPath) { - return new EdgeUrl(proto, domain, port, newPath); - } - - private static Integer port(Integer port, String protocol) { if (null == port || port < 1) { return null; @@ -120,5 +115,7 @@ public class EdgeUrl implements WideHashable { return (int) path.chars().filter(c -> c=='/').count(); } - + public EdgeUrl withPath(String s) { + return new EdgeUrl(proto, domain, port, s); + } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java index 9e1df8d5..9db18272 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchModule.java @@ -2,21 +2,14 @@ package nu.marginalia.wmsa.edge.search; import com.google.inject.AbstractModule; import nu.marginalia.util.language.conf.LanguageModels; - -import java.nio.file.Path; +import nu.marginalia.wmsa.configuration.WebsiteUrl; +import nu.marginalia.wmsa.configuration.WmsaHome; public class EdgeSearchModule extends AbstractModule { public void configure() { - - bind(LanguageModels.class).toInstance(new LanguageModels( - Path.of("/var/lib/wmsa/model/ngrams-generous-emstr.bin"), - Path.of("/var/lib/wmsa/model/tfreq-new-algo3.bin"), - Path.of("/var/lib/wmsa/model/opennlp-sentence.bin"), - Path.of("/var/lib/wmsa/model/English.RDR"), - Path.of("/var/lib/wmsa/model/English.DICT"), - Path.of("/var/lib/wmsa/model/opennlp-tok.bin") - )); + bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels()); + bind(WebsiteUrl.class).toInstance(new WebsiteUrl(System.getProperty("website-url", "https://search.marginalia.nu/"))); } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java index 329322a2..fa2d06e0 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java @@ -8,6 +8,7 @@ import com.google.inject.name.Named; import lombok.SneakyThrows; import nu.marginalia.wmsa.api.model.ApiSearchResult; import nu.marginalia.wmsa.api.model.ApiSearchResults; +import nu.marginalia.wmsa.configuration.WebsiteUrl; import nu.marginalia.wmsa.configuration.server.Context; import nu.marginalia.wmsa.configuration.server.Initialization; import nu.marginalia.wmsa.configuration.server.MetricsServer; @@ -34,7 +35,7 @@ public class EdgeSearchService extends Service { private final EdgeIndexClient indexClient; private final EdgeSearchOperator searchOperator; private final CommandEvaluator searchCommandEvaulator; - + private final WebsiteUrl websiteUrl; private static final Logger logger = LoggerFactory.getLogger(EdgeSearchService.class); @SneakyThrows @@ -45,13 +46,14 @@ public class EdgeSearchService extends Service { Initialization initialization, MetricsServer metricsServer, EdgeSearchOperator searchOperator, - CommandEvaluator searchCommandEvaulator - ) { + CommandEvaluator searchCommandEvaulator, + WebsiteUrl websiteUrl) { super(ip, port, initialization, metricsServer); this.indexClient = indexClient; this.searchOperator = searchOperator; this.searchCommandEvaulator = searchCommandEvaulator; + this.websiteUrl = websiteUrl; Spark.staticFiles.expireTime(600); @@ -79,7 +81,7 @@ public class EdgeSearchService extends Service { final String query = URLEncoder.encode(String.format("%s site:%s", queryRaw, site), StandardCharsets.UTF_8); final String profile = request.queryParamOrDefault("profile", "yolo"); - response.redirect("https://search.marginalia.nu/search?query="+query+"&profile="+profile); + response.redirect(websiteUrl.withPath("search?query="+query+"&profile="+profile)); return null; } @@ -141,7 +143,7 @@ public class EdgeSearchService extends Service { final String queryParam = request.queryParams("query"); if (null == queryParam || queryParam.isBlank()) { - response.redirect("https://search.marginalia.nu/"); + response.redirect(websiteUrl.url()); return null; } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java index 2de9e931..30bac9d3 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/resource_store/ResourceStoreModule.java @@ -7,7 +7,6 @@ import java.nio.file.Path; public class ResourceStoreModule extends AbstractModule { public void configure() { - bind(String.class).annotatedWith(Names.named("external-url")).toInstance("https://reddit.marginalia.nu/"); bind(Path.class).annotatedWith(Names.named("data-path")).toInstance(Path.of("/var/lib/wmsa/archive.fast/resources")); } diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/LinkParserTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/LinkParserTest.java index 80c62153..d4a7e428 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/LinkParserTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/edge/crawling/LinkParserTest.java @@ -11,9 +11,8 @@ import static org.junit.jupiter.api.Assertions.*; class LinkParserTest { - private String parseLink(String href, String base) throws URISyntaxException { - var url = new EdgeUrl("http://www.marginalia.nu/" + base); - var domain = url.domain; + private String parseLink(String href, String relBase) throws URISyntaxException { + var url = new EdgeUrl("http://www.marginalia.nu/" + relBase); var parser = new LinkParser(); var stuff = Jsoup.parseBodyFragment("test"); var lnk = parser.parseLink( @@ -43,6 +42,7 @@ class LinkParserTest { void testAnchor() throws URISyntaxException { assertNull(parseLink("#test", "/")); } + @Test void testRelative() throws URISyntaxException { assertEquals("http://www.marginalia.nu/test", parseLink("../test", "/")); @@ -51,4 +51,32 @@ class LinkParserTest { assertEquals("http://www.marginalia.nu/test", parseLink("../test", "/foo/index.html")); assertEquals("http://www.marginalia.nu/test", parseLink("/test", "/foo/index.html")); } + + private EdgeUrl getBaseUrl(String href, EdgeUrl documentUrl) { + LinkParser lp = new LinkParser(); + + return lp.getBaseLink(Jsoup.parse(""), documentUrl); + } + + @Test + public void getBaseUrlTest() throws URISyntaxException { + assertEquals(new EdgeUrl("https://www.marginalia.nu/base"), + getBaseUrl("/base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar"))); + + assertEquals(new EdgeUrl("https://memex.marginalia.nu/base"), + getBaseUrl("https://memex.marginalia.nu/base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar"))); + + assertEquals(new EdgeUrl("https://www.marginalia.nu/test/base"), + getBaseUrl("base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar"))); + } + + @Test + public void testParseBadBaseLink() throws URISyntaxException { + LinkParser lp = new LinkParser(); + var url = new EdgeUrl("https://memex.marginalia.nu/"); + + assertEquals(url, lp.getBaseLink(Jsoup.parse(""), url)); + assertEquals(url, lp.getBaseLink(Jsoup.parse(""), url)); + assertEquals(url, lp.getBaseLink(Jsoup.parse(""), url)); + } } \ No newline at end of file From 8df48d1c6d112c5542238b81e32c69347e00c792 Mon Sep 17 00:00:00 2001 From: Viktor Lofgren Date: Thu, 16 Jun 2022 14:15:54 +0200 Subject: [PATCH 2/3] Fix front page typo (#29) Co-authored-by: vlofgren Reviewed-on: https://git.marginalia.nu/marginalia/marginalia.nu/pulls/29 --- marginalia_nu/src/main/resources/static/edge/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/marginalia_nu/src/main/resources/static/edge/index.html b/marginalia_nu/src/main/resources/static/edge/index.html index 166e67b8..47d6e314 100644 --- a/marginalia_nu/src/main/resources/static/edge/index.html +++ b/marginalia_nu/src/main/resources/static/edge/index.html @@ -88,7 +88,7 @@ theology, the occult, knitting, - compter science, + computer science, or art.

From 93c274f1d4e17a26969b05df50807235a2b27c5e Mon Sep 17 00:00:00 2001 From: vlofgren Date: Fri, 8 Jul 2022 12:34:05 +0200 Subject: [PATCH 3/3] E2E-test for memex --- .../nu/marginalia/wmsa/edge/E2ETestBase.java | 16 ++ .../nu/marginalia/wmsa/edge/MemexE2ETest.java | 95 ++++++++++ marginalia_nu/src/e2e/resources/init.sh | 1 + marginalia_nu/src/e2e/resources/memex.sh | 39 +++++ .../src/e2e/resources/memex/index.gmi | 6 + .../src/e2e/resources/memex/log/a.gmi | 7 + .../src/e2e/resources/memex/log/b.gmi | 6 + .../src/e2e/resources/memex/log/index.gmi | 7 + .../src/e2e/resources/nginx/memex.conf | 27 +++ .../nu/marginalia/gemini/GeminiService.java | 163 +---------------- .../marginalia/gemini/GeminiServiceDummy.java | 10 ++ .../marginalia/gemini/GeminiServiceImpl.java | 164 ++++++++++++++++++ .../gemini/plugins/BareStaticPagePlugin.java | 7 +- .../nu/marginalia/wmsa/auth/AuthService.java | 23 ++- .../wmsa/configuration/ServiceDescriptor.java | 5 +- .../configuration/command/StartCommand.java | 1 - .../wmsa/configuration/server/Service.java | 48 +++-- .../java/nu/marginalia/wmsa/memex/Memex.java | 4 +- .../wmsa/memex/MemexConfigurationModule.java | 44 ++++- .../nu/marginalia/wmsa/memex/MemexMain.java | 2 +- .../marginalia/wmsa/memex/MemexService.java | 17 +- .../wmsa/memex/client/MemexApiClient.java | 2 +- .../memex/system/MemexSourceFileSystem.java | 6 +- .../wmsa/memex/system/git/MemexGitRepo.java | 15 ++ .../memex/system/git/MemexGitRepoDummy.java | 36 ++++ .../MemexGitRepoImpl.java} | 16 +- .../wmsa/memex/change/GemtextChangeTest.java | 14 +- .../memex/change/GemtextTaskUpdateTest.java | 14 +- .../GemtextTombstoneUpdateCaclulatorTest.java | 14 +- 29 files changed, 579 insertions(+), 230 deletions(-) create mode 100644 marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/MemexE2ETest.java create mode 100644 marginalia_nu/src/e2e/resources/memex.sh create mode 100644 marginalia_nu/src/e2e/resources/memex/index.gmi create mode 100644 marginalia_nu/src/e2e/resources/memex/log/a.gmi create mode 100644 marginalia_nu/src/e2e/resources/memex/log/b.gmi create mode 100644 marginalia_nu/src/e2e/resources/memex/log/index.gmi create mode 100644 marginalia_nu/src/e2e/resources/nginx/memex.conf create mode 100644 marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceDummy.java create mode 100644 marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceImpl.java create mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepo.java create mode 100644 marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoDummy.java rename marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/{MemexGitRepo.java => git/MemexGitRepoImpl.java} (90%) diff --git a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java index 0c329a79..da40a7fc 100644 --- a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java +++ b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java @@ -43,6 +43,22 @@ public abstract class E2ETestBase { .withReadTimeout(Duration.ofSeconds(15))) ; } + public static GenericContainer forService(ServiceDescriptor service, GenericContainer mariaDB, String setupScript) { + return new GenericContainer<>("openjdk:17-alpine") + .dependsOn(mariaDB) + .withCopyFileToContainer(jarFile(), "/WMSA.jar") + .withCopyFileToContainer(MountableFile.forClasspathResource(setupScript), "/" + setupScript) + .withExposedPorts(service.port) + .withFileSystemBind(modelsPath(), "/wmsa/model", BindMode.READ_ONLY) + .withNetwork(network) + .withNetworkAliases(service.name) + .withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger(service.name))) + .withCommand("sh", setupScript, service.name) + .waitingFor(Wait.forHttp("/internal/ping") + .forPort(service.port) + .withReadTimeout(Duration.ofSeconds(15))) + ; + } public static MountableFile jarFile() { Path cwd = Path.of(System.getProperty("user.dir")); diff --git a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/MemexE2ETest.java b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/MemexE2ETest.java new file mode 100644 index 00000000..7410b3b3 --- /dev/null +++ b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/MemexE2ETest.java @@ -0,0 +1,95 @@ +package nu.marginalia.wmsa.edge; + + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import okhttp3.OkHttpClient; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.mariadb.jdbc.Driver; +import org.openqa.selenium.OutputType; +import org.openqa.selenium.chrome.ChromeOptions; +import org.slf4j.LoggerFactory; +import org.testcontainers.containers.*; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.utility.MountableFile; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.util.concurrent.TimeUnit; + +import static nu.marginalia.wmsa.configuration.ServiceDescriptor.AUTH; +import static nu.marginalia.wmsa.configuration.ServiceDescriptor.MEMEX; + +@Tag("e2e") +@Testcontainers +public class MemexE2ETest extends E2ETestBase { + @Container + public MariaDBContainer mariaDB = getMariaDBContainer(); + + @Container + public GenericContainer auth = forService(AUTH, mariaDB); + + @Container + public GenericContainer memexContainer = forService(MEMEX, mariaDB, "memex.sh") + .withClasspathResourceMapping("/memex", "/memex", BindMode.READ_ONLY); + + @Container + public NginxContainer proxyNginx = new NginxContainer<>("nginx:stable") + .dependsOn(auth) + .dependsOn(memexContainer) + .withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("nginx"))) + .withCopyFileToContainer(MountableFile.forClasspathResource("nginx/memex.conf"), "/etc/nginx/conf.d/default.conf") + .withNetwork(network) + .withNetworkAliases("proxyNginx"); + + @Container + public BrowserWebDriverContainer chrome = new BrowserWebDriverContainer<>() + .withNetwork(network) + .withCapabilities(new ChromeOptions()); + + private Gson gson = new GsonBuilder().create(); + private OkHttpClient httpClient = new OkHttpClient.Builder() + .connectTimeout(100, TimeUnit.MILLISECONDS) + .readTimeout(6000, TimeUnit.SECONDS) + .retryOnConnectionFailure(true) + .followRedirects(true) + .build(); + + @Test + public void run() throws IOException, InterruptedException { + Thread.sleep(10_000); + new Driver(); + + var driver = chrome.getWebDriver(); + + driver.get("http://proxyNginx/"); + Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("frontpage")); + + driver.get("http://proxyNginx/log/"); + Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("log")); + + driver.get("http://proxyNginx/log/a.gmi"); + Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("log-a.gmi")); + + driver.get("http://proxyNginx/log/b.gmi"); + Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("log-b.gmi")); + } + + private static Path screenshotFilename(String operation) throws IOException { + var path = Path.of(System.getProperty("user.dir")).resolve("build/test/e2e/"); + Files.createDirectories(path); + + String name = String.format("test-%s-%s.png", operation, LocalDateTime.now()); + path = path.resolve(name); + + System.out.println("Screenshot in " + path); + return path; + } + + +} diff --git a/marginalia_nu/src/e2e/resources/init.sh b/marginalia_nu/src/e2e/resources/init.sh index 5409f787..2f9fa103 100644 --- a/marginalia_nu/src/e2e/resources/init.sh +++ b/marginalia_nu/src/e2e/resources/init.sh @@ -69,4 +69,5 @@ memex memex dating dating EOF +echo "*** Starting $1" WMSA_HOME=${HOME} java -Dsmall-ram=TRUE -Dservice-host=0.0.0.0 -jar /WMSA.jar start $1 \ No newline at end of file diff --git a/marginalia_nu/src/e2e/resources/memex.sh b/marginalia_nu/src/e2e/resources/memex.sh new file mode 100644 index 00000000..6ce801b5 --- /dev/null +++ b/marginalia_nu/src/e2e/resources/memex.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +HOME=/wmsa + +mkdir -p ${HOME}/conf + +cat > ${HOME}/conf/db.properties < ${HOME}/conf/hosts < serve(connection)); - } - } - } - catch (IOException ex) { - logger.error("IO Exception in gemini server", ex); - } - } - - private void serve(SSLSocket socket) { - final GeminiConnection connection; - try { - connection = new GeminiConnection(socket); - } - catch (IOException ex) { - logger.error("Failed to create connection object", ex); - return; - } - - try { - handleRequest(connection); - } - catch (GeminiUserException ex) { - errorResponse(connection, ex.getMessage()); - } - catch (SSLException ex) { - logger.error(connection.getAddress() + " SSL error"); - connection.close(); - } - catch (Exception ex) { - errorResponse(connection, "Error"); - logger.error(connection.getAddress(), ex); - } - finally { - connection.close(); - } - } - - private void errorResponse(GeminiConnection connection, String message) { - if (connection.isConnected()) { - try { - logger.error("=> " + connection.getAddress(), message); - connection.writeStatusLine(GeminiStatusCode.ERROR_PERMANENT, message); - } - catch (IOException ex) { - logger.error("Exception while sending error", ex); - } - } - } - - private void handleRequest(GeminiConnection connection) throws Exception { - - final String address = connection.getAddress(); - logger.info("Connect: " + address); - - final Optional maybeUri = connection.readUrl(); - if (maybeUri.isEmpty()) { - logger.info("Done: {}", address); - return; - } - - final URI uri = maybeUri.get(); - logger.info("Request {}", uri); - - if (!uri.getScheme().equals("gemini")) { - throw new GeminiUserException("Unsupported protocol"); - } - - servePage(connection, uri); - logger.info("Done: {}", address); - } - - private void servePage(GeminiConnection connection, URI url) throws IOException { - String path = url.getPath(); - - for (Plugin p : plugins) { - if (p.serve(url, connection)) { - return; - } - } - - logger.error("FileNotFound {}", path); - connection.writeStatusLine(GeminiStatusCode.ERROR_TEMPORARY, "No such file"); - } - +public interface GeminiService { + String DEFAULT_FILENAME = "index.gmi"; + void run(); } diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceDummy.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceDummy.java new file mode 100644 index 00000000..81586f31 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceDummy.java @@ -0,0 +1,10 @@ +package nu.marginalia.gemini; + +import com.google.inject.Singleton; + +@Singleton +public class GeminiServiceDummy implements GeminiService { + @Override + public void run() { + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceImpl.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceImpl.java new file mode 100644 index 00000000..0381be48 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/GeminiServiceImpl.java @@ -0,0 +1,164 @@ +package nu.marginalia.gemini; + +import com.google.inject.Inject; +import com.google.inject.Singleton; +import com.google.inject.name.Named; +import nu.marginalia.gemini.io.GeminiConnection; +import nu.marginalia.gemini.io.GeminiSSLSetUp; +import nu.marginalia.gemini.io.GeminiStatusCode; +import nu.marginalia.gemini.io.GeminiUserException; +import nu.marginalia.gemini.plugins.BareStaticPagePlugin; +import nu.marginalia.gemini.plugins.Plugin; +import nu.marginalia.gemini.plugins.SearchPlugin; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.net.ssl.SSLException; +import javax.net.ssl.SSLServerSocket; +import javax.net.ssl.SSLServerSocketFactory; +import javax.net.ssl.SSLSocket; +import java.io.IOException; +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Optional; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; + +@Singleton +public class GeminiServiceImpl implements GeminiService { + + public final Path serverRoot; + + private final Logger logger = LoggerFactory.getLogger(getClass().getSimpleName()); + private final Executor pool = Executors.newFixedThreadPool(32); + private final SSLServerSocket serverSocket; + + private final Plugin[] plugins; + private final BadBotList badBotList = BadBotList.INSTANCE; + + @Inject + public GeminiServiceImpl(@Named("gemini-server-root") Path serverRoot, + @Named("gemini-server-port") Integer port, + GeminiSSLSetUp sslSetUp, + BareStaticPagePlugin pagePlugin, + SearchPlugin searchPlugin) throws Exception { + this.serverRoot = serverRoot; + logger.info("Setting up crypto"); + final SSLServerSocketFactory socketFactory = sslSetUp.getServerSocketFactory(); + + serverSocket = (SSLServerSocket) socketFactory.createServerSocket(port /* 1965 */); + serverSocket.setEnabledCipherSuites(socketFactory.getSupportedCipherSuites()); + serverSocket.setEnabledProtocols(new String[] {"TLSv1.3", "TLSv1.2"}); + + logger.info("Verifying setup"); + if (!Files.exists(this.serverRoot)) { + logger.error("Could not find SERVER_ROOT {}", this.serverRoot); + System.exit(255); + } + + plugins = new Plugin[] { + pagePlugin, + searchPlugin + }; + } + + @Override + public void run() { + logger.info("Awaiting connections"); + + try { + for (;;) { + SSLSocket connection = (SSLSocket) serverSocket.accept(); + connection.setSoTimeout(10_000); + + if (!badBotList.isAllowed(connection.getInetAddress())) { + connection.close(); + } else { + pool.execute(() -> serve(connection)); + } + } + } + catch (IOException ex) { + logger.error("IO Exception in gemini server", ex); + } + } + + private void serve(SSLSocket socket) { + final GeminiConnection connection; + try { + connection = new GeminiConnection(socket); + } + catch (IOException ex) { + logger.error("Failed to create connection object", ex); + return; + } + + try { + handleRequest(connection); + } + catch (GeminiUserException ex) { + errorResponse(connection, ex.getMessage()); + } + catch (SSLException ex) { + logger.error(connection.getAddress() + " SSL error"); + connection.close(); + } + catch (Exception ex) { + errorResponse(connection, "Error"); + logger.error(connection.getAddress(), ex); + } + finally { + connection.close(); + } + } + + private void errorResponse(GeminiConnection connection, String message) { + if (connection.isConnected()) { + try { + logger.error("=> " + connection.getAddress(), message); + connection.writeStatusLine(GeminiStatusCode.ERROR_PERMANENT, message); + } + catch (IOException ex) { + logger.error("Exception while sending error", ex); + } + } + } + + private void handleRequest(GeminiConnection connection) throws Exception { + + final String address = connection.getAddress(); + logger.info("Connect: " + address); + + final Optional maybeUri = connection.readUrl(); + if (maybeUri.isEmpty()) { + logger.info("Done: {}", address); + return; + } + + final URI uri = maybeUri.get(); + logger.info("Request {}", uri); + + if (!uri.getScheme().equals("gemini")) { + throw new GeminiUserException("Unsupported protocol"); + } + + servePage(connection, uri); + logger.info("Done: {}", address); + } + + private void servePage(GeminiConnection connection, URI url) throws IOException { + String path = url.getPath(); + + for (Plugin p : plugins) { + if (p.serve(url, connection)) { + return; + } + } + + logger.error("FileNotFound {}", path); + connection.writeStatusLine(GeminiStatusCode.ERROR_TEMPORARY, "No such file"); + } + + +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java index fbfb502b..46bdfb7d 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java +++ b/marginalia_nu/src/main/java/nu/marginalia/gemini/plugins/BareStaticPagePlugin.java @@ -2,6 +2,7 @@ package nu.marginalia.gemini.plugins; import com.google.inject.Inject; import com.google.inject.name.Named; +import nu.marginalia.gemini.GeminiService; import nu.marginalia.gemini.io.GeminiConnection; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -11,8 +12,6 @@ import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; -import static nu.marginalia.gemini.GeminiService.DEFAULT_FILENAME; - public class BareStaticPagePlugin implements Plugin { private final Logger logger = LoggerFactory.getLogger(getClass()); @@ -43,8 +42,8 @@ public class BareStaticPagePlugin implements Plugin { private Path getServerPath(String requestPath) { final Path serverPath = Path.of(geminiServerRoot + requestPath); - if (Files.isDirectory(serverPath) && Files.isRegularFile(serverPath.resolve(DEFAULT_FILENAME))) { - return serverPath.resolve(DEFAULT_FILENAME); + if (Files.isDirectory(serverPath) && Files.isRegularFile(serverPath.resolve(GeminiService.DEFAULT_FILENAME))) { + return serverPath.resolve(GeminiService.DEFAULT_FILENAME); } return serverPath; diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java index 60c22b9b..4c93db95 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/auth/AuthService.java @@ -1,6 +1,5 @@ package nu.marginalia.wmsa.auth; -import com.github.jknack.handlebars.internal.Files; import com.google.inject.Inject; import com.google.inject.name.Named; import nu.marginalia.wmsa.auth.model.LoginFormModel; @@ -14,11 +13,12 @@ import spark.Request; import spark.Response; import spark.Spark; -import java.io.FileReader; import java.io.IOException; +import java.nio.file.Files; import java.nio.file.Path; import java.util.Objects; import java.util.Optional; +import java.util.UUID; import static spark.Spark.*; @@ -40,11 +40,8 @@ public class AuthService extends Service { super(ip, port, initialization, metricsServer); - try (var is = new FileReader(topSecretPasswordFile.toFile())) { - password = Files.read(is); - } catch (IOException e) { - logger.error("Could not read password from file " + topSecretPasswordFile, e); - } + password = initPassword(topSecretPasswordFile); + loginFormRenderer = rendererFactory.renderer("auth/login"); Spark.path("public/api", () -> { @@ -60,6 +57,18 @@ public class AuthService extends Service { }); } + private String initPassword(Path topSecretPasswordFile) { + if (Files.exists(topSecretPasswordFile)) { + try { + return Files.readString(topSecretPasswordFile); + } catch (IOException e) { + logger.error("Could not read password from file " + topSecretPasswordFile, e); + } + } + logger.error("Setting random password"); + return UUID.randomUUID().toString(); + } + private Object loginForm(Request request, Response response) { String redir = Objects.requireNonNull(request.queryParams("redirect")); String service = Objects.requireNonNull(request.queryParams("service")); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java index e0aff247..c0f7dde2 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/ServiceDescriptor.java @@ -1,7 +1,7 @@ package nu.marginalia.wmsa.configuration; -import nu.marginalia.wmsa.auth.AuthMain; import nu.marginalia.wmsa.api.ApiMain; +import nu.marginalia.wmsa.auth.AuthMain; import nu.marginalia.wmsa.configuration.command.Command; import nu.marginalia.wmsa.configuration.command.ListCommand; import nu.marginalia.wmsa.configuration.command.StartCommand; @@ -35,7 +35,7 @@ public enum ServiceDescriptor { EDGE_SEARCH("edge-search", 5023, EdgeSearchMain.class), EDGE_ASSISTANT("edge-assistant", 5025, EdgeAssistantMain.class), - EDGE_MEMEX("memex", 5030, MemexMain.class), + MEMEX("memex", 5030, MemexMain.class), ENCYCLOPEDIA("encyclopedia", 5040, EncyclopediaMain.class), @@ -79,7 +79,6 @@ public enum ServiceDescriptor { } public static void main(String... args) { - MainMapLookup.setMainArguments(args); Map functions = Stream.of(new ListCommand(), new StartCommand(), diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java index 55d46813..cb63d749 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/command/StartCommand.java @@ -16,7 +16,6 @@ public class StartCommand extends Command { System.err.println("Usage: start service-descriptor"); System.exit(255); } - var mainMethod = getKind(args[1]).mainClass.getMethod("main", String[].class); String[] args2 = Arrays.copyOfRange(args, 2, args.length); mainMethod.invoke(null, (Object) args2); diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java index c9f618da..9674611f 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/configuration/server/Service.java @@ -37,7 +37,7 @@ public class Service { private static volatile boolean initialized = false; - public Service(String ip, int port, Initialization initialization, MetricsServer metricsServer) { + public Service(String ip, int port, Initialization initialization, MetricsServer metricsServer, Runnable configureStaticFiles) { this.initialization = initialization; serviceName = System.getProperty("service-name"); @@ -51,8 +51,7 @@ public class Service { logger.info("{} Listening to {}:{}", getClass().getSimpleName(), ip == null ? "" : ip, port); - Spark.staticFiles.expireTime(3600); - Spark.staticFiles.header("Cache-control", "public"); + configureStaticFiles.run(); Spark.before(this::filterPublicRequests); Spark.before(this::auditRequestIn); @@ -66,24 +65,35 @@ public class Service { } } + public Service(String ip, int port, Initialization initialization, MetricsServer metricsServer) { + this(ip, port, initialization, metricsServer, () -> { + // configureStaticFiles can't be an overridable method in Service because it may + // need to depend on parameters to the constructor, and super-constructors + // must run first + Spark.staticFiles.expireTime(3600); + Spark.staticFiles.header("Cache-control", "public"); + }); + } + private void filterPublicRequests(Request request, Response response) { - if (null != request.headers("X-Public")) { - - String context = Optional - .ofNullable(request.headers("X-Context")) - .orElseGet(request::ip); - - if (!request.pathInfo().startsWith("/public/")) { - logger.warn(httpMarker, "External connection to internal API: {} -> {} {}", context, request.requestMethod(), request.pathInfo()); - Spark.halt(HttpStatus.SC_FORBIDDEN); - } - - String url = request.pathInfo(); - if (request.queryString() != null) { - url = url + "?" + request.queryString(); - } - logger.info(httpMarker, "PUBLIC {}: {} {}", Context.fromRequest(request).getIpHash().orElse("?"), request.requestMethod(), url); + if (null == request.headers("X-Public")) { + return; } + + String context = Optional + .ofNullable(request.headers("X-Context")) + .orElseGet(request::ip); + + if (!request.pathInfo().startsWith("/public/")) { + logger.warn(httpMarker, "External connection to internal API: {} -> {} {}", context, request.requestMethod(), request.pathInfo()); + Spark.halt(HttpStatus.SC_FORBIDDEN); + } + + String url = request.pathInfo(); + if (request.queryString() != null) { + url = url + "?" + request.queryString(); + } + logger.info(httpMarker, "PUBLIC {}: {} {}", Context.fromRequest(request).getIpHash().orElse("?"), request.requestMethod(), url); } private Object isInitialized(Request request, Response response) { diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java index 5b5ac2f7..febdc5af 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/Memex.java @@ -6,9 +6,9 @@ import com.google.inject.name.Named; import io.reactivex.rxjava3.schedulers.Schedulers; import nu.marginalia.gemini.GeminiService; import nu.marginalia.gemini.gmi.GemtextDatabase; +import nu.marginalia.gemini.gmi.GemtextDocument; import nu.marginalia.util.graphics.dithering.FloydSteinbergDither; import nu.marginalia.util.graphics.dithering.Palettes; -import nu.marginalia.gemini.gmi.GemtextDocument; import nu.marginalia.wmsa.memex.change.GemtextTombstoneUpdateCaclulator; import nu.marginalia.wmsa.memex.model.MemexImage; import nu.marginalia.wmsa.memex.model.MemexNode; @@ -16,7 +16,7 @@ import nu.marginalia.wmsa.memex.model.MemexNodeUrl; import nu.marginalia.wmsa.memex.renderer.MemexRendererers; import nu.marginalia.wmsa.memex.system.MemexFileSystemMonitor; import nu.marginalia.wmsa.memex.system.MemexFileWriter; -import nu.marginalia.wmsa.memex.system.MemexGitRepo; +import nu.marginalia.wmsa.memex.system.git.MemexGitRepo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java index 676ebc05..2533a9d1 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexConfigurationModule.java @@ -5,23 +5,59 @@ import com.google.inject.Inject; import com.google.inject.Provider; import com.google.inject.name.Named; import com.google.inject.name.Names; +import lombok.SneakyThrows; +import nu.marginalia.gemini.GeminiService; +import nu.marginalia.gemini.GeminiServiceDummy; +import nu.marginalia.gemini.GeminiServiceImpl; import nu.marginalia.wmsa.memex.system.MemexFileWriter; +import nu.marginalia.wmsa.memex.system.git.MemexGitRepo; +import nu.marginalia.wmsa.memex.system.git.MemexGitRepoDummy; +import nu.marginalia.wmsa.memex.system.git.MemexGitRepoImpl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.nio.file.Path; public class MemexConfigurationModule extends AbstractModule { + private static final Logger logger = LoggerFactory.getLogger(MemexConfigurationModule.class); + + private static final String MEMEX_ROOT_PROPERTY = System.getProperty("memex-root", "/var/lib/wmsa/memex"); + private static final String MEMEX_HTML_PROPERTY = System.getProperty("memex-html-resources", "/var/lib/wmsa/memex-html"); + private static final String MEMEX_GMI_PROPERTY = System.getProperty("memex-gmi-resources", "/var/lib/wmsa/memex-gmi"); + + private static final boolean MEMEX_DISABLE_GIT = Boolean.getBoolean("memex-disable-git"); + private static final boolean MEMEX_DISABLE_GEMINI = Boolean.getBoolean("memex-disable-gemini"); + + @SneakyThrows + public MemexConfigurationModule() { + Thread.sleep(100); + } + public void configure() { - bind(Path.class).annotatedWith(Names.named("memex-root")).toInstance(Path.of("/var/lib/wmsa/memex")); - bind(Path.class).annotatedWith(Names.named("memex-html-resources")).toInstance(Path.of("/var/lib/wmsa/memex-html")); - bind(Path.class).annotatedWith(Names.named("memex-gmi-resources")).toInstance(Path.of("/var/lib/wmsa/memex-gmi")); + bind(Path.class).annotatedWith(Names.named("memex-root")).toInstance(Path.of(MEMEX_ROOT_PROPERTY)); + bind(Path.class).annotatedWith(Names.named("memex-html-resources")).toInstance(Path.of(MEMEX_HTML_PROPERTY)); + bind(Path.class).annotatedWith(Names.named("memex-gmi-resources")).toInstance(Path.of(MEMEX_GMI_PROPERTY)); + bind(String.class).annotatedWith(Names.named("tombestone-special-file")).toInstance("/special/tombstone.gmi"); bind(String.class).annotatedWith(Names.named("redirects-special-file")).toInstance("/special/redirect.gmi"); + switchImpl(MemexGitRepo.class, MEMEX_DISABLE_GIT, MemexGitRepoDummy.class, MemexGitRepoImpl.class); + switchImpl(GeminiService.class, MEMEX_DISABLE_GEMINI, GeminiServiceDummy.class, GeminiServiceImpl.class); + bind(MemexFileWriter.class).annotatedWith(Names.named("html")).toProvider(MemexHtmlWriterProvider.class); bind(MemexFileWriter.class).annotatedWith(Names.named("gmi")).toProvider(MemexGmiWriterProvider.class); } - + void switchImpl(Class impl, boolean param, Class ifEnabled, Class ifDisabled) { + final Class choice; + if (param) { + choice = ifEnabled; + } + else { + choice = ifDisabled; + } + bind(impl).to(choice).asEagerSingleton(); + } public static class MemexHtmlWriterProvider implements Provider { private final Path path; diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java index e58848d2..f46ce4d1 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexMain.java @@ -18,7 +18,7 @@ public class MemexMain extends MainClass { } public static void main(String... args) { - init(ServiceDescriptor.EDGE_MEMEX, args); + init(ServiceDescriptor.MEMEX, args); Injector injector = Guice.createInjector( new MemexConfigurationModule(), diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java index 4d22f1af..16440960 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/MemexService.java @@ -3,6 +3,7 @@ package nu.marginalia.wmsa.memex; import com.google.inject.Inject; import com.google.inject.name.Named; import lombok.SneakyThrows; +import nu.marginalia.gemini.gmi.GemtextDocument; import nu.marginalia.gemini.gmi.renderer.GemtextRendererFactory; import nu.marginalia.wmsa.auth.client.AuthClient; import nu.marginalia.wmsa.configuration.server.Context; @@ -10,12 +11,11 @@ import nu.marginalia.wmsa.configuration.server.Initialization; import nu.marginalia.wmsa.configuration.server.MetricsServer; import nu.marginalia.wmsa.configuration.server.Service; import nu.marginalia.wmsa.memex.change.GemtextMutation; -import nu.marginalia.gemini.gmi.GemtextDocument; import nu.marginalia.wmsa.memex.change.update.GemtextDocumentUpdateCalculator; -import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; import nu.marginalia.wmsa.memex.model.MemexNodeUrl; import nu.marginalia.wmsa.memex.model.render.*; +import nu.marginalia.wmsa.memex.renderer.MemexHtmlRenderer; import org.apache.http.HttpStatus; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,9 +49,18 @@ public class MemexService extends Service { MemexHtmlRenderer renderer, AuthClient authClient, Initialization initialization, - MetricsServer metricsServer) { + MetricsServer metricsServer, + @Named("memex-html-resources") Path memexHtmlDir + ) { - super(ip, port, initialization, metricsServer); + super(ip, port, initialization, metricsServer, () -> { + staticFiles.externalLocation(memexHtmlDir.toString()); + staticFiles.disableMimeTypeGuessing(); + staticFiles.registerMimeType("gmi", "text/html"); + staticFiles.registerMimeType("png", "text/html"); + staticFiles.expireTime(60); + staticFiles.header("Cache-control", "public,proxy-revalidate"); + }); this.updateCalculator = updateCalculator; this.memex = memex; diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java index b98b34c7..b038637d 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/client/MemexApiClient.java @@ -8,7 +8,7 @@ import nu.marginalia.wmsa.configuration.ServiceDescriptor; public class MemexApiClient extends AbstractDynamicClient { @Inject public MemexApiClient() { - super(ServiceDescriptor.EDGE_MEMEX); + super(ServiceDescriptor.MEMEX); } } diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java index c72e2383..9d165272 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexSourceFileSystem.java @@ -4,11 +4,15 @@ import com.google.inject.Inject; import com.google.inject.Singleton; import com.google.inject.name.Named; import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import nu.marginalia.wmsa.memex.system.git.MemexGitRepo; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.nio.file.*; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.nio.file.StandardOpenOption; @Singleton public class MemexSourceFileSystem { diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepo.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepo.java new file mode 100644 index 00000000..d4e55491 --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepo.java @@ -0,0 +1,15 @@ +package nu.marginalia.wmsa.memex.system.git; + +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; + +public interface MemexGitRepo { + void pull(); + + void remove(MemexNodeUrl url); + + void add(MemexNodeUrl url); + + void update(MemexNodeUrl url); + + void rename(MemexNodeUrl src, MemexNodeUrl dst); +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoDummy.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoDummy.java new file mode 100644 index 00000000..4d5116ff --- /dev/null +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoDummy.java @@ -0,0 +1,36 @@ +package nu.marginalia.wmsa.memex.system.git; + +import com.google.inject.Singleton; +import nu.marginalia.wmsa.memex.model.MemexNodeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Singleton +public class MemexGitRepoDummy implements MemexGitRepo { + private static final Logger logger = LoggerFactory.getLogger(MemexGitRepoDummy.class); + + @Override + public void pull() { + logger.info("Would perform a pull here"); + } + + @Override + public void remove(MemexNodeUrl url) { + logger.info("Would perform a remove here"); + } + + @Override + public void add(MemexNodeUrl url) { + logger.info("Would perform an add here"); + } + + @Override + public void update(MemexNodeUrl url) { + logger.info("Would perform an update here"); + } + + @Override + public void rename(MemexNodeUrl src, MemexNodeUrl dst) { + logger.info("Would perform a rename here"); + } +} diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoImpl.java similarity index 90% rename from marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java rename to marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoImpl.java index 05ca6603..10c72060 100644 --- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/MemexGitRepo.java +++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/memex/system/git/MemexGitRepoImpl.java @@ -1,4 +1,4 @@ -package nu.marginalia.wmsa.memex.system; +package nu.marginalia.wmsa.memex.system.git; import com.google.inject.Inject; import com.google.inject.Singleton; @@ -10,7 +10,8 @@ import org.eclipse.jgit.api.Git; import org.eclipse.jgit.api.errors.GitAPIException; import org.eclipse.jgit.lib.Repository; import org.eclipse.jgit.storage.file.FileRepositoryBuilder; -import org.eclipse.jgit.transport.*; +import org.eclipse.jgit.transport.JschConfigSessionFactory; +import org.eclipse.jgit.transport.SshSessionFactory; import org.eclipse.jgit.util.FS; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -19,13 +20,13 @@ import java.io.IOException; import java.nio.file.Path; @Singleton -public class MemexGitRepo { +public class MemexGitRepoImpl implements MemexGitRepo { private final Git git; - private final Logger logger = LoggerFactory.getLogger(MemexGitRepo.class); + private final Logger logger = LoggerFactory.getLogger(MemexGitRepoImpl.class); @Inject - public MemexGitRepo(@Named("memex-root") Path root) throws IOException { + public MemexGitRepoImpl(@Named("memex-root") Path root) throws IOException { FileRepositoryBuilder repositoryBuilder = new FileRepositoryBuilder(); @@ -49,6 +50,7 @@ public class MemexGitRepo { pull(); } + @Override public void pull() { try { git.pull().call(); @@ -58,6 +60,7 @@ public class MemexGitRepo { } } + @Override public void remove(MemexNodeUrl url) { try { git.rm() @@ -72,6 +75,7 @@ public class MemexGitRepo { } } + @Override public void add(MemexNodeUrl url) { try { git.add() @@ -87,6 +91,7 @@ public class MemexGitRepo { logger.error("Git operation failed", ex); } } + @Override public void update(MemexNodeUrl url) { try { git.add() @@ -105,6 +110,7 @@ public class MemexGitRepo { } + @Override public void rename(MemexNodeUrl src, MemexNodeUrl dst) { try { git.rm().addFilepattern(filePattern(src)).call(); diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java index 9699bcf9..e3e670c7 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextChangeTest.java @@ -2,16 +2,18 @@ package nu.marginalia.wmsa.memex.change; import io.reactivex.rxjava3.plugins.RxJavaPlugins; import lombok.SneakyThrows; -import nu.marginalia.gemini.GeminiService; +import nu.marginalia.gemini.GeminiServiceImpl; import nu.marginalia.util.test.TestUtil; -import nu.marginalia.wmsa.memex.*; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.wmsa.memex.MemexData; +import nu.marginalia.wmsa.memex.MemexLoader; import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; import nu.marginalia.wmsa.memex.model.MemexNodeUrl; import nu.marginalia.wmsa.memex.renderer.MemexRendererers; import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes; import nu.marginalia.wmsa.memex.system.MemexFileWriter; -import nu.marginalia.wmsa.memex.system.MemexGitRepo; import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem; +import nu.marginalia.wmsa.memex.system.git.MemexGitRepoImpl; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -61,13 +63,13 @@ class GemtextChangeTest { var data = new MemexData(); memex = new Memex(data, null, - Mockito.mock(MemexGitRepo.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(), - new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)), + Mockito.mock(MemexGitRepoImpl.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(), + new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepoImpl.class)), tempDir, tombstonePath, redirectPath), Mockito.mock(MemexFileWriter.class), null, Mockito.mock(MemexRendererers.class), - Mockito.mock(GeminiService.class)); + Mockito.mock(GeminiServiceImpl.class)); } @SneakyThrows diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java index 8aefc613..d80d32eb 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTaskUpdateTest.java @@ -2,18 +2,20 @@ package nu.marginalia.wmsa.memex.change; import io.reactivex.rxjava3.plugins.RxJavaPlugins; import lombok.SneakyThrows; -import nu.marginalia.gemini.GeminiService; +import nu.marginalia.gemini.GeminiServiceImpl; import nu.marginalia.gemini.gmi.GemtextDocument; import nu.marginalia.util.test.TestUtil; -import nu.marginalia.wmsa.memex.*; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.wmsa.memex.MemexData; +import nu.marginalia.wmsa.memex.MemexLoader; import nu.marginalia.wmsa.memex.change.update.GemtextDocumentUpdateCalculator; import nu.marginalia.wmsa.memex.model.MemexNodeHeadingId; import nu.marginalia.wmsa.memex.model.MemexNodeUrl; import nu.marginalia.wmsa.memex.renderer.MemexRendererers; import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes; import nu.marginalia.wmsa.memex.system.MemexFileWriter; -import nu.marginalia.wmsa.memex.system.MemexGitRepo; import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem; +import nu.marginalia.wmsa.memex.system.git.MemexGitRepoImpl; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -67,12 +69,12 @@ class GemtextTaskUpdateTest { Files.createDirectory(tempDir.resolve("special")); var data = new MemexData(); - memex = new Memex(data, null, Mockito.mock(MemexGitRepo.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(), - new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)), tempDir, tombstonePath, redirectPath), + memex = new Memex(data, null, Mockito.mock(MemexGitRepoImpl.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(), + new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepoImpl.class)), tempDir, tombstonePath, redirectPath), Mockito.mock(MemexFileWriter.class), null, Mockito.mock(MemexRendererers.class), - Mockito.mock(GeminiService.class)); + Mockito.mock(GeminiServiceImpl.class)); } @SneakyThrows diff --git a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java index bfe3b104..51120654 100644 --- a/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java +++ b/marginalia_nu/src/test/java/nu/marginalia/wmsa/memex/change/GemtextTombstoneUpdateCaclulatorTest.java @@ -2,15 +2,17 @@ package nu.marginalia.wmsa.memex.change; import io.reactivex.rxjava3.plugins.RxJavaPlugins; import lombok.SneakyThrows; -import nu.marginalia.gemini.GeminiService; +import nu.marginalia.gemini.GeminiServiceImpl; import nu.marginalia.util.test.TestUtil; -import nu.marginalia.wmsa.memex.*; +import nu.marginalia.wmsa.memex.Memex; +import nu.marginalia.wmsa.memex.MemexData; +import nu.marginalia.wmsa.memex.MemexLoader; import nu.marginalia.wmsa.memex.model.MemexNodeUrl; import nu.marginalia.wmsa.memex.renderer.MemexRendererers; import nu.marginalia.wmsa.memex.system.MemexFileSystemModifiedTimes; import nu.marginalia.wmsa.memex.system.MemexFileWriter; -import nu.marginalia.wmsa.memex.system.MemexGitRepo; import nu.marginalia.wmsa.memex.system.MemexSourceFileSystem; +import nu.marginalia.wmsa.memex.system.git.MemexGitRepoImpl; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; @@ -64,13 +66,13 @@ class GemtextTombstoneUpdateCaclulatorTest { var data = new MemexData(); memex = new Memex(data, null, - Mockito.mock(MemexGitRepo.class), + Mockito.mock(MemexGitRepoImpl.class), new MemexLoader(data, new MemexFileSystemModifiedTimes(), - new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepo.class)), tempDir, tombstonePath, redirectPath), + new MemexSourceFileSystem(tempDir, Mockito.mock(MemexGitRepoImpl.class)), tempDir, tombstonePath, redirectPath), Mockito.mock(MemexFileWriter.class), updateCaclulator, Mockito.mock(MemexRendererers.class), - Mockito.mock(GeminiService.class)); + Mockito.mock(GeminiServiceImpl.class)); } @SneakyThrows