diff --git a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
index d86e85e6..769eca40 100644
--- a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
+++ b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/E2ETestBase.java
@@ -15,9 +15,9 @@ import java.nio.file.Path;
import java.time.Duration;
public abstract class E2ETestBase {
- public Network network = Network.newNetwork();
+ public static Network network = Network.newNetwork();
- public MariaDBContainer> getMariaDBContainer() {
+ public static MariaDBContainer> getMariaDBContainer() {
return new MariaDBContainer<>("mariadb")
.withDatabaseName("WMSA_prod")
.withUsername("wmsa")
@@ -27,7 +27,7 @@ public abstract class E2ETestBase {
.withNetworkAliases("mariadb");
}
- public GenericContainer> forService(ServiceDescriptor service, GenericContainer> mariaDB) {
+ public static GenericContainer> forService(ServiceDescriptor service, GenericContainer> mariaDB) {
return new GenericContainer<>("openjdk:17-alpine")
.dependsOn(mariaDB)
.withCopyFileToContainer(jarFile(), "/WMSA.jar")
diff --git a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/EdgeSearchE2ETest.java b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/EdgeSearchE2ETest.java
index 20103f15..0475d0e9 100644
--- a/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/EdgeSearchE2ETest.java
+++ b/marginalia_nu/src/e2e/java/nu/marginalia/wmsa/edge/EdgeSearchE2ETest.java
@@ -7,6 +7,7 @@ import org.jsoup.Jsoup;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.openqa.selenium.By;
+import org.openqa.selenium.OutputType;
import org.openqa.selenium.chrome.ChromeOptions;
import org.openzim.ZIMTypes.ZIMFile;
import org.openzim.ZIMTypes.ZIMReader;
@@ -22,6 +23,7 @@ import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
+import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
@@ -31,31 +33,33 @@ import static nu.marginalia.wmsa.configuration.ServiceDescriptor.*;
@Testcontainers
public class EdgeSearchE2ETest extends E2ETestBase {
@Container
- public GenericContainer> mariaDB = getMariaDBContainer();
+ public static GenericContainer> mariaDB = getMariaDBContainer();
@Container
- public GenericContainer> searchContainer = forService(EDGE_SEARCH, mariaDB);
+ public static GenericContainer> searchContainer = forService(EDGE_SEARCH, mariaDB);
@Container
- public GenericContainer> assistantContainer = forService(EDGE_ASSISTANT, mariaDB);
+ public static GenericContainer> assistantContainer = forService(EDGE_ASSISTANT, mariaDB);
@Container
- public GenericContainer> indexContainer = forService(EDGE_INDEX, mariaDB);
+ public static GenericContainer> encyclopediaContainer = forService(ENCYCLOPEDIA, mariaDB);
+ @Container
+ public static GenericContainer> indexContainer = forService(EDGE_INDEX, mariaDB);
@Container
- public NginxContainer> mockWikipedia = new NginxContainer<>("nginx:stable")
+ public static NginxContainer> mockWikipedia = new NginxContainer<>("nginx:stable")
.dependsOn(searchContainer)
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("wikipedia")))
.withFileSystemBind(getWikipediaFiles(), "/usr/share/nginx/html/", BindMode.READ_ONLY)
.withNetwork(network)
- .withNetworkAliases("wikipedia");
+ .withNetworkAliases("wikipedia.local");
@Container
- public BrowserWebDriverContainer> chrome = new BrowserWebDriverContainer<>()
+ public static BrowserWebDriverContainer> chrome = new BrowserWebDriverContainer<>()
.withNetwork(network)
.withCapabilities(new ChromeOptions());
@Container
- public GenericContainer> crawlerContainer = new GenericContainer<>("openjdk:17-alpine")
+ public static GenericContainer> crawlerContainer = new GenericContainer<>("openjdk:17-alpine")
.dependsOn(mockWikipedia)
.dependsOn(indexContainer)
.withNetwork(network)
@@ -69,14 +73,13 @@ public class EdgeSearchE2ETest extends E2ETestBase {
.waitingFor(Wait.forLogMessage(".*ALL DONE.*", 1).withStartupTimeout(Duration.ofMinutes(10)));
@Container
- public NginxContainer> proxyNginx = new NginxContainer<>("nginx:stable")
+ public static NginxContainer> proxyNginx = new NginxContainer<>("nginx:stable")
.dependsOn(searchContainer)
.dependsOn(crawlerContainer)
.withLogConsumer(new Slf4jLogConsumer(LoggerFactory.getLogger("nginx")))
.withCopyFileToContainer(MountableFile.forClasspathResource("nginx/search.conf"), "/etc/nginx/conf.d/default.conf")
.withNetwork(network)
.withNetworkAliases("proxyNginx");
- ;
public static MountableFile ipDatabasePath() {
Path modelsPath = Path.of(System.getProperty("user.dir")).resolve("data/models/IP2LOC/IP2LOCATION-LITE-DB1.CSV");
@@ -87,11 +90,22 @@ public class EdgeSearchE2ETest extends E2ETestBase {
return MountableFile.forHostPath(modelsPath.toString());
}
- private Path getCrawlPath() {
+ private static Path getCrawlPath() {
return Path.of(System.getProperty("user.dir")).resolve("build/tmp/crawl");
}
- private String getWikipediaFiles() {
+    /**
+     * Builds the destination path for a screenshot taken during an end-to-end test.
+     * <p>
+     * The directory {@code build/test/e2e/} below the working directory ({@code user.dir})
+     * is created if it does not exist yet. The chosen path is also printed to stdout.
+     *
+     * @param operation short label of the scenario, embedded in the file name
+     * @return path of the form {@code test-<operation>-<timestamp>.png} inside that directory
+     * @throws IOException if the directory cannot be created
+     */
+    private static Path screenshotFilename(String operation) throws IOException {
+        var path = Path.of(System.getProperty("user.dir")).resolve("build/test/e2e/");
+        Files.createDirectories(path);
+
+        // LocalDateTime#toString yields ISO-8601 (e.g. 2022-08-01T12:34:56.789); the ':' separators
+        // are not legal in Windows file names, so they are swapped for '-'.
+        String timestamp = LocalDateTime.now().toString().replace(':', '-');
+        String name = String.format("test-%s-%s.png", operation, timestamp);
+        path = path.resolve(name);
+
+        System.out.println("Screenshot in " + path);
+        return path;
+    }
+
+ private static String getWikipediaFiles() {
Path wikipediaFiles = Path.of(System.getProperty("user.dir")).resolve("build/tmp/wikipedia");
Path crawlFiles = getCrawlPath();
Path zimFile = Path.of(System.getProperty("user.dir")).resolve("data/test/wikipedia_en_100_nopic.zim");
@@ -120,7 +134,7 @@ public class EdgeSearchE2ETest extends E2ETestBase {
var zr = new ZIMReader(new ZIMFile(zimFile.toString()));
zr.forEachArticles((url, art) -> {
- urls.add("http://wikipedia/" + url + ".html");
+ urls.add("http://wikipedia.local/" + url + ".html");
if (art != null) {
try {
@@ -134,7 +148,7 @@ public class EdgeSearchE2ETest extends E2ETestBase {
}, pred -> true);
urls.forEach(System.out::println);
Files.writeString(wikipediaFiles.resolve("index.html"), "
");
- CrawlJobExtractorMain.writeSpec(crawlFiles.resolve("crawl.spec"), "wikipedia", urls);
+ CrawlJobExtractorMain.writeSpec(crawlFiles.resolve("crawl.spec"), "wikipedia.local", urls);
}
catch (IOException ex) {
ex.printStackTrace();
@@ -143,19 +157,80 @@ public class EdgeSearchE2ETest extends E2ETestBase {
}
@Test
- public void run() {
+ public void testFrontPage() throws IOException {
var driver = chrome.getWebDriver();
driver.get("http://proxyNginx/");
System.out.println(driver.getTitle());
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
+ Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("frontpage"));
+ }
+
+ @Test
+ public void testQuery() throws IOException {
+ var driver = chrome.getWebDriver();
+
driver.get("http://proxyNginx/search?query=bird&profile=corpo");
System.out.println(driver.getTitle());
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
- driver.get("http://proxyNginx/search?query=site:wikipedia");
+
+ Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("query"));
+ }
+
+ @Test
+ public void testSiteInfo() throws IOException {
+ var driver = chrome.getWebDriver();
+
+ driver.get("http://proxyNginx/search?query=site:wikipedia.local");
System.out.println(driver.getTitle());
System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
+
+
+ Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("site-info"));
+ }
+    /**
+     * Opens a {@code site:wikipedia.local frog} query through the proxyNginx host, prints the
+     * page title and the outer HTML of the root element, then stores a screenshot labelled
+     * "site-search". The page content itself is not asserted on.
+     */
+    @Test
+    public void testSiteSearch() throws IOException {
+        var webDriver = chrome.getWebDriver();
+        webDriver.get("http://proxyNginx/search?query=site:wikipedia.local%20frog");
+
+        var pageTitle = webDriver.getTitle();
+        System.out.println(pageTitle);
+
+        var rootElement = webDriver.findElement(new By.ByXPath("//*"));
+        System.out.println(rootElement.getAttribute("outerHTML"));
+
+        // Take the screenshot first, then resolve its destination, as before
+        var screenshot = webDriver.getScreenshotAs(OutputType.FILE).toPath();
+        Files.move(screenshot, screenshotFilename("site-search"));
+    }
+    /**
+     * Opens a {@code browse:wikipedia.local} query through the proxyNginx host, prints the
+     * page title and the outer HTML of the root element, then stores a screenshot labelled
+     * "browse". The page content itself is not asserted on.
+     */
+    @Test
+    public void testBrowse() throws IOException {
+        var webDriver = chrome.getWebDriver();
+        webDriver.get("http://proxyNginx/search?query=browse:wikipedia.local");
+
+        var pageTitle = webDriver.getTitle();
+        System.out.println(pageTitle);
+
+        var rootElement = webDriver.findElement(new By.ByXPath("//*"));
+        System.out.println(rootElement.getAttribute("outerHTML"));
+
+        // Take the screenshot first, then resolve its destination, as before
+        var screenshot = webDriver.getScreenshotAs(OutputType.FILE).toPath();
+        Files.move(screenshot, screenshotFilename("browse"));
+    }
+    /**
+     * Opens a {@code define:adiabatic} query through the proxyNginx host, prints the page
+     * title and the outer HTML of the root element, then stores a screenshot labelled
+     * "define". The page content itself is not asserted on.
+     */
+    @Test
+    public void testDefine() throws IOException {
+        var webDriver = chrome.getWebDriver();
+        webDriver.get("http://proxyNginx/search?query=define:adiabatic");
+
+        var pageTitle = webDriver.getTitle();
+        System.out.println(pageTitle);
+
+        var rootElement = webDriver.findElement(new By.ByXPath("//*"));
+        System.out.println(rootElement.getAttribute("outerHTML"));
+
+        // Take the screenshot first, then resolve its destination, as before
+        var screenshot = webDriver.getScreenshotAs(OutputType.FILE).toPath();
+        Files.move(screenshot, screenshotFilename("define"));
+    }
+ @Test
+ public void testEval() throws IOException {
+ var driver = chrome.getWebDriver();
+
+ driver.get("http://proxyNginx/search?query=3%2B3");
+ System.out.println(driver.getTitle());
+ System.out.println(driver.findElement(new By.ByXPath("//*")).getAttribute("outerHTML"));
+
+ Files.move(driver.getScreenshotAs(OutputType.FILE).toPath(), screenshotFilename("eval"));
}
}
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java
index 4b5d1b7a..81e8dd58 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDao.java
@@ -4,10 +4,8 @@ import com.google.inject.ImplementedBy;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.model.*;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
-import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainLink;
-import nu.marginalia.wmsa.edge.model.crawl.EdgeUrlVisit;
import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails;
-import nu.marginalia.wmsa.edge.search.BrowseResult;
+import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import java.util.Collection;
import java.util.List;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
index 54757834..a214bb15 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/data/dao/EdgeDataStoreDaoImpl.java
@@ -13,7 +13,7 @@ import nu.marginalia.wmsa.edge.model.EdgeUrl;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.model.search.EdgePageScoreAdjustment;
import nu.marginalia.wmsa.edge.model.search.EdgeUrlDetails;
-import nu.marginalia.wmsa.edge.search.BrowseResult;
+import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java
index 406a7dfd..08a4b6c1 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingService.java
@@ -10,7 +10,7 @@ import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.model.EdgeId;
-import nu.marginalia.wmsa.edge.search.BrowseResult;
+import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import org.jetbrains.annotations.NotNull;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingSessionObject.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingSessionObject.java
index c67cb51d..d842b212 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingSessionObject.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/dating/DatingSessionObject.java
@@ -4,7 +4,7 @@ import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.model.EdgeDomain;
import nu.marginalia.wmsa.edge.model.EdgeId;
-import nu.marginalia.wmsa.edge.search.BrowseResult;
+import nu.marginalia.wmsa.edge.search.model.BrowseResult;
import java.util.LinkedList;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeUrlDetails.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeUrlDetails.java
index ecbb3a3a..ed5fd013 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeUrlDetails.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/model/search/EdgeUrlDetails.java
@@ -4,7 +4,7 @@ import lombok.*;
import nu.marginalia.wmsa.edge.converting.processor.logic.HtmlFeature;
import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
import nu.marginalia.wmsa.edge.model.EdgeUrl;
-import nu.marginalia.wmsa.edge.search.EdgeSearchRankingSymbols;
+import nu.marginalia.wmsa.edge.search.model.EdgeSearchRankingSymbols;
import java.util.Objects;
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
index 5e999371..dd37c515 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchOperator.java
@@ -13,6 +13,8 @@ import nu.marginalia.wmsa.edge.index.model.IndexBlock;
import nu.marginalia.wmsa.edge.index.service.SearchOrder;
import nu.marginalia.wmsa.edge.model.*;
import nu.marginalia.wmsa.edge.model.search.*;
+import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResultSet;
+import nu.marginalia.wmsa.edge.search.model.DecoratedSearchResults;
import nu.marginalia.wmsa.edge.search.query.model.EdgeSearchQuery;
import nu.marginalia.wmsa.edge.search.query.QueryFactory;
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
@@ -26,7 +28,10 @@ import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import javax.annotation.Nullable;
import java.util.*;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@Singleton
@@ -77,17 +82,16 @@ public class EdgeSearchOperator {
return queryResults.resultSet;
}
- public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, String evalResult) {
-
-
+ public DecoratedSearchResults doSearch(Context ctx, EdgeUserSearchParameters params, @Nullable Future eval) {
Observable definitions = getWikiArticle(ctx, params.getHumanQuery());
-
var processedQuery = queryFactory.createQuery(params);
logger.info("Human terms: {}", Strings.join(processedQuery.searchTermsHuman, ','));
DecoratedSearchResultSet queryResults = performQuery(ctx, processedQuery, false);
+ String evalResult = getEvalResult(eval);
+
return new DecoratedSearchResults(params,
getProblems(ctx, params.getHumanQuery(), evalResult, queryResults, processedQuery),
evalResult,
@@ -97,6 +101,19 @@ public class EdgeSearchOperator {
getDomainId(processedQuery.domain));
}
+    /**
+     * Collects the outcome of an asynchronous evaluation, waiting at most 50 ms for it.
+     *
+     * @param eval the pending evaluation, or {@code null} when none was started
+     * @return the evaluated text; the empty string when {@code eval} is {@code null} or
+     *         cancelled, or when fetching the value fails, times out or is interrupted
+     */
+    private String getEvalResult(@Nullable Future<String> eval) {
+        if (eval == null || eval.isCancelled()) {
+            return "";
+        }
+        try {
+            return eval.get(50, TimeUnit.MILLISECONDS);
+        }
+        catch (InterruptedException ex) {
+            // Restore the interrupt flag so code further up the stack can still observe it
+            Thread.currentThread().interrupt();
+            logger.warn("Error fetching eval result", ex);
+            return "";
+        }
+        catch (Exception ex) {
+            logger.warn("Error fetching eval result", ex);
+            return "";
+        }
+    }
+
private int getDomainId(String domain) {
int domainId = -1;
try {
diff --git a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java
index d117df15..bc318ddf 100644
--- a/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java
+++ b/marginalia_nu/src/main/java/nu/marginalia/wmsa/edge/search/EdgeSearchService.java
@@ -13,20 +13,11 @@ import nu.marginalia.wmsa.configuration.server.Context;
import nu.marginalia.wmsa.configuration.server.Initialization;
import nu.marginalia.wmsa.configuration.server.MetricsServer;
import nu.marginalia.wmsa.configuration.server.Service;
-import nu.marginalia.wmsa.edge.assistant.client.AssistantClient;
-import nu.marginalia.wmsa.edge.assistant.dict.DictionaryResponse;
-import nu.marginalia.wmsa.edge.assistant.screenshot.ScreenshotService;
-import nu.marginalia.wmsa.edge.data.dao.EdgeDataStoreDao;
-import nu.marginalia.wmsa.edge.data.dao.task.EdgeDomainBlacklist;
import nu.marginalia.wmsa.edge.index.client.EdgeIndexClient;
-import nu.marginalia.wmsa.edge.index.model.IndexBlock;
-import nu.marginalia.wmsa.edge.model.EdgeDomain;
-import nu.marginalia.wmsa.edge.model.EdgeId;
-import nu.marginalia.wmsa.edge.model.crawl.EdgeDomainIndexingState;
+import nu.marginalia.wmsa.edge.search.command.CommandEvaluator;
+import nu.marginalia.wmsa.edge.search.command.ResponseType;
+import nu.marginalia.wmsa.edge.search.command.SearchParameters;
import nu.marginalia.wmsa.edge.search.query.model.EdgeUserSearchParameters;
-import nu.marginalia.wmsa.edge.search.siteinfo.DomainInformationService;
-import nu.marginalia.wmsa.renderer.mustache.MustacheRenderer;
-import nu.marginalia.wmsa.renderer.mustache.RendererFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
@@ -35,84 +26,39 @@ import spark.Spark;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
-import java.nio.file.Path;
import java.util.*;
import java.util.stream.Collectors;
public class EdgeSearchService extends Service {
- private final EdgeDataStoreDao edgeDataStoreDao;
private final EdgeIndexClient indexClient;
- private final AssistantClient assistantClient;
- private final UnitConversion unitConversion;
private final EdgeSearchOperator searchOperator;
- private final EdgeDomainBlacklist blacklist;
- private final ScreenshotService screenshotService;
- private DomainInformationService domainInformationService;
-
- private final MustacheRenderer browseResultsRenderer;
- private final MustacheRenderer searchResultsRenderer;
- private final MustacheRenderer searchResultsRendererGmi;
- private final MustacheRenderer dictionaryRenderer;
- private final MustacheRenderer dictionaryRendererGmi;
- private final MustacheRenderer