mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Get suggestions working again
This commit is contained in:
parent
7c58ddce81
commit
964014860a
@ -14,6 +14,7 @@ import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.Duration;
|
||||
import java.util.Optional;
|
||||
import java.util.Properties;
|
||||
|
||||
@ -90,6 +91,9 @@ public class DatabaseModule extends AbstractModule {
|
||||
|
||||
config.setMaximumPoolSize(100);
|
||||
config.setMinimumIdle(10);
|
||||
|
||||
config.setMaxLifetime(Duration.ofMinutes(9).toMillis());
|
||||
|
||||
return new HikariDataSource(config);
|
||||
}
|
||||
finally {
|
||||
|
@ -1,13 +0,0 @@
|
||||
package nu.marginalia.service.module;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
|
||||
import javax.inject.Inject;
|
||||
|
||||
/**
 * Injectable helper whose only action is to copy the injected service name
 * into the JVM system property {@code "service-name"}.
 *
 * NOTE(review): presumably the logging setup reads this property — confirm
 * against the logging configuration, which is not visible here.
 */
public class LoggerConfiguration {
|
||||
/**
 * Stores the given name under the {@code "service-name"} system property.
 *
 * @param serviceName value injected for the {@code "service-name"} named binding
 */
@Inject
|
||||
public LoggerConfiguration(@Named("service-name") String serviceName) {
|
||||
System.setProperty("service-name", serviceName);
|
||||
}
|
||||
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
package nu.marginalia.service.module;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import javax.inject.Provider;
|
||||
|
||||
/**
 * {@link Provider} of an integer port derived from the injected
 * {@code "service-port"} value by adding a fixed offset of 1000.
 *
 * NOTE(review): the class name suggests the result is the metrics port —
 * confirm with the code that consumes this provider.
 */
public class MetricsPortProvider implements Provider<Integer> {
|
||||
// Base port as injected; the offset is applied on each call to get().
private final Integer servicePort;
|
||||
|
||||
/**
 * @param servicePort value injected for the {@code "service-port"} named binding
 */
@Inject
|
||||
public MetricsPortProvider(@Named("service-port") Integer servicePort) {
|
||||
this.servicePort = servicePort;
|
||||
}
|
||||
|
||||
/**
 * @return the injected service port plus 1000
 */
@Override
|
||||
public Integer get() {
|
||||
return servicePort+1000;
|
||||
}
|
||||
|
||||
}
|
@ -10,7 +10,7 @@ import static com.google.inject.name.Names.named;
|
||||
|
||||
public class AssistantModule extends AbstractModule {
|
||||
public void configure() {
|
||||
bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(WmsaHome.getHomePath().resolve("suggestions.txt"));
|
||||
bind(Path.class).annotatedWith(named("suggestions-file")).toInstance(WmsaHome.getHomePath().resolve("data/suggestions.txt"));
|
||||
|
||||
bind(LanguageModels.class).toInstance(WmsaHome.getLanguageModels());
|
||||
}
|
||||
|
@ -137,6 +137,10 @@ public class Suggestions {
|
||||
public Stream<String> getSuggestionsForKeyword(int count, String prefix) {
|
||||
var start = suggestionsTrie.select(prefix);
|
||||
|
||||
if (start == null) {
|
||||
return Stream.empty();
|
||||
}
|
||||
|
||||
if (!start.getKey().startsWith(prefix)) {
|
||||
return Stream.empty();
|
||||
}
|
||||
|
@ -1,23 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Marginalia Search - About</title>
|
||||
<link rel="stylesheet" href="https://www.marginalia.nu/style.css" />
|
||||
<link rel="stylesheet" href="https://search.marginalia.nu/style.css" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav>
|
||||
<a href="/">Search</a>
|
||||
<a href="/about.html">About</a>
|
||||
<a class="marginalia-link" href="https://www.marginalia.nu/">Marginalia(🇸🇪)</a>
|
||||
</nav>
|
||||
</header>
|
||||
<article>
|
||||
<p>
|
||||
This page has been moved to <a href="https://memex.marginalia.nu/projects/edge/about.gmi">the memex</a>.
|
||||
</p>
|
||||
</article>
|
||||
</body>
|
@ -1,23 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html xmlns="http://www.w3.org/1999/html">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Marginalia Search - Change Log</title>
|
||||
<link rel="stylesheet" href="https://www.marginalia.nu/style.css" />
|
||||
<link rel="stylesheet" href="https://search.marginalia.nu/style.css" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav>
|
||||
<a href="/">Search</a>
|
||||
<a href="/about.html">About</a>
|
||||
<a class="marginalia-link" href="https://www.marginalia.nu/">Marginalia(🇸🇪)</a>
|
||||
</nav>
|
||||
</header>
|
||||
<article>
|
||||
<p>
|
||||
This page has been moved to <a href="https://memex.marginalia.nu/projects/edge/changelog.gmi">the memex</a>.
|
||||
</p>
|
||||
</article>
|
||||
</body>
|
@ -1,23 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Error</title>
|
||||
<link rel="stylesheet" href="https://www.marginalia.nu/style.css" />
|
||||
<link rel="stylesheet" href="https://search.marginalia.nu/style.css" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav>
|
||||
<a href="/">Search</a>
|
||||
<a href="/about.html">About</a>
|
||||
</nav>
|
||||
</header>
|
||||
<article>
|
||||
<h1>An error has occurred!</h1>
|
||||
<p>
|
||||
Something went wrong while processing your query. Please try again later.
|
||||
</p>
|
||||
</article>
|
||||
</body>
|
@ -1,29 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Marginalia Search - Known Issues</title>
|
||||
<link rel="stylesheet" href="https://www.marginalia.nu/style.css" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav>
|
||||
<a href="/">Search</a>
|
||||
<a href="/about.html">About</a>
|
||||
<a class="marginalia-link" href="https://www.marginalia.nu/">Marginalia(🇸🇪)</a>
|
||||
</nav>
|
||||
</header>
|
||||
<article>
|
||||
<h1>Known Issues</h1>
|
||||
<ul>
|
||||
<li>Non-Latin text becomes horribly garbled in the summary and title description.</li>
|
||||
</ul>
|
||||
<h2>Mitigated Issues</h2>
|
||||
<ul>
|
||||
<li><s>Non-latin characters are stripped from search results (<q>Ålö AB</q> becomes <q>l AB</q>)</s></li>
|
||||
<li><s>The page doesn't look good on mobile</s></li>
|
||||
<li><s>Still a few link farms getting good results</s></li>
|
||||
</ul>
|
||||
</article>
|
||||
</body>
|
@ -1,10 +0,0 @@
|
||||
<html><head>
|
||||
<title>Marginalia Search - Maintenance Notification</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>
|
||||
Down For Maintenance!
|
||||
</h1>
|
||||
<p>The search engine is currently down for maintenance.</p>
|
||||
<a href="https://search.marginalia.nu/">To The Start Page</a>
|
||||
</body></html>
|
@ -1,25 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Marginalia Search - Notes on Designing a Search Engine</title>
|
||||
<link rel="preload" as="font" href="https://www.marginalia.nu/fonts/Inconsolata-Medium.ttf" />
|
||||
<link rel="preload" as="font" href="https://www.marginalia.nu/fonts/Roboto-Medium.ttf" />
|
||||
<link rel="stylesheet" href="https://www.marginalia.nu/style.css" />
|
||||
<link rel="stylesheet" href="https://search.marginalia.nu/style.css" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav>
|
||||
<a href="/">Search</a>
|
||||
<a href="/about.html">About</a>
|
||||
<a class="marginalia-link" href="https://www.marginalia.nu/">Marginalia(🇸🇪)</a>
|
||||
</nav>
|
||||
</header>
|
||||
<article>
|
||||
<p>
|
||||
This page has been moved to <a href="https://memex.marginalia.nu/projects/edge/design-notes.gmi">the memex</a>.
|
||||
</p>
|
||||
</article>
|
||||
</body>
|
@ -72,7 +72,7 @@ if(!window.matchMedia("(pointer: coarse)").matches) {
|
||||
document.getElementsByClassName('input')[0].appendChild(suggestions);
|
||||
}
|
||||
|
||||
req.open("GET", "https://api.marginalia.nu/suggest/?partial="+encodeURIComponent(query.value));
|
||||
req.open("GET", "/suggest/?partial="+encodeURIComponent(query.value));
|
||||
req.send();
|
||||
}, 250);
|
||||
}
|
||||
|
@ -1,76 +0,0 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>Marginalia Search - About: Easy Read Wikipedia</title>
|
||||
<link rel="stylesheet" href="https://www.marginalia.nu/style.css" />
|
||||
<link rel="stylesheet" href="https://search.marginalia.nu/style.css" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<nav>
|
||||
<a href="/">Search</a>
|
||||
<a href="/about.html">About</a>
|
||||
</nav>
|
||||
</header>
|
||||
<article>
|
||||
<h1>About: High Readability Wikipedia</h1>
|
||||
<section>
|
||||
<p>
|
||||
This is a wikipedia client that strips away most links and almost all visual clutter
|
||||
to provide a more book-like reading experience with fewer distractions.
|
||||
</p>
|
||||
<p>
|
||||
This is primarily a helpful utility for a search engine focusing on similarly text-oriented
|
||||
websites.
|
||||
</p>
|
||||
<p>
|
||||
You are welcome to use it for general article reading as well. This may be useful
|
||||
if you are on a low bandwidth connection, since the download size is typically reduced
|
||||
from megabytes to dozens of kilobytes.
|
||||
</p>
|
||||
<p>
|
||||
What's taken away is all the design elements that your brain would have to filter out
|
||||
to read the text of the article. It seems as though overburdening this mental process
|
||||
causes the reader to start scanning the text instead of reading it, which is experienced
|
||||
as an inability to focus.
|
||||
</p>
|
||||
<p>
|
||||
The cleaning process is not perfect and will occasionally produce strange results,
|
||||
but significant problems should be relatively rare.
|
||||
</p>
|
||||
<a href="https://search.marginalia.nu/about.html">About the Search Engine</a>
|
||||
|
||||
<h2>Limitations</h2>
|
||||
<p>This is a "stale" copy of wikipedia, based on an archived copy from January 2021. On the
|
||||
other hand, we used to abide printed encyclopedias that didn't update at all. </p>
|
||||
<p>
|
||||
Be aware that the cleaning strips away a lot of information, including most references,
|
||||
footnotes, quality warnings, and so forth. Refer to the original wikipedia article for
|
||||
that information.
|
||||
</p>
|
||||
</section>
|
||||
<h1>Legal</h1>
|
||||
<section>
|
||||
The Wikipedia text is available under the Creative Commons Attribution-ShareAlike 3.0 license,
|
||||
and so is the wikipedia text forwarded to you through this service.
|
||||
</section>
|
||||
<section>
|
||||
<h2>Further reading</h2>
|
||||
<dt>Blom et al. 2017 - Comprehension and navigation of networked hypertexts</dt>
|
||||
<dd><a class="teknisk" href="https://onlinelibrary.wiley.com/doi/pdf/10.1111/jcal.12243">https://onlinelibrary.wiley.com/doi/pdf/10.1111/jcal.12243</a></dd>
|
||||
</section>
|
||||
<h1>Have something to say?</h1>
|
||||
<section>
|
||||
<p>Send me an e-mail at <a href="mailto:kontakt@marginalia.nu"
|
||||
class="teknisk">kontakt@marginalia.nu</a>.
|
||||
</p>
|
||||
<p>
|
||||
Don't hesitate to let me know if the website is somehow being a nuisance,
|
||||
it should respect robots.txt and reduce outgoing requests, but the format
|
||||
isn't super-standardized, so occasionally it doesn't understand every directive.
|
||||
</p>
|
||||
</section>
|
||||
</article>
|
||||
</body>
|
@ -8,6 +8,7 @@
|
||||
<link rel="search" type="application/opensearchdescription+xml" href="/opensearch.xml" title="Marginalia">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<meta name="robots" content="noindex" />
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
|
@ -103,4 +103,5 @@
|
||||
<tt><a href="https://git.marginalia.nu/">https://git.marginalia.nu/</a></tt>.
|
||||
|
||||
</section>
|
||||
</footer>
|
||||
</footer>
|
||||
<script src="tts.js" rel="javascript"></script>
|
@ -15,6 +15,8 @@ import nu.marginalia.service.server.RateLimiter;
|
||||
import nu.marginalia.service.server.Service;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.slf4j.Marker;
|
||||
import org.slf4j.MarkerFactory;
|
||||
import spark.Request;
|
||||
import spark.Response;
|
||||
import spark.Spark;
|
||||
@ -30,6 +32,9 @@ public class ApiService extends Service {
|
||||
private final ConcurrentHashMap<String, ApiLicense> licenseCache = new ConcurrentHashMap<>();
|
||||
private final ConcurrentHashMap<ApiLicense, RateLimiter> rateLimiters = new ConcurrentHashMap<>();
|
||||
|
||||
// Marker for filtering out sensitive content from the persistent logs
|
||||
private final Marker queryMarker = MarkerFactory.getMarker("QUERY");
|
||||
|
||||
@Inject
|
||||
public ApiService(@Named("service-host") String ip,
|
||||
@Named("service-port") Integer port,
|
||||
@ -78,7 +83,7 @@ public class ApiService extends Service {
|
||||
int count = Integer.parseInt(request.queryParamOrDefault("count", "20"));
|
||||
int index = Integer.parseInt(request.queryParamOrDefault("index", "3"));
|
||||
|
||||
logger.info("{} Search {}", license.key, args[0]);
|
||||
logger.info(queryMarker, "{} Search {}", license.key, args[0]);
|
||||
|
||||
return searchClient.query(Context.fromRequest(request), args[0], count, index)
|
||||
.blockingFirst().withLicense(license.getLicense());
|
||||
|
@ -29,6 +29,10 @@ server {
|
||||
location /site/ {
|
||||
rewrite ^/site/(.*)$ /search?query=site:$1&profile=yolo;
|
||||
}
|
||||
# Forward requests under /suggest/ to assistant-service on port 5025,
# prefixing the original request URI with /public. Access logging is
# turned off for this location.
location /suggest/ {
|
||||
proxy_pass http://assistant-service:5025/public$request_uri;
|
||||
access_log off;
|
||||
}
|
||||
location / {
|
||||
proxy_pass http://search-service:5023/public/;
|
||||
}
|
||||
|
@ -26,6 +26,12 @@ download_model model/tfreq-new-algo3.bin https://downloads.marginalia.nu/model/t
|
||||
download_model data/IP2LOCATION-LITE-DB1.CSV.ZIP https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP
|
||||
unzip -qn -d data data/IP2LOCATION-LITE-DB1.CSV.ZIP
|
||||
|
||||
download_model data/adblock.txt https://downloads.marginalia.nu/data/adblock.txt
|
||||
# Fetch and unpack the suggestions list only when data/suggestions.txt is
# not already present; gunzip replaces the .gz archive with the plain file.
if [ ! -f data/suggestions.txt ]; then
|
||||
download_model data/suggestions.txt.gz https://downloads.marginalia.nu/data/suggestions.txt.gz
|
||||
gunzip data/suggestions.txt.gz
|
||||
fi
|
||||
|
||||
if [ ! -d conf ]; then
|
||||
cp -r template/conf .
|
||||
fi
|
||||
|
Loading…
Reference in New Issue
Block a user