mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-22 20:48:59 +00:00
Merge branch 'master' into term-positions
This commit is contained in:
commit
8ed5b51a32
@ -44,6 +44,15 @@ public class ExecutorCrawlClient {
|
||||
.build());
|
||||
}
|
||||
|
||||
/**
 * Triggers a re-crawl of a single domain through the triggerSingleDomainRecrawl RPC.
 * Builds an RpcFileStorageIdWithDomainName from fid.id() and domainName and runs the
 * call against the node selected with forNode(node).
 *
 * @param node       node the call is routed to
 * @param fid        file storage whose id is sent as the request's fileStorageId
 * @param domainName value sent as the request's targetDomainName
 */
public void triggerRecrawlSingleDomain(int node, FileStorageId fid, String domainName) {
|
||||
channelPool.call(ExecutorCrawlApiBlockingStub::triggerSingleDomainRecrawl)
|
||||
.forNode(node)
|
||||
.run(RpcFileStorageIdWithDomainName.newBuilder()
|
||||
.setFileStorageId(fid.id())
|
||||
.setTargetDomainName(domainName)
|
||||
.build());
|
||||
}
|
||||
|
||||
public void triggerConvert(int node, FileStorageId fid) {
|
||||
channelPool.call(ExecutorCrawlApiBlockingStub::triggerConvert)
|
||||
.forNode(node)
|
||||
|
@ -22,6 +22,7 @@ service ExecutorApi {
|
||||
service ExecutorCrawlApi {
|
||||
rpc triggerCrawl(RpcFileStorageId) returns (Empty) {}
|
||||
rpc triggerRecrawl(RpcFileStorageId) returns (Empty) {}
|
||||
rpc triggerSingleDomainRecrawl(RpcFileStorageIdWithDomainName) returns (Empty) {}
|
||||
rpc triggerConvert(RpcFileStorageId) returns (Empty) {}
|
||||
rpc triggerConvertAndLoad(RpcFileStorageId) returns (Empty) {}
|
||||
rpc loadProcessedData(RpcFileStorageIds) returns (Empty) {}
|
||||
@ -55,6 +56,10 @@ message RpcProcessId {
|
||||
message RpcFileStorageId {
|
||||
int64 fileStorageId = 1;
|
||||
}
|
||||
// Request message of ExecutorCrawlApi.triggerSingleDomainRecrawl:
// a file storage id together with the name of the domain to re-crawl.
message RpcFileStorageIdWithDomainName {
|
||||
int64 fileStorageId = 1;
|
||||
string targetDomainName = 2;
|
||||
}
|
||||
message RpcFileStorageIds {
|
||||
repeated int64 fileStorageIds = 1;
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ package nu.marginalia.actor;
|
||||
public enum ExecutorActor {
|
||||
CRAWL,
|
||||
RECRAWL,
|
||||
RECRAWL_SINGLE_DOMAIN,
|
||||
CONVERT_AND_LOAD,
|
||||
PROC_CONVERTER_SPAWNER,
|
||||
PROC_LOADER_SPAWNER,
|
||||
|
@ -26,6 +26,7 @@ public class ExecutorActorControlService {
|
||||
private final ExecutorActorStateMachines stateMachines;
|
||||
public Map<ExecutorActor, ActorPrototype> actorDefinitions = new HashMap<>();
|
||||
private final int node;
|
||||
|
||||
@Inject
|
||||
public ExecutorActorControlService(MessageQueueFactory messageQueueFactory,
|
||||
BaseServiceParams baseServiceParams,
|
||||
@ -33,6 +34,7 @@ public class ExecutorActorControlService {
|
||||
ConvertAndLoadActor convertAndLoadActor,
|
||||
CrawlActor crawlActor,
|
||||
RecrawlActor recrawlActor,
|
||||
RecrawlSingleDomainActor recrawlSingleDomainActor,
|
||||
RestoreBackupActor restoreBackupActor,
|
||||
ConverterMonitorActor converterMonitorFSM,
|
||||
CrawlerMonitorActor crawlerMonitorActor,
|
||||
@ -57,6 +59,8 @@ public class ExecutorActorControlService {
|
||||
|
||||
register(ExecutorActor.CRAWL, crawlActor);
|
||||
register(ExecutorActor.RECRAWL, recrawlActor);
|
||||
register(ExecutorActor.RECRAWL_SINGLE_DOMAIN, recrawlSingleDomainActor);
|
||||
|
||||
register(ExecutorActor.CONVERT, convertActor);
|
||||
register(ExecutorActor.RESTORE_BACKUP, restoreBackupActor);
|
||||
register(ExecutorActor.CONVERT_AND_LOAD, convertAndLoadActor);
|
||||
|
@ -50,7 +50,9 @@ public class CrawlActor extends RecordActorPrototype {
|
||||
storageService.relateFileStorages(storage.id(), dataArea.id());
|
||||
|
||||
// Send convert request
|
||||
long msgId = mqCrawlerOutbox.sendAsync(new CrawlRequest(List.of(fid), dataArea.id()));
|
||||
long msgId = mqCrawlerOutbox.sendAsync(
|
||||
CrawlRequest.forSpec(fid, dataArea.id())
|
||||
);
|
||||
|
||||
yield new Crawl(msgId);
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ public class RecrawlActor extends RecordActorPrototype {
|
||||
|
||||
refreshService.synchronizeDomainList();
|
||||
|
||||
long id = mqCrawlerOutbox.sendAsync(new CrawlRequest(null, fid));
|
||||
long id = mqCrawlerOutbox.sendAsync(CrawlRequest.forRecrawl(fid));
|
||||
|
||||
yield new Crawl(id, fid, cascadeLoad);
|
||||
}
|
||||
|
@ -0,0 +1,85 @@
|
||||
package nu.marginalia.actor.task;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.actor.prototype.RecordActorPrototype;
|
||||
import nu.marginalia.actor.state.ActorResumeBehavior;
|
||||
import nu.marginalia.actor.state.ActorStep;
|
||||
import nu.marginalia.actor.state.Resume;
|
||||
import nu.marginalia.mq.MqMessageState;
|
||||
import nu.marginalia.mq.outbox.MqOutbox;
|
||||
import nu.marginalia.mqapi.crawling.CrawlRequest;
|
||||
import nu.marginalia.process.ProcessOutboxes;
|
||||
import nu.marginalia.process.ProcessService;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.storage.model.FileStorageId;
|
||||
import nu.marginalia.storage.model.FileStorageType;
|
||||
|
||||
/**
 * Actor that asks the crawler process to re-crawl one domain into an existing
 * crawl data storage. It has two steps: {@code Initial} validates the storage and
 * sends the crawl request, {@code Crawl} waits for the crawler's response.
 */
@Singleton
|
||||
public class RecrawlSingleDomainActor extends RecordActorPrototype {
|
||||
|
||||
private final MqOutbox mqCrawlerOutbox;
|
||||
private final FileStorageService storageService;
|
||||
private final ActorProcessWatcher processWatcher;
|
||||
|
||||
/** Initial step
|
||||
* @param storageId - the id of the storage to recrawl
|
||||
* @param targetDomainName - domain to be recrawled
|
||||
*/
|
||||
public record Initial(FileStorageId storageId, String targetDomainName) implements ActorStep {}
|
||||
|
||||
/** The action step */
|
||||
@Resume(behavior = ActorResumeBehavior.RETRY)
|
||||
public record Crawl(long messageId) implements ActorStep {}
|
||||
|
||||
/**
 * Advances the actor by one step.
 * <ul>
 *   <li>{@code Initial}: looks up the storage; yields an Error step if it is missing or
 *       is not of type CRAWL_DATA, otherwise sends a single-domain CrawlRequest to the
 *       crawler outbox and moves to {@code Crawl} with the id returned by sendAsync.</li>
 *   <li>{@code Crawl}: waits for the crawler's response to that message; yields an Error
 *       step unless the response state is OK, otherwise ends.</li>
 *   <li>Any other step ends the actor.</li>
 * </ul>
 *
 * @param self the current step
 * @return the next step
 */
@Override
|
||||
public ActorStep transition(ActorStep self) throws Exception {
|
||||
return switch (self) {
|
||||
case Initial (FileStorageId fid, String targetDomainName) -> {
|
||||
var crawlStorage = storageService.getStorage(fid);
|
||||
|
||||
// Refuse to start unless the id resolves to a crawl data storage
if (crawlStorage == null) yield new Error("Bad storage id");
|
||||
if (crawlStorage.type() != FileStorageType.CRAWL_DATA) yield new Error("Bad storage type " + crawlStorage.type());
|
||||
|
||||
long id = mqCrawlerOutbox.sendAsync(
|
||||
CrawlRequest.forSingleDomain(targetDomainName, fid)
|
||||
);
|
||||
|
||||
yield new Crawl(id);
|
||||
}
|
||||
case Crawl (long msgId) -> {
|
||||
// Wait for the crawler's response to the message sent in the Initial step
var rsp = processWatcher.waitResponse(
|
||||
mqCrawlerOutbox,
|
||||
ProcessService.ProcessId.CRAWLER,
|
||||
msgId);
|
||||
|
||||
if (rsp.state() != MqMessageState.OK) {
|
||||
yield new Error("Crawler failed");
|
||||
}
|
||||
|
||||
yield new End();
|
||||
}
|
||||
default -> new End();
|
||||
};
|
||||
}
|
||||
|
||||
/** Returns a one-line description of what this actor does. */
@Override
|
||||
public String describe() {
|
||||
return "Run the crawler only re-fetching a single domain";
|
||||
}
|
||||
|
||||
/**
 * Injection constructor. Keeps the process watcher and storage service, and takes
 * the crawler outbox from {@code processOutboxes}; {@code gson} is handed to the superclass.
 */
@Inject
|
||||
public RecrawlSingleDomainActor(ActorProcessWatcher processWatcher,
|
||||
ProcessOutboxes processOutboxes,
|
||||
FileStorageService storageService,
|
||||
Gson gson)
|
||||
{
|
||||
super(gson);
|
||||
|
||||
this.processWatcher = processWatcher;
|
||||
this.mqCrawlerOutbox = processOutboxes.getCrawlerOutbox();
|
||||
this.storageService = storageService;
|
||||
}
|
||||
|
||||
}
|
@ -47,6 +47,22 @@ public class ExecutorCrawlGrpcService extends ExecutorCrawlApiGrpc.ExecutorCrawl
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * gRPC handler that starts the RECRAWL_SINGLE_DOMAIN actor from its Initial step,
 * using the file storage id and target domain name carried by the request.
 * Replies with an empty message and completes the stream when startFrom returns;
 * any exception is forwarded to the observer via onError instead.
 *
 * @param request          file storage id and domain name to re-crawl
 * @param responseObserver observer that receives the empty reply or the error
 */
@Override
|
||||
public void triggerSingleDomainRecrawl(RpcFileStorageIdWithDomainName request, StreamObserver<Empty> responseObserver) {
|
||||
try {
|
||||
actorControlService.startFrom(ExecutorActor.RECRAWL_SINGLE_DOMAIN,
|
||||
new RecrawlSingleDomainActor.Initial(
|
||||
FileStorageId.of(request.getFileStorageId()),
|
||||
request.getTargetDomainName()));
|
||||
|
||||
responseObserver.onNext(Empty.getDefaultInstance());
|
||||
responseObserver.onCompleted();
|
||||
}
|
||||
catch (Exception e) {
|
||||
responseObserver.onError(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void triggerConvert(RpcFileStorageId request, StreamObserver<Empty> responseObserver) {
|
||||
try {
|
||||
|
@ -6,6 +6,7 @@ import nu.marginalia.model.EdgeDomain;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
@ -24,6 +25,10 @@ public class AnchorTagsImpl implements AnchorTagsSource {
|
||||
|
||||
logger.info("Loading atags from " + atagsPath);
|
||||
|
||||
if (!Files.exists(atagsPath)) {
|
||||
throw new IllegalArgumentException("atags file does not exist: " + atagsPath);
|
||||
}
|
||||
|
||||
try (var stmt = duckdbConnection.createStatement()) {
|
||||
// Insert the domains into a temporary table, then use that to filter the atags table
|
||||
|
||||
@ -35,13 +40,18 @@ public class AnchorTagsImpl implements AnchorTagsSource {
|
||||
}
|
||||
}
|
||||
|
||||
// Project the atags table down to only the relevant domains. This looks like an SQL injection
|
||||
// vulnerability if you're a validation tool, but the string comes from a trusted source.
|
||||
// This is a SQL injection vulnerability if you're a validation tool, but the string comes from a trusted source
|
||||
// -- we validate nonetheless to present a better error message
|
||||
String path = atagsPath.toAbsolutePath().toString();
|
||||
if (path.contains("'")) {
|
||||
throw new IllegalArgumentException("atags file path contains a single quote: " + path + " and would break the query.");
|
||||
}
|
||||
|
||||
stmt.executeUpdate("""
|
||||
create table atags as
|
||||
select * from '%s'
|
||||
where dest in (select * from domains)
|
||||
""".formatted(atagsPath.toAbsolutePath()));
|
||||
""".formatted(path));
|
||||
|
||||
// Free up the memory used by the domains table
|
||||
stmt.executeUpdate("drop table domains");
|
||||
|
1
code/libraries/array/cpp/.gitignore
vendored
Normal file
1
code/libraries/array/cpp/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
resources/libcpp.so
|
2
code/libraries/array/cpp/compile.sh
Normal file → Executable file
2
code/libraries/array/cpp/compile.sh
Normal file → Executable file
@ -7,4 +7,4 @@ if ! which ${CXX} > /dev/null; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
${CXX} -O3 -march=native -shared -Isrc/main/public src/main/cpp/*.cpp -o resources/libcpp.so
|
||||
${CXX} -O3 -march=native -std=c++14 -shared -Isrc/main/public src/main/cpp/*.cpp -o resources/libcpp.so
|
||||
|
@ -14,8 +14,24 @@ public class CrawlRequest {
|
||||
*/
|
||||
public List<FileStorageId> specStorage;
|
||||
|
||||
/** (optional) Name of a single domain to be re-crawled */
|
||||
public String targetDomainName;
|
||||
|
||||
/** File storage where the crawl data will be written. If it contains existing crawl data,
|
||||
* this crawl data will be referenced for e-tags and last-modified checks.
|
||||
*/
|
||||
public FileStorageId crawlStorage;
|
||||
|
||||
/**
 * Creates a request for a crawl driven by a single crawl spec storage.
 * The spec storage is wrapped in a one-element list and the second constructor
 * argument is left null (presumably targetDomainName, going by the field order —
 * the constructor is not visible here, confirm).
 *
 * @param specStorage  storage holding the crawl specification
 * @param crawlStorage storage passed on as the request's crawl storage
 */
public static CrawlRequest forSpec(FileStorageId specStorage, FileStorageId crawlStorage) {
|
||||
return new CrawlRequest(List.of(specStorage), null, crawlStorage);
|
||||
}
|
||||
|
||||
/**
 * Creates a request for re-crawling a single domain.
 * No spec storage list is supplied (first constructor argument is null).
 *
 * @param targetDomainName name of the domain to re-crawl
 * @param crawlStorage     storage passed on as the request's crawl storage
 */
public static CrawlRequest forSingleDomain(String targetDomainName, FileStorageId crawlStorage) {
|
||||
return new CrawlRequest(null, targetDomainName, crawlStorage);
|
||||
}
|
||||
|
||||
/**
 * Creates a request for a re-crawl of an existing crawl storage.
 * Both the first and second constructor arguments are null, i.e. neither a spec
 * storage list nor a single target domain is supplied.
 *
 * @param crawlStorage storage passed on as the request's crawl storage
 */
public static CrawlRequest forRecrawl(FileStorageId crawlStorage) {
|
||||
return new CrawlRequest(null, null, crawlStorage);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ import nu.marginalia.crawling.io.CrawledDomainReader;
|
||||
import nu.marginalia.crawling.io.CrawlerOutputFile;
|
||||
import nu.marginalia.crawling.parquet.CrawledDocumentParquetRecordFileWriter;
|
||||
import nu.marginalia.crawlspec.CrawlSpecFileNames;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.service.ProcessMainClass;
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.model.crawlspec.CrawlSpecRecord;
|
||||
@ -136,7 +137,12 @@ public class CrawlerMain extends ProcessMainClass {
|
||||
|
||||
var instructions = crawler.fetchInstructions();
|
||||
try {
|
||||
crawler.run(instructions.specProvider, instructions.outputDir);
|
||||
if (instructions.targetDomainName != null) {
|
||||
crawler.runForSingleDomain(instructions.targetDomainName, instructions.outputDir);
|
||||
}
|
||||
else {
|
||||
crawler.run(instructions.specProvider, instructions.outputDir);
|
||||
}
|
||||
instructions.ok();
|
||||
} catch (Exception ex) {
|
||||
logger.error("Crawler failed", ex);
|
||||
@ -200,6 +206,26 @@ public class CrawlerMain extends ProcessMainClass {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Crawls one domain without a crawl spec provider.
 * Starts the heartbeat, then opens a work log named "crawler-&lt;domain&gt;.log" under
 * outputDir (any '/' in the domain name replaced by '-'), an archiver obtained from
 * warcArchiverFactory and an anchor-tag source created for just the target domain.
 * A single CrawlTask is built from these and executed by calling task.run() directly.
 * The heartbeat is shut down in the finally block.
 *
 * NOTE(review): exceptions raised inside the try block are logged at WARN and not
 * rethrown, so the caller cannot tell a failed crawl from a successful one —
 * confirm this is intended.
 *
 * @param targetDomainName domain to crawl; also used to derive the work log file name
 * @param outputDir        directory given to the work log, the archiver factory and the task
 * @throws Exception declared by the signature; only failures outside the try block
 *                   (e.g. in heartbeat.start()) can actually propagate
 */
public void runForSingleDomain(String targetDomainName, Path outputDir) throws Exception {
|
||||
|
||||
heartbeat.start();
|
||||
|
||||
try (WorkLog workLog = new WorkLog(outputDir.resolve("crawler-" + targetDomainName.replace('/', '-') + ".log"));
|
||||
WarcArchiverIf warcArchiver = warcArchiverFactory.get(outputDir);
|
||||
AnchorTagsSource anchorTagsSource = anchorTagsSourceFactory.create(List.of(new EdgeDomain(targetDomainName)))
|
||||
) {
|
||||
// 1000 is the second CrawlSpecRecord argument — presumably a crawl size/depth limit; TODO confirm
var spec = new CrawlSpecRecord(targetDomainName, 1000, null);
|
||||
var task = new CrawlTask(spec, anchorTagsSource, outputDir, warcArchiver, workLog);
|
||||
task.run();
|
||||
}
|
||||
catch (Exception ex) {
|
||||
logger.warn("Exception in crawler", ex);
|
||||
}
|
||||
finally {
|
||||
heartbeat.shutDown();
|
||||
}
|
||||
}
|
||||
|
||||
class CrawlTask implements SimpleBlockingThreadPool.Task {
|
||||
|
||||
private final CrawlSpecRecord specification;
|
||||
@ -216,7 +242,8 @@ public class CrawlerMain extends ProcessMainClass {
|
||||
AnchorTagsSource anchorTagsSource,
|
||||
Path outputDir,
|
||||
WarcArchiverIf warcArchiver,
|
||||
WorkLog workLog) {
|
||||
WorkLog workLog)
|
||||
{
|
||||
this.specification = specification;
|
||||
this.anchorTagsSource = anchorTagsSource;
|
||||
this.outputDir = outputDir;
|
||||
@ -303,11 +330,19 @@ public class CrawlerMain extends ProcessMainClass {
|
||||
private final MqMessage message;
|
||||
private final MqSingleShotInbox inbox;
|
||||
|
||||
CrawlRequest(CrawlSpecProvider specProvider, Path outputDir, MqMessage message, MqSingleShotInbox inbox) {
|
||||
private final String targetDomainName;
|
||||
|
||||
CrawlRequest(CrawlSpecProvider specProvider,
|
||||
String targetDomainName,
|
||||
Path outputDir,
|
||||
MqMessage message,
|
||||
MqSingleShotInbox inbox)
|
||||
{
|
||||
this.message = message;
|
||||
this.inbox = inbox;
|
||||
this.specProvider = specProvider;
|
||||
this.outputDir = outputDir;
|
||||
this.targetDomainName = targetDomainName;
|
||||
}
|
||||
|
||||
|
||||
@ -325,6 +360,7 @@ public class CrawlerMain extends ProcessMainClass {
|
||||
var inbox = messageQueueFactory.createSingleShotInbox(CRAWLER_INBOX, node, UUID.randomUUID());
|
||||
|
||||
logger.info("Waiting for instructions");
|
||||
|
||||
var msgOpt = getMessage(inbox, nu.marginalia.mqapi.crawling.CrawlRequest.class.getSimpleName());
|
||||
var msg = msgOpt.orElseThrow(() -> new RuntimeException("No message received"));
|
||||
|
||||
@ -350,6 +386,7 @@ public class CrawlerMain extends ProcessMainClass {
|
||||
|
||||
return new CrawlRequest(
|
||||
specProvider,
|
||||
request.targetDomainName,
|
||||
crawlData.asPath(),
|
||||
msg,
|
||||
inbox);
|
||||
|
@ -183,6 +183,8 @@ public class HttpFetcherImpl implements HttpFetcher {
|
||||
|
||||
getBuilder.url(url.toString())
|
||||
.addHeader("Accept-Encoding", "gzip")
|
||||
.addHeader("Accept-Language", "en,*;q=0.5")
|
||||
.addHeader("Accept", "text/html, application/xhtml+xml, */*;q=0.8")
|
||||
.addHeader("User-agent", userAgentString);
|
||||
|
||||
contentTags.paint(getBuilder);
|
||||
@ -225,6 +227,7 @@ public class HttpFetcherImpl implements HttpFetcher {
|
||||
|
||||
getBuilder.url(url.toString())
|
||||
.addHeader("Accept-Encoding", "gzip")
|
||||
.addHeader("Accept", "text/*, */*;q=0.9")
|
||||
.addHeader("User-agent", userAgentString);
|
||||
|
||||
HttpFetchResult result = recorder.fetch(client, getBuilder.build());
|
||||
|
@ -32,7 +32,7 @@ public class NoSecuritySSL {
|
||||
@SneakyThrows
|
||||
public static SSLSocketFactory buildSocketFactory() {
|
||||
// Install the all-trusting trust manager
|
||||
final SSLContext sslContext = SSLContext.getInstance("SSL");
|
||||
final SSLContext sslContext = SSLContext.getInstance("TLS");
|
||||
sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
|
||||
|
||||
var clientSessionContext = sslContext.getClientSessionContext();
|
||||
|
@ -8,7 +8,7 @@ import java.security.NoSuchAlgorithmException;
|
||||
class WarcDigestBuilder {
|
||||
private final MessageDigest digest;
|
||||
|
||||
private static final String digestAlgorithm = "SHA-1";
|
||||
private static final String digestAlgorithm = "SHA-256";
|
||||
|
||||
public WarcDigestBuilder() throws NoSuchAlgorithmException {
|
||||
this.digest = MessageDigest.getInstance(digestAlgorithm);
|
||||
|
@ -1,6 +1,6 @@
|
||||
// This sets the data-has-js attribute on the body tag to true, so we can style the page with the assumption that
|
||||
// This sets the data-has-js attribute on the html tag to true, so we can style the page with the assumption that
|
||||
// the browser supports JS. This is a progressive enhancement, so the page will still work without JS.
|
||||
document.getElementsByTagName('body')[0].setAttribute('data-has-js', 'true');
|
||||
document.documentElement.setAttribute('data-has-js', 'true');
|
||||
|
||||
// To prevent the filter menu from being opened when the user hits enter on the search box, we need to add a keydown
|
||||
// handler to the search box that stops the event from propagating. Janky hack, but it works.
|
||||
|
@ -1,33 +1,102 @@
|
||||
$nicotine-dark: #acae89;
|
||||
$nicotine-light: #f8f8ee;
|
||||
$fg-dark: #000;
|
||||
$fg-light: #fff;
|
||||
$highlight-dark: #2f4858;
|
||||
$highlight-light: #3F5F6F;
|
||||
$highlight-light2: #eee;
|
||||
$border-color: #ccc;
|
||||
$border-color2: #aaa;
|
||||
$heading-fonts: serif;
|
||||
$visited: #fcc;
|
||||
:root {
|
||||
color-scheme: light;
|
||||
|
||||
--clr-bg-page: hsl(60, 42%, 95%); // $nicotine-light
|
||||
|
||||
--clr-bg-ui: hsl(0, 0%, 100%);
|
||||
--clr-text-ui: #000; // $fg-dark
|
||||
|
||||
--clr-bg-theme: hsl(200, 28%, 34%); // $highlight-light
|
||||
--clr-text-theme: #fff; // $fg-light
|
||||
|
||||
--clr-bg-highlight: hsl(0, 0%, 93%); // $highlight-light2
|
||||
--clr-text-highlight: #111111;
|
||||
|
||||
--clr-bg-accent: hsl(63, 19%, 61%); // $nicotine-dark
|
||||
--clr-border-accent: hsl(63, 19%, 35%);
|
||||
|
||||
--clr-border: #aaa; // $border-color2
|
||||
|
||||
--clr-shadow: var(--clr-border);
|
||||
|
||||
--clr-link: #0066cc;
|
||||
--clr-link-visited: #531a89;
|
||||
--clr-heading-link-visited: #fcc; // $visited
|
||||
|
||||
--font-family: sans-serif;
|
||||
--font-size: 14px;
|
||||
--font-family-heading: serif; // $heading-fonts
|
||||
}
|
||||
|
||||
|
||||
@mixin dark-theme-mixin {
|
||||
color-scheme: dark;
|
||||
|
||||
--clr-bg-page: hsl(0, 0%, 6%);
|
||||
|
||||
--clr-bg-ui: hsl(0, 0%, 18%);
|
||||
--clr-text-ui: #ddd;
|
||||
|
||||
--clr-bg-theme: hsl(0, 0%, 2%);
|
||||
--clr-text-theme: var(--clr-text-ui);
|
||||
|
||||
--clr-bg-highlight: hsl(0, 0%, 11%);
|
||||
--clr-text-highlight: #fff;
|
||||
|
||||
--clr-bg-accent: hsl(200, 32%, 28%);
|
||||
--clr-border-accent: hsl(200, 8%, 12%);
|
||||
|
||||
--clr-border: hsl(0, 0%, 30%);
|
||||
|
||||
--clr-shadow: #000;
|
||||
|
||||
--clr-link: #8a8aff;
|
||||
--clr-link-visited: #ffadff;
|
||||
--clr-heading-link-visited: var(--clr-link-visited);
|
||||
}
|
||||
|
||||
:root[data-theme='dark'] {
|
||||
@include dark-theme-mixin;
|
||||
}
|
||||
|
||||
// Makes theme match the user's OS preference when JS is disabled
|
||||
@media (prefers-color-scheme: dark) {
|
||||
:root:not([data-has-js="true"]) {
|
||||
@include dark-theme-mixin;
|
||||
}
|
||||
}
|
||||
|
||||
* {
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
a {
|
||||
color: var(--clr-link);
|
||||
}
|
||||
|
||||
a:visited {
|
||||
color: var(--clr-link-visited);
|
||||
}
|
||||
|
||||
input, textarea, select {
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
h1 a, h2 a {
|
||||
color: $fg-light;
|
||||
color: var(--clr-text-theme);
|
||||
}
|
||||
h1 a:visited, h2 a:visited {
|
||||
color: $visited;
|
||||
color: var(--clr-heading-link-visited);
|
||||
}
|
||||
progress {
|
||||
width: 10ch;
|
||||
}
|
||||
|
||||
body {
|
||||
background-color: $nicotine-light;
|
||||
color: $fg-dark;
|
||||
font-family: sans-serif;
|
||||
font-size: 14px;
|
||||
background-color: var(--clr-bg-page);
|
||||
color: var(--clr-text-ui);
|
||||
font-family: var(--font-family);
|
||||
font-size: var(--font-size);
|
||||
line-height: 1.6;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
@ -99,28 +168,28 @@ body {
|
||||
li {
|
||||
display: inline;
|
||||
padding: 1ch;
|
||||
background-color: $highlight-light2;
|
||||
background-color: var(--clr-bg-highlight);
|
||||
|
||||
a {
|
||||
text-decoration: none;
|
||||
display: inline-block;
|
||||
color: #000;
|
||||
color: var(--clr-text-highlight);
|
||||
}
|
||||
}
|
||||
|
||||
li.current {
|
||||
background-color: $highlight-light;
|
||||
background-color: var(--clr-bg-theme);
|
||||
a {
|
||||
color: #fff;
|
||||
color: var(--clr-text-theme);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.dialog {
|
||||
border: 1px solid $border-color2;
|
||||
box-shadow: 0 0 1ch $border-color;
|
||||
background-color: #fff;
|
||||
border: 1px solid var(--clr-border);
|
||||
box-shadow: 0 0 1ch var(--clr-shadow);
|
||||
background-color: var(--clr-bg-ui);
|
||||
padding: 1ch;
|
||||
|
||||
h2 {
|
||||
@ -129,43 +198,58 @@ body {
|
||||
font-weight: normal;
|
||||
padding: 0.5ch;
|
||||
font-size: 12pt;
|
||||
background-color: $highlight-light;
|
||||
color: #fff;
|
||||
background-color: var(--clr-bg-theme);
|
||||
color: var(--clr-text-theme);
|
||||
}
|
||||
}
|
||||
|
||||
header {
|
||||
background-color: $nicotine-dark;
|
||||
color: #fff;
|
||||
border: 1px solid #888;
|
||||
box-shadow: 0 0 0.5ch #888;
|
||||
background-color: var(--clr-bg-accent);
|
||||
border: 1px solid var(--clr-border-accent);
|
||||
color: var(--clr-text-ui);
|
||||
box-shadow: 0 0 0.5ch var(--clr-shadow);
|
||||
margin-bottom: 1ch;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
|
||||
nav {
|
||||
a {
|
||||
text-decoration: none;
|
||||
color: #000;
|
||||
|
||||
color: var(--clr-text-ui);
|
||||
padding: .5ch;
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
a:visited {
|
||||
color: var(--clr-text-ui);
|
||||
}
|
||||
|
||||
a.extra {
|
||||
background: #ccc linear-gradient(45deg,
|
||||
rgba(255,100,100,1) 0%,
|
||||
rgba(100,255,100,1) 50%,
|
||||
rgba(100,100,255,1) 100%);
|
||||
hsl(0, 100%, 70%) 0%,
|
||||
hsl(120, 100%, 70%) 50%,
|
||||
hsl(240, 100%, 70%) 100%);
|
||||
color: black;
|
||||
text-shadow: 0 0 0.5ch #fff;
|
||||
}
|
||||
|
||||
a:hover, a:focus {
|
||||
background: #2f4858;
|
||||
color: #fff !important;
|
||||
background: var(--clr-bg-theme);
|
||||
color: var(--clr-text-theme);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#theme {
|
||||
padding: .5ch;
|
||||
display: none;
|
||||
|
||||
[data-has-js='true'] & {
|
||||
display: block;
|
||||
}
|
||||
}
|
||||
|
||||
#complaint {
|
||||
@extend .dialog;
|
||||
max-width: 60ch;
|
||||
@ -210,11 +294,11 @@ header {
|
||||
@extend .heading;
|
||||
}
|
||||
|
||||
background-color: #fff;
|
||||
background-color: var(--clr-bg-ui);
|
||||
padding: 1ch;
|
||||
margin: 1ch;
|
||||
border: 1px solid $border-color2;
|
||||
box-shadow: 0 0 1ch $border-color;
|
||||
border: 1px solid var(--clr-border);
|
||||
box-shadow: 0 0 1ch var(--clr-shadow);
|
||||
}
|
||||
|
||||
section.cards {
|
||||
@ -226,11 +310,10 @@ section.cards {
|
||||
justify-content: flex-start;
|
||||
|
||||
.card {
|
||||
border: 2px #ccc;
|
||||
background-color: #fff;
|
||||
background-color: var(--clr-bg-ui);
|
||||
border-left: 1px solid #ecb;
|
||||
border-top: 1px solid #ecb;
|
||||
box-shadow: #0008 0 0 5px;
|
||||
box-shadow: var(--clr-shadow) 0 0 5px;
|
||||
|
||||
h2 {
|
||||
@extend .heading;
|
||||
@ -239,7 +322,7 @@ section.cards {
|
||||
|
||||
h2 a {
|
||||
display: block !important;
|
||||
color: #fff;
|
||||
color: inherit;
|
||||
text-decoration: none;
|
||||
}
|
||||
a:focus img {
|
||||
@ -271,12 +354,17 @@ section.cards {
|
||||
padding-right: 1ch;
|
||||
line-height: 1.6;
|
||||
}
|
||||
|
||||
[data-theme='dark'] & {
|
||||
border: 1px solid var(--clr-border);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
.positions {
|
||||
box-shadow: 0 0 2px #888;
|
||||
background-color: #e4e4e4;
|
||||
box-shadow: 0 0 2px var(--clr-shadow);
|
||||
backdrop-filter: brightness(90%);
|
||||
color: var(--clr-text-highlight);
|
||||
padding: 2px;
|
||||
margin-right: -1ch;
|
||||
margin-left: 1ch;
|
||||
@ -297,13 +385,13 @@ footer {
|
||||
|
||||
h1 {
|
||||
font-weight: normal;
|
||||
border-bottom: 4px solid $highlight-light;
|
||||
border-bottom: 4px solid var(--clr-bg-theme);
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 14pt;
|
||||
font-weight: normal;
|
||||
border-bottom: 2px solid $highlight-dark;
|
||||
border-bottom: 2px solid var(--clr-bg-theme);
|
||||
width: 80%;
|
||||
}
|
||||
|
||||
@ -312,9 +400,9 @@ footer {
|
||||
flex-basis: 40ch;
|
||||
flex-grow: 1.1;
|
||||
|
||||
background-color: #fff;
|
||||
border-left: 1px solid $border-color2;
|
||||
box-shadow: -1px -1px 5px $border-color;
|
||||
background-color: var(--clr-bg-ui);
|
||||
border-left: 1px solid var(--clr-border);
|
||||
box-shadow: -1px -1px 5px var(--clr-shadow);
|
||||
|
||||
padding-left: 1ch;
|
||||
padding-right: 1ch;
|
||||
@ -329,18 +417,18 @@ footer {
|
||||
}
|
||||
|
||||
.shadowbox {
|
||||
box-shadow: 0 0 1ch $border-color2;
|
||||
border: 1px solid $border-color;
|
||||
box-shadow: 0 0 1ch var(--clr-shadow);
|
||||
border: 1px solid var(--clr-border);
|
||||
}
|
||||
|
||||
.heading {
|
||||
margin: 0;
|
||||
padding: 0.5ch;
|
||||
background-color: $highlight-light;
|
||||
border-bottom: 1px solid $border-color2;
|
||||
font-family: $heading-fonts;
|
||||
background-color: var(--clr-bg-theme);
|
||||
border-bottom: 1px solid var(--clr-border);
|
||||
font-family: var(--font-family-heading);
|
||||
font-weight: normal;
|
||||
color: $fg-light;
|
||||
color: var(--clr-text-theme);
|
||||
font-size: 12pt;
|
||||
word-break: break-word;
|
||||
}
|
||||
@ -440,7 +528,7 @@ footer {
|
||||
@extend .shadowbox;
|
||||
|
||||
padding: 0.5ch;
|
||||
background-color: $fg-light;
|
||||
background-color: var(--clr-bg-ui);
|
||||
display: grid;
|
||||
grid-template-columns: max-content 0 auto max-content;
|
||||
grid-gap: 0.5ch;
|
||||
@ -452,12 +540,13 @@ footer {
|
||||
padding: 0.5ch;
|
||||
font-size: 14pt;
|
||||
word-break: keep-all;
|
||||
background-color: $highlight-light;
|
||||
color: $fg-light;
|
||||
font-family: $heading-fonts;
|
||||
background-color: var(--clr-bg-theme);
|
||||
color: var(--clr-text-theme);
|
||||
font-family: var(--font-family-heading);
|
||||
font-weight: normal;
|
||||
border: 1px solid;
|
||||
text-align: center;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
}
|
||||
|
||||
#suggestions-anchor {
|
||||
@ -469,18 +558,18 @@ footer {
|
||||
font-family: monospace;
|
||||
font-size: 12pt;
|
||||
padding: 0.5ch;
|
||||
border: 1px solid $border-color2;
|
||||
background-color: $fg-light;
|
||||
color: $fg-dark;
|
||||
border: 1px solid var(--clr-border);
|
||||
background-color: inherit;
|
||||
}
|
||||
|
||||
input[type="submit"] {
|
||||
font-size: 12pt;
|
||||
border: 1px solid $border-color2;
|
||||
background-color: $fg-light;
|
||||
color: $fg-dark;
|
||||
border: 1px solid var(--clr-border);
|
||||
background-color: var(--clr-bg-ui);
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
// white suggestions look fine in dark mode
|
||||
.suggestions {
|
||||
background-color: #fff;
|
||||
padding: .5ch;
|
||||
@ -491,7 +580,7 @@ footer {
|
||||
width: 300px;
|
||||
border-left: 1px solid #ccc;
|
||||
border-top: 1px solid #ccc;
|
||||
box-shadow: 5px 5px 5px #888;
|
||||
box-shadow: 5px 5px 5px var(--clr-shadow);
|
||||
z-index: 10;
|
||||
|
||||
a {
|
||||
@ -528,22 +617,22 @@ footer {
|
||||
#filters {
|
||||
@extend .shadowbox;
|
||||
margin-top: 1ch;
|
||||
background-color: $fg-light;
|
||||
background-color: var(--clr-bg-ui);
|
||||
|
||||
h2 {
|
||||
@extend .heading;
|
||||
background-color: $highlight-light;
|
||||
background-color: var(--clr-bg-theme);
|
||||
}
|
||||
h3 {
|
||||
@extend .heading;
|
||||
background-color: $highlight-light2;
|
||||
background-color: var(--clr-bg-highlight);
|
||||
color: var(--clr-text-highlight);
|
||||
font-family: sans-serif;
|
||||
color: #000;
|
||||
border-bottom: 1px solid #000;
|
||||
}
|
||||
|
||||
hr {
|
||||
border-top: 0.5px solid $border-color2;
|
||||
border-top: 0.5px solid var(--clr-border);
|
||||
border-bottom: none;
|
||||
}
|
||||
ul {
|
||||
@ -553,17 +642,17 @@ footer {
|
||||
li {
|
||||
padding: 1ch;
|
||||
a {
|
||||
color: $fg-dark;
|
||||
color: inherit;
|
||||
text-decoration: none;
|
||||
}
|
||||
a:hover, a:focus {
|
||||
border-bottom: 1px solid $highlight-light;
|
||||
border-bottom: 1px solid var(--clr-bg-theme);
|
||||
}
|
||||
}
|
||||
|
||||
li.current {
|
||||
border-left: 4px solid $highlight-light;
|
||||
background-color: $highlight-light2;
|
||||
border-left: 4px solid var(--clr-bg-theme);
|
||||
background-color: var(--clr-bg-highlight);
|
||||
a {
|
||||
margin-left: -4px;
|
||||
}
|
||||
@ -576,46 +665,46 @@ footer {
|
||||
margin: 1ch 0 2ch 0;
|
||||
|
||||
.url {
|
||||
background-color: $highlight-light;
|
||||
background-color: var(--clr-bg-theme);
|
||||
padding-left: 0.5ch;
|
||||
|
||||
a {
|
||||
word-break: break-all;
|
||||
font-family: monospace;
|
||||
font-size: 8pt;
|
||||
color: $fg-light;
|
||||
color: var(--clr-text-theme);
|
||||
text-shadow: 0 0 1ch #000; // guarantee decent contrast across background colors
|
||||
}
|
||||
a:visited {
|
||||
color: $visited;
|
||||
color: var(--clr-heading-link-visited);
|
||||
}
|
||||
}
|
||||
|
||||
h2 {
|
||||
a {
|
||||
word-break: break-all;
|
||||
color: $fg-dark;
|
||||
color: var(--clr-text-ui);
|
||||
text-decoration: none;
|
||||
}
|
||||
font-size: 12pt;
|
||||
@extend .heading;
|
||||
background-color: $highlight-light2;
|
||||
background-color:var(--clr-bg-highlight);
|
||||
}
|
||||
|
||||
.description {
|
||||
background-color: $fg-light;
|
||||
background-color: var(--clr-bg-ui);
|
||||
word-break: break-word;
|
||||
padding: 1ch;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
ul.additional-results {
|
||||
background-color: $fg-light;
|
||||
background-color: var(--clr-bg-ui);
|
||||
padding: 1ch;
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
a {
|
||||
color: $fg-dark;
|
||||
color: inherit;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -631,7 +720,7 @@ footer {
|
||||
display: flex;
|
||||
font-size: 10pt;
|
||||
padding: 1ch;
|
||||
background-color: #eee;
|
||||
background-color: var(--clr-bg-highlight);
|
||||
|
||||
> * {
|
||||
margin-right: 1ch;
|
||||
@ -645,12 +734,12 @@ footer {
|
||||
padding-left: 4px;
|
||||
}
|
||||
a {
|
||||
color: #000;
|
||||
color: var(--clr-text-highlight);
|
||||
}
|
||||
}
|
||||
|
||||
@media (max-device-width: 624px) {
|
||||
body[data-has-js="true"] { // This property is set via js so we can selectively enable these changes only if JS is enabled;
|
||||
[data-has-js="true"] body { // This property is set via js so we can selectively enable these changes only if JS is enabled;
|
||||
// This is desirable since mobile navigation is JS-driven. If JS is disabled, having a squished
|
||||
// GUI is better than having no working UI.
|
||||
margin: 0 !important;
|
||||
@ -666,6 +755,8 @@ footer {
|
||||
#mcfeast {
|
||||
display: inline;
|
||||
float: right;
|
||||
width: 2rem;
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
#menu-close {
|
||||
|
@ -0,0 +1,57 @@
|
||||
/**
 * Resolves the theme to use.
 * A stored 'theme' value of 'dark' or 'light' in localStorage wins; otherwise the
 * OS colour-scheme preference decides.
 *
 * @returns {{value: string, system: boolean}} value is 'dark' or 'light'; system is
 *          true when the value came from the OS preference rather than localStorage
 */
function getTheme() {
|
||||
const theme = window.localStorage.getItem('theme');
|
||||
|
||||
// if a valid theme is set in localStorage, return it
|
||||
if (theme === 'dark' || theme === 'light') {
|
||||
return { value: theme, system: false };
|
||||
}
|
||||
|
||||
// otherwise follow the OS preference: dark if the OS theme is dark
// (note: matchMedia availability itself is not checked here)
|
||||
if (window.matchMedia('(prefers-color-scheme: dark)').matches) {
|
||||
return { value: 'dark', system: true };
|
||||
}
|
||||
|
||||
return { value: 'light', system: true };
|
||||
}
|
||||
|
||||
/**
 * Persist the user's theme choice and apply it to the document.
 *
 * 'dark' and 'light' are stored under the 'theme' key in localStorage. Any
 * other value (such as 'system') removes the stored choice, and the theme is
 * then resolved again through getTheme().
 *
 * @param {string} value 'dark', 'light', or any other value for "no explicit choice".
 */
function setTheme(value) {
    const isExplicit = value === 'dark' || value === 'light';

    try {
        if (isExplicit) {
            window.localStorage.setItem('theme', value);
        } else {
            window.localStorage.removeItem('theme');
        }
    } catch (e) {
        // Persisting is best-effort: if storage is blocked or full, the
        // choice is still applied to the current page below.
    }

    // An explicit choice is applied as-is so it takes effect even when it
    // could not be stored; otherwise resolve the theme from scratch.
    const applied = isExplicit ? value : getTheme().value;

    document.documentElement.setAttribute('data-theme', applied);
}
|
||||
|
||||
/**
 * Apply the resolved theme to the document and wire up the theme selector.
 *
 * Sets the `data-theme` attribute on the root element, syncs the
 * `#theme-select` control with a stored explicit choice, and registers
 * listeners for both selector changes and OS-level color scheme changes.
 */
function initializeTheme() {
    const themeSelect = document.getElementById('theme-select');

    const theme = getTheme();

    document.documentElement.setAttribute('data-theme', theme.value);

    // system is selected by default in the themeSwitcher so ignore it here
    if (!theme.system) {
        themeSelect.value = theme.value;
    }

    themeSelect.addEventListener('change', e => {
        setTheme(e.target.value);
    });

    const mql = window.matchMedia('(prefers-color-scheme: dark)');

    // if someone changes their theme at the OS level we need to update
    // their theme immediately if they're using their OS theme
    mql.addEventListener('change', e => {
        if (themeSelect.value !== 'system') return;

        // Only update the rendered theme. Calling setTheme('dark'/'light')
        // here would store an explicit choice in localStorage and silently
        // turn the user's "System" selection into a fixed theme on the next
        // page load.
        document.documentElement.setAttribute('data-theme', e.matches ? 'dark' : 'light');
    });
}
|
||||
|
||||
// Runs as soon as this script is evaluated.
initializeTheme();
|
@ -27,7 +27,7 @@ function setupTypeahead() {
|
||||
|
||||
for (i=0;i<items.length;i++) {
|
||||
item = document.createElement('a');
|
||||
item.innerHTML=items[i];
|
||||
item.textContent=items[i];
|
||||
item.setAttribute('href', '#')
|
||||
|
||||
function suggestionClickHandler(e) {
|
||||
|
@ -7,4 +7,15 @@
|
||||
<a href="https://memex.marginalia.nu/projects/edge/supporting.gmi">Donate</a>
|
||||
<a class="extra" href="https://search.marginalia.nu/explore/random">Random</a>
|
||||
</nav>
|
||||
<div id="theme">
|
||||
<label for="theme-select" class="screenreader-only">Color Theme</label>
|
||||
<select id="theme-select">
|
||||
<option value="system" selected>System</option>
|
||||
<option value="light">Light</option>
|
||||
<option value="dark">Dark</option>
|
||||
</select>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<!-- load this ASAP to avoid color theme flicker -->
|
||||
<script src="/theme.js"></script>
|
@ -86,7 +86,7 @@ public class ControlCrawlDataService {
|
||||
ORDER BY httpStatus
|
||||
""");
|
||||
while (rs.next()) {
|
||||
final boolean isCurrentFilter = selectedContentType.equals(rs.getString("httpStatus"));
|
||||
final boolean isCurrentFilter = selectedHttpStatus.equals(rs.getString("httpStatus"));
|
||||
final int status = rs.getInt("httpStatus");
|
||||
final int cnt = rs.getInt("cnt");
|
||||
|
||||
|
@ -24,6 +24,7 @@ import java.nio.file.Path;
|
||||
import java.sql.SQLException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
@Singleton
|
||||
@ -88,6 +89,9 @@ public class ControlNodeActionsService {
|
||||
Spark.post("/nodes/:id/actions/recrawl", this::triggerAutoRecrawl,
|
||||
redirectControl.renderRedirectAcknowledgement("Recrawling", "..")
|
||||
);
|
||||
Spark.post("/nodes/:id/actions/recrawl-single-domain", this::triggerSingleDomainRecrawl,
|
||||
redirectControl.renderRedirectAcknowledgement("Recrawling", "..")
|
||||
);
|
||||
Spark.post("/nodes/:id/actions/process", this::triggerProcess,
|
||||
redirectControl.renderRedirectAcknowledgement("Processing", "..")
|
||||
);
|
||||
@ -216,6 +220,21 @@ public class ControlNodeActionsService {
|
||||
return "";
|
||||
}
|
||||
|
||||
private Object triggerSingleDomainRecrawl(Request request, Response response) throws SQLException {
|
||||
int nodeId = Integer.parseInt(request.params("id"));
|
||||
|
||||
var toCrawl = parseSourceFileStorageId(request.queryParams("source"));
|
||||
var targetDomainName = Objects.requireNonNull(request.queryParams("targetDomainName"));
|
||||
|
||||
crawlClient.triggerRecrawlSingleDomain(
|
||||
nodeId,
|
||||
toCrawl,
|
||||
targetDomainName
|
||||
);
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
private Object triggerNewCrawl(Request request, Response response) throws SQLException {
|
||||
int nodeId = Integer.parseInt(request.params("id"));
|
||||
|
||||
|
@ -24,12 +24,20 @@
|
||||
<h2>Summary</h2>
|
||||
<table class="table">
|
||||
<tr>
|
||||
<th>Domain</th><th>File</th>
|
||||
<th>Domain</th><th>File</th><th>Crawl</th>
|
||||
</tr>
|
||||
<td>{{domain}}</td>
|
||||
<td>
|
||||
<a class="btn btn-primary" href="/nodes/{{node.id}}/storage/{{storage.id}}/transfer?path={{{path}}}">Download Parquet</a>
|
||||
</td>
|
||||
<td>
|
||||
<form method="post" action="/nodes/{{node.id}}/actions/recrawl-single-domain" onsubmit="return confirm('Confirm recrawl of {{domain}}')">
|
||||
<input type="hidden" name="source" value="{{storage.id}}">
|
||||
<input type="hidden" name="targetDomainName" value="{{domain}}">
|
||||
<button type="submit" class="btn btn-primary">Trigger Recrawl</button>
|
||||
</form>
|
||||
</td>
|
||||
|
||||
</table>
|
||||
|
||||
<h2>Contents</h2>
|
||||
|
@ -41,7 +41,7 @@ echo
|
||||
echo "1) barebones instance (1 node)"
|
||||
echo "2) barebones instance (2 nodes)"
|
||||
echo "3) full Marginalia Search instance?"
|
||||
echo "4) non-docker install? (not recommended)"
|
||||
echo "4) non-docker install? (proof-of-concept, not recommended)"
|
||||
echo
|
||||
|
||||
read -p "Enter 1, 2, 3, or 4: " INSTANCE_TYPE
|
||||
@ -149,17 +149,24 @@ elif [ "${INSTANCE_TYPE}" == "4" ]; then
|
||||
envsubst < install/docker-compose-scaffold.yml.template >${INSTALL_DIR}/docker-compose.yml
|
||||
|
||||
cat <<EOF > ${INSTALL_DIR}/README
|
||||
Quick note about running Marginalia Search in a non-docker environment:
|
||||
Quick note about running Marginalia Search in a non-docker environment.
|
||||
|
||||
* The template sets up a sample (in-docker) setup for
|
||||
mariadb and zookeeper. These can also be run outside
|
||||
of docker, but you will need to update the db.properties
|
||||
file and "zookeeper-hosts" in the system.properties
|
||||
file to point to the correct locations/addresses.
|
||||
* Each service is spawned by the same launcher. When building
|
||||
the project with "gradlew assemble", the launcher is put in
|
||||
"code/services-core/single-service-runner/build/distributions/marginalia.tar".
|
||||
This needs to be extracted.
|
||||
Beware that this installation mode is more of a proof-of-concept and demonstration that the
|
||||
system is not unhealthily dependent on docker, than a production-ready setup, and is not
|
||||
recommended for production use! The container setup is much more robust and easier to manage.
|
||||
|
||||
Note: This script only sets up an install directory, and does not build the system.
|
||||
You will need to build the system with "gradlew assemble" before you can run it.
|
||||
|
||||
Each service is spawned by the same launcher. After building the project with
|
||||
"gradlew assemble", the launcher is put in "code/services-core/single-service-runner/build/distributions/marginalia.tar".
|
||||
This needs to be extracted!
|
||||
|
||||
Note: The template sets up a sample (in-docker) setup for mariadb and zookeeper. These can also be run outside
|
||||
of docker, but you will need to update the db.properties file and "zookeeper-hosts" in the system.properties
|
||||
file to point to the correct locations/addresses.
|
||||
|
||||
Running:
|
||||
|
||||
To launch a process you need to unpack it, and then run the launcher with the
|
||||
appropriate arguments. For example:
|
||||
@ -177,13 +184,16 @@ A working setup needs at all the services
|
||||
* index [ http port is internal ]
|
||||
* executor [ http port is internal ]
|
||||
|
||||
The index and executor services should be on the same partition e.g. index:1 and executor:1,
|
||||
which should be a number larger than 0. You can have multiple pairs of index and executor partitions,
|
||||
but the pair should run on the same physical machine with the same install directory.
|
||||
Since you will need to manage ports yourself, you must assign distinct ports-pairs to each service.
|
||||
|
||||
The query service can use any partition number.
|
||||
* An index and an executor service should exist on the same partition, e.g. index:1 and executor:1. The partition
|
||||
number is the last digit of the service name, and should be positive. You can have multiple pairs of index
|
||||
and executor partitions, but the pair should run on the same physical machine with the same install directory.
|
||||
|
||||
* The query service can use any partition number.
|
||||
|
||||
* The control service should be on partition 1.
|
||||
|
||||
The control service should be on partition 1.
|
||||
EOF
|
||||
|
||||
echo
|
||||
|
@ -3,11 +3,11 @@
|
||||
This directory is a staging area for running the system. It contains scripts
|
||||
and templates for installing the system on a server, and for running it locally.
|
||||
|
||||
See [https://docs.marginalia.nu/](https://docs.marginalia.nu/) for additional
|
||||
documentation.
|
||||
|
||||
## Requirements
|
||||
|
||||
**x86-64 Linux** - The system is only tested on x86-64 Linux. It may work on other
|
||||
platforms, but for lack of suitable hardware, this cannot be guaranteed.
|
||||
|
||||
**Docker** - It is a bit of a pain to install, but if you follow
|
||||
[this guide](https://docs.docker.com/engine/install/ubuntu/#install-using-the-repository) you're on the right track for ubuntu-like systems.
|
||||
|
||||
@ -15,7 +15,12 @@ documentation.
|
||||
The civilized way of installing this is to use [SDKMAN](https://sdkman.io/);
|
||||
graalce is a good distribution choice but it doesn't matter too much.
|
||||
|
||||
## Set up
|
||||
## Quick Set up
|
||||
|
||||
[https://docs.marginalia.nu/](https://docs.marginalia.nu/) has a more comprehensive guide for the install
|
||||
and operation of the search engine. This is a quick guide for the impatient.
|
||||
|
||||
---
|
||||
|
||||
To go from a clean check out of the git repo to a running search engine,
|
||||
follow these steps.
|
||||
@ -51,6 +56,8 @@ you for which installation mode you want to use. The options are:
|
||||
2. Full Marginalia Search instance - This will install an instance of the search engine
|
||||
configured like [search.marginalia.nu](https://search.marginalia.nu). This is useful
|
||||
for local development and testing.
|
||||
3. Non-docker installation - This will install the system outside of docker.
|
||||
This is still an experimental run-mode.
|
||||
|
||||
It will also prompt you for account details for a new mariadb instance, which will be
|
||||
created for you. The database will be initialized with the schema and data required
|
||||
|
@ -210,8 +210,8 @@ dependencyResolutionManagement {
|
||||
library('sqlite','org.xerial','sqlite-jdbc').version('3.41.2.2')
|
||||
library('javax.annotation','javax.annotation','javax.annotation-api').version('1.3.2')
|
||||
|
||||
library('parquet-column', 'org.apache.parquet','parquet-column').version('1.13.1')
|
||||
library('parquet-hadoop', 'org.apache.parquet','parquet-hadoop').version('1.13.1')
|
||||
library('parquet-column', 'org.apache.parquet','parquet-column').version('1.14.0')
|
||||
library('parquet-hadoop', 'org.apache.parquet','parquet-hadoop').version('1.14.0')
|
||||
|
||||
library('curator-framework', 'org.apache.curator','curator-framework').version('5.6.0')
|
||||
library('curator-x-discovery', 'org.apache.curator','curator-x-discovery').version('5.6.0')
|
||||
|
2
third-party/parquet-floor/build.gradle
vendored
2
third-party/parquet-floor/build.gradle
vendored
@ -9,7 +9,7 @@ java {
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation ('org.apache.parquet:parquet-column:1.13.1') {
|
||||
implementation ('org.apache.parquet:parquet-column:1.14.0') {
|
||||
transitive = true
|
||||
}
|
||||
implementation('org.apache.parquet:parquet-hadoop:1.13.1') {
|
||||
|
@ -1,6 +1,7 @@
|
||||
package org.apache.hadoop.conf;
|
||||
|
||||
public class Configuration {
|
||||
public Configuration(boolean x) {}
|
||||
|
||||
public boolean getBoolean(String x, boolean y) {
|
||||
return y;
|
||||
|
Loading…
Reference in New Issue
Block a user