Remove unrelated code, break tools into their own directory.

This commit is contained in:
Viktor Lofgren 2023-03-17 16:03:11 +01:00
parent 449471a076
commit 2eb972dea1
224 changed files with 377 additions and 9972 deletions

View File

@ -3,9 +3,7 @@ package nu.marginalia.model.idx;
import java.util.EnumSet; import java.util.EnumSet;
public enum DocumentFlags { public enum DocumentFlags {
/** Simple processing was done, this document should be de-prioritized as a search result */ UnusedBit1,
Simple,
PlainText, PlainText,
UnusedBit2, UnusedBit2,
UnusedBit3, UnusedBit3,

View File

@ -67,7 +67,7 @@ public class DocumentKeywordExtractor {
String flatWord = AsciiFlattener.flattenUnicode(word.word); String flatWord = AsciiFlattener.flattenUnicode(word.word);
if (WordPatterns.hasWordQualities(flatWord)) { if (!flatWord.isBlank()) {
wordsBuilder.add(flatWord, metadata.getMetadataForWord(word.stemmed)); wordsBuilder.add(flatWord, metadata.getMetadataForWord(word.stemmed));
} }
} }

View File

@ -220,7 +220,7 @@ public class KeywordExtractor {
} }
String word = sentence.constructWordFromSpan(w); String word = sentence.constructWordFromSpan(w);
if (word.isBlank() || !WordPatterns.filter(word)) return false; if (word.isBlank() || !WordPatterns.isNotJunkWord(word)) return false;
if (sentence.posTags[w.start].equals("CC")) return false; if (sentence.posTags[w.start].equals("CC")) return false;
if (sentence.posTags[w.end-1].equals("IN")) return false; if (sentence.posTags[w.end-1].equals("IN")) return false;
if (sentence.posTags[w.end-1].equals("DT")) return false; if (sentence.posTags[w.end-1].equals("DT")) return false;

View File

@ -12,11 +12,6 @@ public class IndexJournalEntryBuilder {
this.documentMeta = documentMeta; this.documentMeta = documentMeta;
} }
public IndexJournalEntryBuilder capacity(int size) {
items.ensureCapacity(size);
return this;
}
public IndexJournalEntryBuilder add(long wordId, long metadata) { public IndexJournalEntryBuilder add(long wordId, long metadata) {
items.add(wordId); items.add(wordId);

View File

@ -13,12 +13,13 @@ java {
} }
dependencies { dependencies {
implementation project(':code:libraries:language-processing') implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:ngram-bloom-filter')
implementation project(':code:libraries:term-frequency-dict') implementation project(':code:libraries:term-frequency-dict')
implementation project(':code:features-convert:keyword-extraction') implementation project(':code:features-convert:keyword-extraction')
implementation project(':code:common:config') implementation project(':code:common:config')
implementation project(':code:common:model') implementation project(':code:common:model')
implementation project(':third-party:porterstemmer')
implementation libs.lombok implementation libs.lombok
annotationProcessor libs.lombok annotationProcessor libs.lombok
implementation libs.bundles.slf4j implementation libs.bundles.slf4j
@ -26,6 +27,7 @@ dependencies {
implementation libs.bundles.handlebars implementation libs.bundles.handlebars
implementation libs.trove implementation libs.trove
implementation libs.guice
testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit testImplementation libs.bundles.junit

View File

@ -1,4 +1,4 @@
package nu.marginalia.language.statistics; package nu.marginalia.language;
import com.google.inject.Inject; import com.google.inject.Inject;
import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.term_frequency_dict.TermFrequencyDict;

View File

@ -1,4 +1,4 @@
package nu.marginalia.ngram_bloom_filter; package nu.marginalia.ngrams;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;

View File

@ -1,4 +1,4 @@
package nu.marginalia.ngram_bloom_filter; package nu.marginalia.ngrams;
import ca.rmen.porterstemmer.PorterStemmer; import ca.rmen.porterstemmer.PorterStemmer;
import com.google.common.hash.HashFunction; import com.google.common.hash.HashFunction;

View File

@ -6,9 +6,9 @@ import lombok.Getter;
import lombok.ToString; import lombok.ToString;
import nu.marginalia.LanguageModels; import nu.marginalia.LanguageModels;
import nu.marginalia.keyword.KeywordExtractor; import nu.marginalia.keyword.KeywordExtractor;
import nu.marginalia.language.statistics.EnglishDictionary; import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.language.sentence.SentenceExtractor; import nu.marginalia.language.sentence.SentenceExtractor;
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter; import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.language.model.DocumentSentence; import nu.marginalia.language.model.DocumentSentence;
import nu.marginalia.language.model.WordSpan; import nu.marginalia.language.model.WordSpan;

View File

@ -1,4 +1,4 @@
package nu.marginalia.ngram_bloom_filter; package nu.marginalia.ngrams;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;

View File

@ -1,8 +1,8 @@
package nu.marginalia.query_parser; package nu.marginalia.query_parser;
import nu.marginalia.LanguageModels; import nu.marginalia.LanguageModels;
import nu.marginalia.language.statistics.EnglishDictionary; import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter; import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.query_parser.token.TokenType; import nu.marginalia.query_parser.token.TokenType;
import nu.marginalia.util.TestLanguageModels; import nu.marginalia.util.TestLanguageModels;

View File

@ -1,8 +1,8 @@
package nu.marginalia.query_parser; package nu.marginalia.query_parser;
import nu.marginalia.LanguageModels; import nu.marginalia.LanguageModels;
import nu.marginalia.language.statistics.EnglishDictionary; import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter; import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.util.TestLanguageModels; import nu.marginalia.util.TestLanguageModels;
import nu.marginalia.language.sentence.SentenceExtractor; import nu.marginalia.language.sentence.SentenceExtractor;

View File

@ -41,7 +41,7 @@ class TermCoherenceFactorTest {
assertEquals(0, termCoherenceFactor.calculate(allPositionsSet)); assertEquals(0, termCoherenceFactor.calculate(allPositionsSet));
} }
@Test @Test @SuppressWarnings("unchecked")
public void testLowPosMatches() { public void testLowPosMatches() {
var allPositionsSet = createSet( var allPositionsSet = createSet(
List.of(0, 1, 2, 3), List.of(0, 1, 2, 3) List.of(0, 1, 2, 3), List.of(0, 1, 2, 3)
@ -53,7 +53,7 @@ class TermCoherenceFactorTest {
assertEquals(1.0, termCoherenceFactor.bitPositionFactor(mask), 0.01); assertEquals(1.0, termCoherenceFactor.bitPositionFactor(mask), 0.01);
} }
@Test @Test @SuppressWarnings("unchecked")
public void testHiPosMatches() { public void testHiPosMatches() {
var allPositionsSet = createSet( var allPositionsSet = createSet(
List.of(28, 29, 30, 31), List.of(28, 29, 30, 31) List.of(28, 29, 30, 31), List.of(28, 29, 30, 31)

View File

@ -8,8 +8,6 @@ import java.io.InputStreamReader;
import java.util.HashSet; import java.util.HashSet;
import java.util.Objects; import java.util.Objects;
import java.util.Set; import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
/** Regular expression patterns for deciding which words are eligible to be keywords. /** Regular expression patterns for deciding which words are eligible to be keywords.
* <p/> * <p/>
@ -44,25 +42,17 @@ public class WordPatterns {
} }
} }
private static boolean hasMoreThanTwo(String s, char c, int max) {
int idx = 0;
for (int i = 0; i <= max; i++) {
idx = s.indexOf(c, idx+1);
if (idx < 0 || idx >= s.length() - 1)
return false;
}
return true;
}
/** Run checks on the word and exclude terms with too many special characters
public static boolean filter(String word) { */
public static boolean isNotJunkWord(String word) {
if (word.isBlank()) { if (word.isBlank()) {
return false; return false;
} }
if (hasMoreThanTwo(word, '-', 4)) { if (hasMoreThanN(word, '-', 4)) {
return false; return false;
} }
if (hasMoreThanTwo(word, '+', 2)) { if (hasMoreThanN(word, '+', 2)) {
return false; return false;
} }
if (word.startsWith("-") if (word.startsWith("-")
@ -83,29 +73,13 @@ public class WordPatterns {
return true; return true;
} }
public static boolean hasWordQualities(String s) { private static boolean hasMoreThanN(String s, char c, int max) {
if (s.isBlank()) int idx = 0;
return false; for (int i = 0; i <= max; i++) {
idx = s.indexOf(c, idx+1);
int start = 0; if (idx < 0 || idx >= s.length() - 1)
int end = s.length(); return false;
if (s.charAt(0) == '#') start++;
if (end > 1 && s.charAt(end-1) == '#') end--;
for (int i = start; i < end; i++) {
char c = s.charAt(i);
if (("_@.'+-".indexOf(c) < 0)
&& !(c >= 'a' && c <= 'z')
&& !(c >= 'A' && c <= 'Z')
&& !(c >= '0' && c <= '9')
&& !(c >= '\u00C0' && c <= '\u00D6')
&& !(c >= '\u00D8' && c <= '\u00f6')
&& !(c >= '\u00f8' && c <= '\u00ff'))
{
return false;
}
} }
return true; return true;
} }
@ -113,10 +87,8 @@ public class WordPatterns {
if (s.length() < MIN_WORD_LENGTH) { if (s.length() < MIN_WORD_LENGTH) {
return true; return true;
} }
if (!hasWordQualities(s)) {
return true; if (!isNotJunkWord(s)) {
}
if (!filter(s)) {
return true; return true;
} }

View File

@ -1,42 +0,0 @@
plugins {
id 'java'
id "io.freefair.lombok" version "5.3.3.3"
id "de.undercouch.download" version "5.1.0"
}
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(17))
}
}
dependencies {
implementation project(':code:common:config')
implementation project(':third-party:porterstemmer')
implementation libs.lombok
annotationProcessor libs.lombok
implementation libs.bundles.slf4j
implementation libs.notnull
implementation libs.bundles.nlp
implementation libs.guice
implementation libs.trove
implementation libs.fastutil
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}
test {
useJUnitPlatform()
}
task fastTests(type: Test) {
useJUnitPlatform {
excludeTags "slow"
}
}

View File

@ -21,6 +21,7 @@ dependencies {
implementation project(':code:common:model') implementation project(':code:common:model')
implementation project(':code:common:config') implementation project(':code:common:config')
implementation project(':code:libraries:easy-lsh') implementation project(':code:libraries:easy-lsh')
implementation project(':code:libraries:array')
implementation libs.lombok implementation libs.lombok
annotationProcessor libs.lombok annotationProcessor libs.lombok

View File

@ -0,0 +1,12 @@
# Term Frequency Dictionary
This dictionary is used by various parts of the system to evaluate, for example,
the TF-IDF score of a keyword.
## Central Classes
* [TermFrequencyDict](src/main/java/nu/marginalia/term_frequency_dict/TermFrequencyDict.java)
## See Also
* [tools/term-frequency-extractor](../../tools/term-frequency-extractor) constructs the dictionary file

View File

@ -1,8 +1,10 @@
package nu.marginalia.term_frequency_dict; package nu.marginalia.term_frequency_dict;
import ca.rmen.porterstemmer.PorterStemmer; import ca.rmen.porterstemmer.PorterStemmer;
import gnu.trove.map.hash.TLongIntHashMap; import it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap;
import lombok.SneakyThrows;
import nu.marginalia.LanguageModels; import nu.marginalia.LanguageModels;
import nu.marginalia.array.LongArray;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -14,39 +16,45 @@ import java.io.*;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
/** Dictionary with term frequency information for (stemmed) words.
*
*/
@Singleton @Singleton
public class TermFrequencyDict { public class TermFrequencyDict {
private final TLongIntHashMap wordRates = new TLongIntHashMap(1_000_000, 0.5f, 0, 0); private final Long2IntOpenHashMap wordRates;
private final Logger logger = LoggerFactory.getLogger(getClass()); private static final Logger logger = LoggerFactory.getLogger(TermFrequencyDict.class);
private static final PorterStemmer ps = new PorterStemmer(); private static final PorterStemmer ps = new PorterStemmer();
private static final long DOC_COUNT_KEY = ~0L; public static final long DOC_COUNT_KEY = ~0L;
@Inject @Inject
public TermFrequencyDict(@NotNull LanguageModels models) { public TermFrequencyDict(@NotNull LanguageModels models) {
this(models.termFrequencies); this(models.termFrequencies);
} }
@SneakyThrows
public TermFrequencyDict(Path file) { public TermFrequencyDict(Path file) {
try (var frequencyData = new DataInputStream(new BufferedInputStream(new FileInputStream(file.toFile())))) {
wordRates.ensureCapacity((int)(Files.size(file)/16));
for (;;) {
wordRates.put(frequencyData.readLong(), (int) frequencyData.readLong());
}
} catch (EOFException eof) {
// ok
} catch (IOException e) {
logger.error("IO Exception reading " + file, e);
}
wordRates = load(file);
logger.info("Read {} N-grams frequencies", wordRates.size()); logger.info("Read {} N-grams frequencies", wordRates.size());
} }
public TermFrequencyDict(TLongIntHashMap data) { private static Long2IntOpenHashMap load(Path file) throws IOException {
wordRates.putAll(data); LongArray array = LongArray.mmapRead(file);
int size = (int) Files.size(file)/16;
var ret = new Long2IntOpenHashMap(size, 0.5f);
ret.defaultReturnValue(0);
for (int i = 0; i < size; i++) {
ret.put(array.get(2*i), (int) array.get(2*i + 1));
}
return ret;
} }
/** Total number of documents in the corpus */
public int docCount() { public int docCount() {
int cnt = wordRates.get(DOC_COUNT_KEY); int cnt = wordRates.get(DOC_COUNT_KEY);
@ -56,91 +64,20 @@ public class TermFrequencyDict {
return cnt; return cnt;
} }
// WIP refactoring, this needs a new home: /** Get the term frequency for the string s */
// public long getTermFreq(String s) {
// public static void main(String... args) throws IOException, InterruptedException { return wordRates.get(getStringHash(s));
// if (args.length != 2) { }
// System.err.println("Expected arguments: plan.yaml out-file");
// } /** Get the term frequency for the already stemmed string s */
// String outFile = args[1]; public long getTermFreqStemmed(String s) {
// return wordRates.get(longHash(s.getBytes()));
// var plan = new CrawlPlanLoader().load(Path.of(args[0])); }
//
// ThreadLocal<SentenceExtractor> se = ThreadLocal.withInitial(() -> new SentenceExtractor(WmsaHome.getLanguageModels())); /** Get the term frequency for the already stemmed and already hashed value 'hash' */
// LanguageFilter lf = new LanguageFilter(); public long getTermFreqHash(long hash) {
// return wordRates.get(hash);
// TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1); }
//
// ForkJoinPool fjp = new ForkJoinPool(24);
// AtomicInteger docCount = new AtomicInteger();
//
// for (var domain : plan.domainsIterable()) { // leaks file descriptor, is fine
//
// if (domain.doc == null)
// continue;
//
// fjp.execute(() -> {
//
// TLongHashSet words = new TLongHashSet(10_000);
//
// for (var doc : domain.doc) {
//
// if (doc.documentBody == null)
// continue;
// docCount.incrementAndGet();
//
// Document parsed = Jsoup.parse(doc.documentBody.decode());
// parsed.body().filter(new DomPruningFilter(0.5));
//
// DocumentLanguageData dld = se.get().extractSentences(parsed);
//
// if (lf.dictionaryAgreement(dld) < 0.1) {
// return;
// }
//
// for (var sent : dld.sentences) {
// for (var word : sent) {
// words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
// }
// }
//
// synchronized (counts) {
// words.forEach(w -> {
// counts.adjustOrPutValue(w, 1, 1);
// return true;
// });
// }
//
// words.clear();
// }
//
// System.out.println(domain.domain + "\t" + counts.size());
// });
//
//
// }
//
// fjp.shutdown();
// fjp.awaitTermination(10, TimeUnit.DAYS);
//
// try (var dos = new DataOutputStream(Files.newOutputStream(Path.of(outFile)))) {
// synchronized (counts) {
// counts.put(DOC_COUNT_KEY, docCount.get());
//
// counts.forEachEntry((hash, cnt) -> {
// try {
// dos.writeLong(hash);
// dos.writeLong(cnt);
// } catch (IOException e) {
// throw new RuntimeException(e);
// }
// return true;
// });
// }
// }
//
// System.out.println(docCount.get());
// }
public static long getStringHash(String s) { public static long getStringHash(String s) {
if (s.indexOf(' ') >= 0 || s.indexOf('_') >= 0) { if (s.indexOf(' ') >= 0 || s.indexOf('_') >= 0) {
@ -156,17 +93,11 @@ public class TermFrequencyDict {
} }
} }
public long getTermFreqHash(long hash) { /** The hashing function used by TermFrequencyHash
return wordRates.get(hash); * <p>
} * If this function changes its behavior in any way,
public long getTermFreq(String s) { * it is necessary to re-generate the dictionary.
return wordRates.get(getStringHash(s)); */
}
public long getTermFreqStemmed(String s) {
return wordRates.get(longHash(s.getBytes()));
}
// If this ever changes, we need to re-generate the term frequency dictionary
public static long longHash(byte[]... bytesSets) { public static long longHash(byte[]... bytesSets) {
if (bytesSets == null || bytesSets.length == 0) if (bytesSets == null || bytesSets.length == 0)
return 0; return 0;

View File

@ -2,3 +2,48 @@
Contains models shared by the [converting-process](../../processes/converting-process/) and Contains models shared by the [converting-process](../../processes/converting-process/) and
[loading-process](../../processes/loading-process/). [loading-process](../../processes/loading-process/).
## Design
The two processes communicate through a file-based protocol. The converter serializes [instructions](src/main/java/nu/marginalia/converting/instruction/Instruction.java)
to file, which are deserialized by the loader and fed into an [interpreter](src/main/java/nu/marginalia/converting/instruction/Interpreter.java).
The instructions implement a visitor pattern.
Conceptually, the pattern can be thought of as a bit like remote function calls over a file,
or a crude instruction-based programming language.
This
```java
producer.foo("cat");
producer.bar("milk", "eggs", "bread");
```
translates, through this paradigm, to this:
```
(producer)
writeInstruction(DoFoo("cat"))
writeInstruction(DoBar("milk", "eggs", "bread"))
(consumer)
while read instruction:
interpreter.apply(instruction)
(Interpreter)
doFoo(animal):
...
doBar(ingredients):
...
(doFoo)
DoFoo(animal):
apply(interpreter):
interpreter.foo(animal)
(doBar)
DoBar(ingredients):
apply(interpreter):
interpreter.bar(ingredients)
```

View File

@ -7,6 +7,7 @@ Contains models shared by the [crawling-process](../../processes/crawling-proces
* [CrawledDocument](src/main/java/nu/marginalia/crawling/model/CrawledDocument.java) * [CrawledDocument](src/main/java/nu/marginalia/crawling/model/CrawledDocument.java)
* [CrawledDomain](src/main/java/nu/marginalia/crawling/model/CrawledDomain.java) * [CrawledDomain](src/main/java/nu/marginalia/crawling/model/CrawledDomain.java)
* [CrawlingSpecification](src/main/java/nu/marginalia/crawling/model/spec/CrawlingSpecification.java)
### Marshalling ### Marshalling
* [CrawledDomainReader](src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java) * [CrawledDomainReader](src/main/java/nu/marginalia/crawling/io/CrawledDomainReader.java)

View File

@ -1,4 +1,4 @@
package nu.marginalia.language; package nu.marginalia.converting.language;
import nu.marginalia.language.encoding.UnicodeRanges; import nu.marginalia.language.encoding.UnicodeRanges;
import nu.marginalia.language.model.DocumentLanguageData; import nu.marginalia.language.model.DocumentLanguageData;

View File

@ -29,7 +29,7 @@ public class ProcessedDocument {
if (details == null) if (details == null)
return false; return false;
return !details.metadata.hasFlag(DocumentFlags.Simple); return true;
} }
public OptionalDouble quality() { public OptionalDouble quality() {

View File

@ -2,7 +2,7 @@ package nu.marginalia.converting.processor.plugin;
import nu.marginalia.crawling.model.CrawledDocument; import nu.marginalia.crawling.model.CrawledDocument;
import nu.marginalia.crawling.model.CrawledDomain; import nu.marginalia.crawling.model.CrawledDomain;
import nu.marginalia.language.LanguageFilter; import nu.marginalia.converting.language.LanguageFilter;
import nu.marginalia.language.model.DocumentLanguageData; import nu.marginalia.language.model.DocumentLanguageData;
import nu.marginalia.converting.model.HtmlStandard; import nu.marginalia.converting.model.HtmlStandard;
import nu.marginalia.keyword.model.DocumentKeywordsBuilder; import nu.marginalia.keyword.model.DocumentKeywordsBuilder;

View File

@ -4,6 +4,7 @@ package nu.marginalia.converting;
import com.google.inject.Guice; import com.google.inject.Guice;
import com.google.inject.Injector; import com.google.inject.Injector;
import nu.marginalia.bigstring.BigString; import nu.marginalia.bigstring.BigString;
import nu.marginalia.converting.model.HtmlStandard;
import nu.marginalia.converting.processor.DomainProcessor; import nu.marginalia.converting.processor.DomainProcessor;
import nu.marginalia.crawling.model.CrawledDocument; import nu.marginalia.crawling.model.CrawledDocument;
import nu.marginalia.crawling.model.CrawledDomain; import nu.marginalia.crawling.model.CrawledDomain;
@ -22,8 +23,7 @@ import static org.junit.jupiter.api.Assertions.*;
public class ConvertingIntegrationTest { public class ConvertingIntegrationTest {
private DomainProcessor domainProcessor;
DomainProcessor domainProcessor;
@BeforeEach @BeforeEach
public void setUp() { public void setUp() {
@ -60,7 +60,22 @@ public class ConvertingIntegrationTest {
ret.documents.forEach(doc -> { ret.documents.forEach(doc -> {
resultsByStatusCount.merge(doc.state, 1, Integer::sum); resultsByStatusCount.merge(doc.state, 1, Integer::sum);
}); });
assertTrue(resultsByStatusCount.get(UrlIndexingState.OK) > 5);
assertTrue(resultsByStatusCount.get(UrlIndexingState.OK) > 25);
for (var doc : ret.documents) {
if (!doc.isProcessedFully()) {
continue;
}
var details = doc.details;
assertTrue(details.title.length() > 4);
assertTrue(details.description.length() > 4);
assertEquals(HtmlStandard.HTML5, details.standard);
}
} }
private CrawledDomain readMarginaliaWorkingSet() throws IOException { private CrawledDomain readMarginaliaWorkingSet() throws IOException {

View File

@ -1,6 +1,5 @@
package nu.marginalia.crawling; package nu.marginalia.converting.language;
import nu.marginalia.language.LanguageFilter;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;

View File

@ -0,0 +1,3 @@
# Test Data
This is a snapshot of memex.marginalia.nu from 2023-03-17.

View File

@ -1,4 +0,0 @@
# Crawl Job Extractor
The crawl job extractor creates a file containing a list of domains
along with known URLs. This is consumed by the [crawling-process](../crawling-process).

View File

@ -1,22 +1,20 @@
# Processes # Processes
## 1. Crawl Job Extractor ## 1. Crawl Process
The [crawl-job-extractor-process](crawl-job-extractor-process/) creates a crawl job specification
based on the content in the database.
## 2. Crawl Process
The [crawling-process](crawling-process/) fetches website contents and saves them The [crawling-process](crawling-process/) fetches website contents and saves them
as compressed JSON models described in [crawling-model](../process-models/crawling-model/). as compressed JSON models described in [crawling-model](../process-models/crawling-model/).
## 3. Converting Process The operation is specified by a crawl job specification. This is generated by [tools/crawl-job-extractor](../tools/crawl-job-extractor/)
based on the content in the database.
## 2. Converting Process
The [converting-process](converting-process/) reads crawl data from the crawling step and The [converting-process](converting-process/) reads crawl data from the crawling step and
processes them, extracting keywords and metadata and saves them as compressed JSON models processes them, extracting keywords and metadata and saves them as compressed JSON models
described in [converting-model](../process-models/converting-model/). described in [converting-model](../process-models/converting-model/).
## 4. Loading Process ## 3. Loading Process
The [loading-process](loading-process/) reads the processed data and creates an index journal The [loading-process](loading-process/) reads the processed data and creates an index journal
and lexicon, and loads domains and addresses into the MariaDB-database. and lexicon, and loads domains and addresses into the MariaDB-database.

View File

@ -21,11 +21,15 @@ You'll find a short description in each module of what it does and how it relate
Processes are batch jobs that deal with data retrieval, processing and loading. Processes are batch jobs that deal with data retrieval, processing and loading.
* [processes](processes/) * [processes](processes/)
* * [crawl-job-extractor](processes/crawl-job-extractor-process)
* * [crawling-process](processes/crawling-process) * * [crawling-process](processes/crawling-process)
* * [converting-process](processes/converting-process) * * [converting-process](processes/converting-process)
* * [loading-process](processes/loading-process) * * [loading-process](processes/loading-process)
#### Tools
* * [crawl-job-extractor](tools/crawl-job-extractor)
* * [term-frequency-extractor](tools/term-frequency-extractor)
### Features ### Features
Features are relatively stand-alone components that serve some part of the domain. They aren't domain-independent, Features are relatively stand-alone components that serve some part of the domain. They aren't domain-independent,

View File

@ -29,7 +29,6 @@ dependencies {
implementation project(':code:libraries:easy-lsh') implementation project(':code:libraries:easy-lsh')
implementation project(':code:libraries:language-processing') implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:braille-block-punch-cards') implementation project(':code:libraries:braille-block-punch-cards')
implementation project(':code:libraries:ngram-bloom-filter')
implementation project(':code:libraries:term-frequency-dict') implementation project(':code:libraries:term-frequency-dict')
implementation project(':code:api:assistant-api') implementation project(':code:api:assistant-api')

View File

@ -8,8 +8,8 @@ import nu.marginalia.index.client.model.query.SearchSubquery;
import nu.marginalia.index.query.limit.QueryLimits; import nu.marginalia.index.query.limit.QueryLimits;
import nu.marginalia.index.query.limit.QueryStrategy; import nu.marginalia.index.query.limit.QueryStrategy;
import nu.marginalia.index.query.limit.SpecificationLimit; import nu.marginalia.index.query.limit.SpecificationLimit;
import nu.marginalia.language.statistics.EnglishDictionary; import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter; import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.query_parser.QueryParser; import nu.marginalia.query_parser.QueryParser;
import nu.marginalia.query_parser.QueryPermutation; import nu.marginalia.query_parser.QueryPermutation;

View File

@ -2,9 +2,9 @@ package nu.marginalia.search.query;
import nu.marginalia.WmsaHome; import nu.marginalia.WmsaHome;
import nu.marginalia.index.query.limit.SpecificationLimitType; import nu.marginalia.index.query.limit.SpecificationLimitType;
import nu.marginalia.language.statistics.EnglishDictionary; import nu.marginalia.language.EnglishDictionary;
import nu.marginalia.index.client.model.query.SearchSpecification; import nu.marginalia.index.client.model.query.SearchSpecification;
import nu.marginalia.ngram_bloom_filter.NGramBloomFilter; import nu.marginalia.ngrams.NGramBloomFilter;
import nu.marginalia.term_frequency_dict.TermFrequencyDict; import nu.marginalia.term_frequency_dict.TermFrequencyDict;
import nu.marginalia.search.command.SearchJsParameter; import nu.marginalia.search.command.SearchJsParameter;
import nu.marginalia.search.model.SearchProfile; import nu.marginalia.search.model.SearchProfile;

View File

@ -31,7 +31,7 @@ dependencies {
implementation libs.bundles.mariadb implementation libs.bundles.mariadb
implementation libs.guice implementation libs.guice
implementation libs.gson implementation libs.bundles.gson
implementation libs.zstd implementation libs.zstd
testImplementation libs.bundles.slf4j.test testImplementation libs.bundles.slf4j.test

View File

@ -0,0 +1,6 @@
# Crawl Job Extractor
The crawl job extractor creates a file containing a list of domains
along with known URLs.
This is consumed by [processes/crawling-process](../../processes/crawling-process).

View File

@ -0,0 +1,62 @@
// Gradle build script for the term-frequency-extractor application.
plugins {
id 'java'
id "io.freefair.lombok" version "5.3.3.3"
id 'application'
id 'jvm-test-suite'
}
// Build with a Java 17 toolchain.
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(17))
}
}
// Main class and application name for the 'application' plugin.
application {
mainClass = 'nu.marginalia.tools.TermFrequencyExtractor'
applicationName = 'term-frequency-extractor'
}
// The distZip task is switched off.
tasks.distZip.enabled = false
dependencies {
// Third-party code vendored into this repository
implementation project(':third-party:rdrpostagger')
implementation project(':third-party:porterstemmer')
implementation project(':third-party:monkey-patch-opennlp')
// Project modules
implementation project(':code:common:model')
implementation project(':code:common:config')
implementation project(':code:common:process')
implementation project(':code:libraries:language-processing')
implementation project(':code:libraries:term-frequency-dict')
implementation project(':code:libraries:big-string')
implementation project(':code:processes:converting-process')
implementation project(':code:process-models:crawling-model')
// External libraries from the version catalog
implementation libs.lombok
annotationProcessor libs.lombok
implementation libs.bundles.slf4j
implementation libs.notnull
implementation libs.guice
implementation libs.jsoup
implementation libs.trove
implementation libs.fastutil
implementation libs.bundles.nlp
implementation libs.commons.lang3
// Test-only dependencies
testImplementation libs.bundles.slf4j.test
testImplementation libs.bundles.junit
testImplementation libs.mockito
}
// Run the default test task on the JUnit Platform.
test {
useJUnitPlatform()
}
// Additional test task that excludes tests tagged "slow".
task fastTests(type: Test) {
useJUnitPlatform {
excludeTags "slow"
}
}

View File

@ -0,0 +1,16 @@
# Term Frequency Extractor
Generates a term frequency dictionary file from a batch of crawl data.
Usage:
```shell
PATH_TO_SAMPLES=run/samples/crawl-s
export JAVA_OPTS=-Dcrawl.rootDirRewrite=/crawl:${PATH_TO_SAMPLES}
term-frequency-extractor ${PATH_TO_SAMPLES}/plan.yaml out.dat
```
## See Also
* [libraries/term-frequency-dict](../../libraries/term-frequency-dict)

View File

@ -0,0 +1,114 @@
package nu.marginalia.tools;
import gnu.trove.map.hash.TLongIntHashMap;
import gnu.trove.set.hash.TLongHashSet;
import nu.marginalia.WmsaHome;
import nu.marginalia.converting.language.LanguageFilter;
import nu.marginalia.converting.processor.logic.dom.DomPruningFilter;
import nu.marginalia.language.model.DocumentLanguageData;
import nu.marginalia.language.sentence.SentenceExtractor;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import plan.CrawlPlanLoader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import static nu.marginalia.term_frequency_dict.TermFrequencyDict.DOC_COUNT_KEY;
import static nu.marginalia.term_frequency_dict.TermFrequencyDict.longHash;
/**
 * Command line tool that builds a term frequency dictionary from a batch of crawl data.
 * <p>
 * For each visited document the distinct stemmed words are hashed with
 * {@code TermFrequencyDict.longHash}, and the counter of every hash is increased once
 * per document containing it.  The result is written to the output file as a flat
 * sequence of (long hash, long count) pairs; the number of visited documents is stored
 * under {@code DOC_COUNT_KEY}.
 * <p>
 * Expected arguments: {@code plan.yaml out-file}.
 */
public class TermFrequencyExtractor {

    /**
     * Entry point.
     *
     * @param args path to the crawl plan, followed by the path of the file to write
     * @throws IOException          if the plan cannot be loaded or the output cannot be written
     * @throws InterruptedException if interrupted while waiting for the workers to finish
     */
    public static void main(String... args) throws IOException, InterruptedException {
        if (args.length != 2) {
            System.err.println("Expected arguments: plan.yaml out-file");
            return;
        }

        String outFile = args[1];

        var plan = new CrawlPlanLoader().load(Path.of(args[0]));

        // One sentence extractor per worker thread
        ThreadLocal<SentenceExtractor> se = ThreadLocal.withInitial(() -> new SentenceExtractor(WmsaHome.getLanguageModels()));
        LanguageFilter lf = new LanguageFilter();

        // Shared between all workers; every access must hold the monitor of `counts`
        TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);

        ForkJoinPool fjp = new ForkJoinPool(24);
        AtomicInteger docCount = new AtomicInteger();

        for (var domain : plan.domainsIterable()) { // leaks file descriptor, is fine

            if (domain.doc == null)
                continue;

            fjp.execute(() -> {

                // Distinct word hashes of the document currently being processed
                TLongHashSet words = new TLongHashSet(10_000);

                for (var doc : domain.doc) {
                    if (doc.documentBody == null)
                        continue;
                    docCount.incrementAndGet();

                    Document parsed = Jsoup.parse(doc.documentBody.decode());
                    parsed.body().filter(new DomPruningFilter(0.5));

                    DocumentLanguageData dld = se.get().extractSentences(parsed);

                    // NOTE(review): this abandons the remaining documents of the domain
                    // (and the progress line below) as soon as one document fails the
                    // language check -- confirm that `return` rather than `continue` is intended.
                    if (lf.dictionaryAgreement(dld) < 0.1) {
                        return;
                    }

                    for (var sent : dld.sentences) {
                        for (var word : sent) {
                            words.add(longHash(word.stemmed().getBytes(StandardCharsets.UTF_8)));
                        }
                    }

                    synchronized (counts) {
                        words.forEach(w -> {
                            counts.adjustOrPutValue(w, 1, 1);
                            return true;
                        });
                    }

                    words.clear();
                }

                // Read the size under the same lock that guards the writes; other
                // workers may be modifying the map concurrently.
                final int distinctTerms;
                synchronized (counts) {
                    distinctTerms = counts.size();
                }

                System.out.println(domain.domain + "\t" + distinctTerms);
            });
        }

        fjp.shutdown();
        fjp.awaitTermination(10, TimeUnit.DAYS);

        try (var dos = new DataOutputStream(Files.newOutputStream(Path.of(outFile)))) {
            synchronized (counts) {
                counts.put(DOC_COUNT_KEY, docCount.get());

                counts.forEachEntry((hash, cnt) -> {
                    try {
                        dos.writeLong(hash);
                        dos.writeLong(cnt);
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                    return true;
                });
            }
        }

        System.out.println(docCount.get());
    }
}

View File

@ -1,242 +0,0 @@
// Gradle plugins applied to this build.  The "de.undercouch.download" plugin
// supplies the Download task type used by the download* tasks further down.
plugins {
id 'java'
id "io.freefair.lombok" version "5.3.3.3"
id "me.champeau.jmh" version "0.6.6"
id "de.undercouch.download" version "5.1.0"
id 'jvm-test-suite'
}
// Repositories that dependencies are resolved from.
// NOTE(review): jitpack.io is declared both as a plain maven repository and
// inside exclusiveContent below -- confirm that both declarations are wanted.
repositories {
mavenLocal()
maven { url "https://artifactory.cronapp.io/public-release/" }
maven { url "https://repo1.maven.org/maven2/" }
maven { url "https://www2.ph.ed.ac.uk/maven2/" }
maven { url "https://jitpack.io/" }
exclusiveContent {
forRepository {
maven {
url = uri("https://jitpack.io")
}
}
filter {
// Only use JitPack for the `gson-record-type-adapter-factory` library
includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory")
}
}
}
// Extra source sets: end-to-end tests under src/e2e and JMH benchmarks under
// src/jmh.  Both are compiled and run against the main and test outputs.
// NOTE(review): the doubled `java { java { ... } }` nesting looks unintentional -- confirm.
sourceSets {
e2eTest {
java {
java {
compileClasspath += main.output + test.output
runtimeClasspath += main.output + test.output
srcDir file('src/e2e/java')
}
resources.srcDir file('src/e2e/resources')
}
}
jmh {
java {
java {
compileClasspath += main.output + test.output
runtimeClasspath += main.output + test.output
srcDir file('src/jmh/java')
}
resources.srcDir file('src/jmh/resources')
}
}
}
// Build with a Java 17 toolchain.
java {
toolchain {
languageVersion.set(JavaLanguageVersion.of(17))
}
}
// Enable the zip64 format for the JMH benchmark jar.
jmhJar {
zip64 true
}
// Dependencies of the module: project-internal modules first, then third-party
// libraries pinned to explicit versions, then test, e2e-test and JMH dependencies.
dependencies {
// Project-internal modules
implementation project(':code:common:service')
implementation project(':code:common:config')
implementation project(':code:common:service-discovery')
implementation project(':code:common:service-client')
implementation 'org.projectlombok:lombok:1.18.24'
implementation 'org.jetbrains:annotations:20.1.0'
annotationProcessor 'org.projectlombok:lombok:1.18.24'
implementation 'com.github.jknack:handlebars:4.3.1'
implementation 'com.github.jknack:handlebars-markdown:4.2.1'
implementation group: 'com.google.code.gson', name: 'gson', version: '2.9.0'
implementation 'io.reactivex.rxjava3:rxjava:3.1.5'
implementation "com.sparkjava:spark-core:2.9.3"
implementation 'com.opencsv:opencsv:5.6'
// Logging
implementation group: 'org.apache.logging.log4j', name: 'log4j-api', version: '2.17.2'
implementation group: 'org.apache.logging.log4j', name: 'log4j-core', version: '2.17.2'
implementation group: 'org.apache.logging.log4j', name: 'log4j-slf4j-impl', version: '2.17.2'
implementation 'org.slf4j:slf4j-api:1.7.36'
// NOTE(review): slf4j-jdk14 2.0.3 is used for tests next to slf4j-api 1.7.36 --
// confirm the differing versions are intended.
testImplementation 'org.slf4j:slf4j-jdk14:2.0.3'
implementation 'com.google.guava:guava:31.1-jre'
implementation 'com.google.inject:guice:5.1.0'
implementation 'com.github.jnr:jnr-ffi:2.2.12'
implementation 'org.apache.httpcomponents:httpcore:4.4.15'
implementation 'org.apache.httpcomponents:httpclient:4.5.13'
implementation group: 'com.h2database', name: 'h2', version: '2.1.210'
implementation 'org.jsoup:jsoup:1.15.3'
implementation 'org.mariadb.jdbc:mariadb-java-client:3.0.6'
implementation group: 'net.sf.trove4j', name: 'trove4j', version: '3.0.3'
implementation 'com.zaxxer:HikariCP:5.0.1'
implementation 'org.apache.opennlp:opennlp-tools:1.9.4'
// Prometheus metrics
implementation 'io.prometheus:simpleclient:0.16.0'
implementation 'io.prometheus:simpleclient_servlet:0.16.0'
implementation 'io.prometheus:simpleclient_httpserver:0.16.0'
implementation 'io.prometheus:simpleclient_hotspot:0.16.0'
implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.3'
implementation group: 'org.yaml', name: 'snakeyaml', version: '1.30'
implementation 'com.github.luben:zstd-jni:1.5.2-2'
implementation 'org.lz4:lz4-java:1.8.0'
implementation 'com.github.vladimir-bukhtoyarov:bucket4j-core:7.5.0'
implementation 'de.rototor.jeuclid:jeuclid-core:3.1.14'
implementation 'org.imgscalr:imgscalr-lib:4.2'
implementation 'org.jclarion:image4j:0.7'
implementation 'commons-net:commons-net:3.8.0'
implementation 'org.eclipse.jgit:org.eclipse.jgit:5.12.0.202106070339-r'
implementation 'org.eclipse.jgit:org.eclipse.jgit.ssh.jsch:5.12.0.202106070339-r'
implementation 'com.jcraft:jsch:0.1.55'
implementation group: 'it.unimi.dsi', name: 'fastutil', version: '8.5.8'
implementation 'org.roaringbitmap:RoaringBitmap:0.9.32'
// Unit test dependencies
testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.2'
testImplementation 'org.mockito:mockito-junit-jupiter:4.5.1'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
testCompileOnly 'org.projectlombok:lombok:1.18.24'
testImplementation 'org.projectlombok:lombok:1.18.24'
testAnnotationProcessor 'org.projectlombok:lombok:1.18.24'
testImplementation group: 'org.mockito', name: 'mockito-core', version: '4.5.1'
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
testImplementation 'org.testcontainers:mariadb:1.17.4'
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
// End-to-end test dependencies
e2eTestImplementation 'org.junit.jupiter:junit-jupiter-api:5.9.0'
e2eTestRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine'
e2eTestImplementation 'org.projectlombok:lombok:1.18.24'
e2eTestAnnotationProcessor 'org.projectlombok:lombok:1.18.24'
e2eTestImplementation 'org.testcontainers:nginx:1.17.4'
// NOTE(review): 1.17.2 here, while the other testcontainers artifacts use 1.17.4 -- confirm.
e2eTestImplementation "org.testcontainers:junit-jupiter:1.17.2"
e2eTestImplementation 'org.testcontainers:selenium:1.17.4'
e2eTestImplementation 'org.seleniumhq.selenium:selenium-remote-driver:4.5.3'
e2eTestImplementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.5.3'
implementation 'org.seleniumhq.selenium:selenium-chrome-driver:4.5.3'
implementation 'org.seleniumhq.selenium:selenium-java:4.5.3'
implementation 'org.sejda.imageio:webp-imageio:0.1.6'
// JMH benchmark dependencies
jmh 'org.openjdk.jmh:jmh-core:1.35'
jmh 'org.openjdk.jmh:jmh-generator-annprocess:1.35'
implementation 'net.agkn:hll:1.6.0'
}
// Let the e2e test configuration inherit everything declared for unit tests.
configurations {
e2eTestImplementation.extendsFrom(testImplementation)
}
// Unit tests: JUnit Platform, forked in parallel on half of the available
// processors (at least one fork), with an 8G heap.
test {
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
maxHeapSize = "8G"
useJUnitPlatform()
}
// Same settings as the test task, but tests tagged "slow" are excluded.
task fastTests(type: Test) {
maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
maxHeapSize = "8G"
useJUnitPlatform {
excludeTags "slow"
}
}
// End-to-end tests: runs the tests tagged "e2e" from the e2eTest source set,
// one fork at a time with a fresh JVM per test class.  Depends on the shadow
// jar and on the model/test data tasks declared below.
task e2eTest(type: Test) {
maxParallelForks = 1
forkEvery = 1
maxHeapSize = "8G"
dependsOn ':shadowJar'
dependsOn 'downloadTestData'
dependsOn 'downloadRDRModelData'
dependsOn 'downloadSentenceModelData'
dependsOn 'downloadTokenModelData'
dependsOn 'downloadTermFreqData'
dependsOn 'IP2LocationFile'
classpath = sourceSets.e2eTest.runtimeClasspath
testClassesDirs = sourceSets.e2eTest.output.classesDirs
useJUnitPlatform {
includeTags "e2e"
}
}
// Fetches a wikipedia .zim file into data/test; an existing file is not overwritten.
// NOTE(review): the source is fetched over plain http -- confirm no https mirror is available.
task downloadTestData(type: Download) {
src 'http://hammurabi.acc.umu.se/mirror/kiwix.org/zim/wikipedia/wikipedia_en_100_nopic_2022-05.zim'
dest file('data/test/wikipedia_en_100_nopic.zim')
overwrite false
}
// Fetches the English RDRPOSTagger model files (English.DICT and English.RDR)
// into data/models/; existing files are not overwritten.
task downloadRDRModelData(type: Download) {
src (['https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.DICT',
'https://raw.githubusercontent.com/datquocnguyen/RDRPOSTagger/master/Models/POS/English.RDR'])
dest file('data/models/')
overwrite false
}
// Fetches the OpenNLP sentence model and stores it as data/models/opennlp-sentence.bin;
// an existing file is not overwritten.
task downloadSentenceModelData(type: Download) {
src 'https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin'
dest file('data/models/opennlp-sentence.bin')
overwrite false
}
// Fetches the OpenNLP token model and stores it as data/models/opennlp-tokens.bin;
// an existing file is not overwritten.
task downloadTokenModelData(type: Download) {
src 'https://dlcdn.apache.org/opennlp/models/ud-models-1.0/opennlp-en-ud-ewt-tokens-1.0-1.9.3.bin'
dest file('data/models/opennlp-tokens.bin')
overwrite false
}
// Fetches the IP2LOCATION LITE DB1 archive into data/models; it is unpacked by
// the IP2LocationFile task.  An existing file is not overwritten.
task downloadIP2LocationFile(type: Download) {
src 'https://download.ip2location.com/lite/IP2LOCATION-LITE-DB1.CSV.ZIP'
dest file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP')
overwrite false
}
// Unpacks the downloaded IP2LOCATION archive into data/models/IP2LOC.
task IP2LocationFile(type: Copy) {
dependsOn 'downloadIP2LocationFile'
def zipFile = file('data/models/IP2LOCATION-LITE-DB1.CSV.ZIP')
def outputDir = file("data/models/IP2LOC")
from zipTree(zipFile)
into outputDir
}
// Fetches the term frequency data file into data/models; an existing file is
// not overwritten.
task downloadTermFreqData(type: Download) {
src 'https://downloads.marginalia.nu/model/tfreq-new-algo3.bin'
dest file('data/models/tfreq-new-algo3.bin')
overwrite false
}

View File

@ -1,2 +0,0 @@
# This file is generated by the 'io.freefair.lombok' Gradle plugin
config.stopBubbling = true

View File

@ -1,15 +0,0 @@
package nu.marginalia.memex;
import nu.marginalia.memex.auth.AuthMain;
import nu.marginalia.service.descriptor.ServiceDescriptor;
import nu.marginalia.service.descriptor.ServiceDescriptors;
import nu.marginalia.service.id.ServiceId;
import java.util.List;
/**
 * Service descriptors for the services that make up the memex application:
 * the memex itself and its authentication service.
 */
public class MemexServiceDescriptors {

    /** Descriptors for {@code Other_Memex} (5030) and {@code Other_Auth} (5003). */
    public static ServiceDescriptors descriptors = new ServiceDescriptors(memexAndAuth());

    /** Builds the descriptor list, memex first and auth second. */
    private static List<ServiceDescriptor> memexAndAuth() {
        ServiceDescriptor memex = new ServiceDescriptor(ServiceId.Other_Memex, 5030);
        ServiceDescriptor auth = new ServiceDescriptor(ServiceId.Other_Auth, 5003);

        return List.of(memex, auth);
    }
}

View File

@ -1,14 +0,0 @@
package nu.marginalia.memex.auth;
import com.google.inject.AbstractModule;
import com.google.inject.name.Names;
import nu.marginalia.service.descriptor.HostsFile;
import java.nio.file.Path;
/**
 * Guice module for the auth service: binds the location of the password file
 * (named "password-file") and a {@link HostsFile} instance.
 */
public class AuthConfigurationModule extends AbstractModule {

    public void configure() {
        bind(Path.class)
                .annotatedWith(Names.named("password-file"))
                .toInstance(Path.of("/var/lib/wmsa/password.dat"));

        bind(HostsFile.class)
                .toInstance(new HostsFile());
    }
}

View File

@ -1,27 +0,0 @@
package nu.marginalia.memex.auth;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import nu.marginalia.memex.MemexServiceDescriptors;
import nu.marginalia.service.MainClass;
import nu.marginalia.service.id.ServiceId;
import nu.marginalia.service.module.ConfigurationModule;
import nu.marginalia.service.server.Initialization;
/** Main class of the auth service. */
public class AuthMain extends MainClass {

    /**
     * @param service the auth service; requesting it through injection is what
     *                causes it to be constructed
     */
    @Inject
    public AuthMain(AuthService service) {
    }

    /**
     * Initializes the process as {@code ServiceId.Other_Auth}, wires up the
     * injector, instantiates the service and marks initialization as ready.
     */
    public static void main(String... args) {
        MainClass.init(ServiceId.Other_Auth, args);

        final AuthConfigurationModule authModule = new AuthConfigurationModule();
        final ConfigurationModule serviceModule =
                new ConfigurationModule(MemexServiceDescriptors.descriptors, ServiceId.Other_Auth);

        final Injector injector = Guice.createInjector(authModule, serviceModule);

        injector.getInstance(AuthMain.class);

        final Initialization initialization = injector.getInstance(Initialization.class);
        initialization.setReady();
    }
}

View File

@ -1,118 +0,0 @@
package nu.marginalia.memex.auth;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import nu.marginalia.client.Context;
import nu.marginalia.memex.auth.model.LoginFormModel;
import nu.marginalia.memex.renderer.MustacheRenderer;
import nu.marginalia.memex.renderer.RendererFactory;
import nu.marginalia.service.server.Initialization;
import nu.marginalia.service.server.MetricsServer;
import nu.marginalia.service.server.RateLimiter;
import nu.marginalia.service.server.Service;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import java.util.Optional;
import java.util.UUID;
import static spark.Spark.*;
/**
 * Minimal password based login service.
 * <p>
 * Exposes {@code public/api/login} (GET renders the login form, POST performs the
 * login) and {@code api/is-logged-in}.  A successful login is remembered as the
 * {@code logged-in} attribute of the HTTP session.
 */
public class AuthService extends Service {

    private final Logger logger = LoggerFactory.getLogger(getClass());
    private final String password;

    private final RateLimiter rateLimiter = RateLimiter.forLogin();
    private final MustacheRenderer<LoginFormModel> loginFormRenderer;

    @Inject
    public AuthService(@Named("service-host") String ip,
                       @Named("service-port") Integer port,
                       @Named("password-file") Path topSecretPasswordFile,
                       RendererFactory rendererFactory,
                       Initialization initialization,
                       MetricsServer metricsServer) throws IOException {

        super(ip, port, initialization, metricsServer);

        password = initPassword(topSecretPasswordFile);
        loginFormRenderer = rendererFactory.renderer("auth/login");

        Spark.path("public/api", () -> {
            before((req, rsp) -> {
                logger.info("{} {}", req.requestMethod(), req.pathInfo());
            });

            post("/login", this::login);
            get("/login", this::loginForm);
        });

        Spark.path("api", () -> {
            get("/is-logged-in", this::isLoggedIn);
        });
    }

    /**
     * Reads the password from the given file.  If the file does not exist or cannot
     * be read, a random UUID is used as password instead.
     * <p>
     * NOTE(review): the file content is used verbatim, so a trailing newline in the
     * file becomes part of the password -- confirm how the file is written.
     */
    private String initPassword(Path topSecretPasswordFile) {
        if (Files.exists(topSecretPasswordFile)) {
            try {
                return Files.readString(topSecretPasswordFile);
            } catch (IOException e) {
                logger.error("Could not read password from file " + topSecretPasswordFile, e);
            }
        }
        logger.error("Setting random password");
        return UUID.randomUUID().toString();
    }

    /** Renders the login form; both the "redirect" and "service" query parameters are required. */
    private Object loginForm(Request request, Response response) {
        String redir = Objects.requireNonNull(request.queryParams("redirect"));
        String service = Objects.requireNonNull(request.queryParams("service"));

        return loginFormRenderer.render(new LoginFormModel(service, redir));
    }

    /**
     * Handles a login attempt.  Redirects to the "redirect" parameter (default "/")
     * when already logged in or when the password matches, answers 429 when the
     * rate limit is exceeded, and 403 on a bad password.
     */
    private Object login(Request request, Response response) {
        var redir = Objects.requireNonNullElse(request.queryParams("redirect"), "/");

        if (isLoggedIn(request, response)) {
            response.redirect(redir);
            return "";
        }

        if (!rateLimiter.isAllowed(Context.fromRequest(request))) {
            Spark.halt(429, "Too many requests");
            return null;
        }

        if (passwordMatches(request.queryParams("password"))) {
            request.session(true).attribute("logged-in", true);
            response.redirect(redir);
            return "";
        }

        response.status(HttpStatus.SC_FORBIDDEN);
        return "<h1>Bad password!</h1>";
    }

    /**
     * Compares the submitted password against the configured one without
     * short-circuiting on the first differing character, so the response time
     * does not reveal how much of a guess was correct.
     *
     * @param candidate password submitted by the client, may be null
     * @return true if the candidate equals the configured password
     */
    private boolean passwordMatches(String candidate) {
        if (candidate == null) {
            return false;
        }

        return java.security.MessageDigest.isEqual(
                password.getBytes(java.nio.charset.StandardCharsets.UTF_8),
                candidate.getBytes(java.nio.charset.StandardCharsets.UTF_8));
    }

    /**
     * @return true if the request carries a session whose "logged-in" attribute is
     *         set to true; no session is created by this check
     */
    public boolean isLoggedIn(Request request, Response response) {
        var session = request.session(false);

        if (null == session) {
            return false;
        }

        return Optional.ofNullable(session.attribute("logged-in"))
                .map(Boolean.class::cast)
                .orElse(false);
    }
}

View File

@ -1,45 +0,0 @@
package nu.marginalia.memex.auth.client;
import com.google.gson.GsonBuilder;
import com.google.inject.Inject;
import io.reactivex.rxjava3.core.Observable;
import nu.marginalia.WmsaHome;
import nu.marginalia.client.AbstractDynamicClient;
import nu.marginalia.client.Context;
import nu.marginalia.service.descriptor.ServiceDescriptors;
import nu.marginalia.service.id.ServiceId;
import org.apache.http.HttpStatus;
import spark.Request;
import spark.Response;
import spark.Spark;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.TimeUnit;
/** Client for the auth service, used by other services to guard their endpoints. */
public class AuthClient extends AbstractDynamicClient {

    @Inject
    public AuthClient(ServiceDescriptors descriptors) {
        super(descriptors.forId(ServiceId.Other_Auth), WmsaHome.getHostsFile(), new GsonBuilder()::create);
    }

    /** Asks the auth service whether the session behind the given context is logged in. */
    public Observable<Boolean> isLoggedIn(Context ctx) {
        return get(ctx, "/api/is-logged-in")
                .map(Boolean::parseBoolean);
    }

    /**
     * Redirects the client to the login page and halts the request unless it is
     * already logged in.  The login page is addressed relative to the
     * X-Extern-Domain header, and the X-Extern-Url header is passed on as the
     * (URL-encoded) location to return to.
     *
     * @param domain value of the "service" parameter passed to the login page
     */
    public void redirectToLoginIfUnauthenticated(String domain, Request req, Response rsp) {
        if (isAuthenticated(Context.fromRequest(req))) {
            return;
        }

        final String externDomain = req.headers("X-Extern-Domain");
        final String returnUrl = URLEncoder.encode(req.headers("X-Extern-Url"), StandardCharsets.UTF_8);

        rsp.redirect(externDomain + "/auth/login?service=" + domain + "&redirect=" + returnUrl);
        Spark.halt();
    }

    /** Halts the request with 403 Forbidden unless the context is logged in. */
    public void requireLogIn(Context ctx) {
        if (isAuthenticated(ctx)) {
            return;
        }

        Spark.halt(HttpStatus.SC_FORBIDDEN);
    }

    /** Blocking login check that waits at most one second for the auth service. */
    private boolean isAuthenticated(Context ctx) {
        return isLoggedIn(ctx)
                .timeout(1, TimeUnit.SECONDS)
                .blockingFirst();
    }
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.memex.auth.model;
import lombok.AllArgsConstructor;
import lombok.Getter;
/** Model backing the login form template. */
@Getter
@AllArgsConstructor
public class LoginFormModel {

    /** Value of the "service" query parameter the form was requested with. */
    public final String service;

    /** Value of the "redirect" query parameter the form was requested with. */
    public final String redirect;
}

View File

@ -1,43 +0,0 @@
package nu.marginalia.memex.gemini;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetAddress;
import java.util.HashSet;
import java.util.Set;
/**
 * Process-wide list of addresses that have been banned for sending requests
 * that do not belong on a Gemini server.
 * <p>
 * The singleton is shared with the Gemini server, which serves connections on a
 * thread pool, so the set of banned addresses is a concurrent set.
 */
public class BadBotList {

    // Concurrent set: addresses may be added and queried from different threads
    private final Set<InetAddress> shitlist = java.util.concurrent.ConcurrentHashMap.newKeySet();

    public static final BadBotList INSTANCE = new BadBotList();

    private final Logger logger = LoggerFactory.getLogger(getClass().getSimpleName());

    private BadBotList() {}

    /** @return false if the address has been banned */
    public boolean isAllowed(InetAddress address) {
        return !shitlist.contains(address);
    }

    /**
     * Inspects a query and bans the sender if the query is recognized as bad.
     *
     * @return true if the query is acceptable, false if the address was banned
     */
    public boolean isQueryPermitted(InetAddress address, String query) {
        if (isBadQuery(query)) {
            logger.info("Banning {}", address);
            shitlist.add(address);
            return false;
        }

        return true;
    }

    /** A query is bad if it starts with "GET" or "OPTIONS", or contains "mstshash". */
    private boolean isBadQuery(String query) {
        return query.startsWith("GET")
            || query.startsWith("OPTIONS")
            || query.contains("mstshash");
    }
}

View File

@ -1,17 +0,0 @@
package nu.marginalia.memex.gemini;
import com.google.inject.AbstractModule;
import com.google.inject.name.Names;
import java.nio.file.Path;
/**
 * Guice module with the file system locations and the port used by the
 * Gemini server.
 */
public class GeminiConfigurationModule extends AbstractModule {

    public void configure() {
        bindNamedPath("gemini-server-root", "/var/lib/wmsa/memex-gmi");
        bindNamedPath("gemini-cert-file", "/var/lib/wmsa/gemini/crypto.jks");
        bindNamedPath("gemini-cert-password-file", "/var/lib/wmsa/gemini/password.dat");

        bind(Integer.class)
                .annotatedWith(Names.named("gemini-server-port"))
                .toInstance(1965);
    }

    /** Binds {@code Path.of(location)} under the given {@code @Named} key. */
    private void bindNamedPath(String name, String location) {
        bind(Path.class)
                .annotatedWith(Names.named(name))
                .toInstance(Path.of(location));
    }
}

View File

@ -1,7 +0,0 @@
package nu.marginalia.memex.gemini;
/** A Gemini server that can be started with {@link #run()}. */
public interface GeminiService {

    /** Name of the default gemtext file, "index.gmi". */
    String DEFAULT_FILENAME = "index.gmi";

    /** Runs the service. */
    void run();
}

View File

@ -1,10 +0,0 @@
package nu.marginalia.memex.gemini;
import com.google.inject.Singleton;
/** {@link GeminiService} implementation that does nothing when run. */
@Singleton
public class GeminiServiceDummy implements GeminiService {

    /** Intentionally empty. */
    @Override
    public void run() {
        // no-op
    }
}

View File

@ -1,164 +0,0 @@
package nu.marginalia.memex.gemini;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import nu.marginalia.memex.gemini.io.GeminiConnection;
import nu.marginalia.memex.gemini.io.GeminiSSLSetUp;
import nu.marginalia.memex.gemini.io.GeminiStatusCode;
import nu.marginalia.memex.gemini.io.GeminiUserException;
import nu.marginalia.memex.gemini.plugins.BareStaticPagePlugin;
import nu.marginalia.memex.gemini.plugins.Plugin;
import nu.marginalia.memex.gemini.plugins.SearchPlugin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.net.ssl.SSLException;
import javax.net.ssl.SSLServerSocket;
import javax.net.ssl.SSLServerSocketFactory;
import javax.net.ssl.SSLSocket;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Optional;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
/**
 * Gemini server.  Accepts TLS connections on the configured port, reads a single
 * request URL per connection, and lets the first plugin that accepts the URL serve it.
 */
@Singleton
public class GeminiServiceImpl implements GeminiService {

    public final Path serverRoot;

    private final Logger logger = LoggerFactory.getLogger(getClass().getSimpleName());
    private final Executor pool = Executors.newFixedThreadPool(32);
    private final SSLServerSocket serverSocket;

    private final Plugin[] plugins;
    private final BadBotList badBotList = BadBotList.INSTANCE;

    /**
     * Opens the server socket and verifies the server root exists; the process
     * exits with status 255 if it does not.
     *
     * @throws Exception if the TLS server socket cannot be set up
     */
    @Inject
    public GeminiServiceImpl(@Named("gemini-server-root") Path serverRoot,
                             @Named("gemini-server-port") Integer port,
                             GeminiSSLSetUp sslSetUp,
                             BareStaticPagePlugin pagePlugin,
                             SearchPlugin searchPlugin) throws Exception {
        this.serverRoot = serverRoot;

        logger.info("Setting up crypto");
        final SSLServerSocketFactory socketFactory = sslSetUp.getServerSocketFactory();

        serverSocket = (SSLServerSocket) socketFactory.createServerSocket(port /* 1965 */);
        serverSocket.setEnabledCipherSuites(socketFactory.getSupportedCipherSuites());
        serverSocket.setEnabledProtocols(new String[] {"TLSv1.3", "TLSv1.2"});

        logger.info("Verifying setup");
        if (!Files.exists(this.serverRoot)) {
            logger.error("Could not find SERVER_ROOT {}", this.serverRoot);
            System.exit(255);
        }

        // Plugins are consulted in this order; the first one that serves the URL wins
        plugins = new Plugin[] {
                pagePlugin,
                searchPlugin
        };
    }

    /**
     * Accept loop.  Connections from banned addresses are closed immediately, all
     * others are handed to the thread pool.  The loop ends on the first
     * IOException, which is logged.
     */
    @Override
    public void run() {
        logger.info("Awaiting connections");

        try {
            for (;;) {
                SSLSocket connection = (SSLSocket) serverSocket.accept();
                connection.setSoTimeout(10_000);

                if (!badBotList.isAllowed(connection.getInetAddress())) {
                    connection.close();
                } else {
                    pool.execute(() -> serve(connection));
                }
            }
        }
        catch (IOException ex) {
            logger.error("IO Exception in gemini server", ex);
        }
    }

    /** Serves a single connection and always closes it afterwards. */
    private void serve(SSLSocket socket) {
        final GeminiConnection connection;
        try {
            connection = new GeminiConnection(socket);
        }
        catch (IOException ex) {
            logger.error("Failed to create connection object", ex);
            return;
        }

        try {
            handleRequest(connection);
        }
        catch (GeminiUserException ex) {
            errorResponse(connection, ex.getMessage());
        }
        catch (SSLException ex) {
            // No response is attempted; the connection is closed in the finally block
            logger.error("{} SSL error", connection.getAddress());
        }
        catch (Exception ex) {
            errorResponse(connection, "Error");
            logger.error(connection.getAddress(), ex);
        }
        finally {
            connection.close();
        }
    }

    /** Logs the error and, if the peer is still connected, reports it as a permanent error. */
    private void errorResponse(GeminiConnection connection, String message) {
        if (connection.isConnected()) {
            try {
                // Both values need a placeholder, otherwise the message is dropped from the log
                logger.error("=> {} {}", connection.getAddress(), message);
                connection.writeStatusLine(GeminiStatusCode.ERROR_PERMANENT, message);
            }
            catch (IOException ex) {
                logger.error("Exception while sending error", ex);
            }
        }
    }

    /**
     * Reads the request URL and serves it.
     *
     * @throws GeminiUserException if the URL does not use the gemini scheme
     */
    private void handleRequest(GeminiConnection connection) throws Exception {
        final String address = connection.getAddress();
        logger.info("Connect: {}", address);

        final Optional<URI> maybeUri = connection.readUrl();
        if (maybeUri.isEmpty()) {
            logger.info("Done: {}", address);
            return;
        }

        final URI uri = maybeUri.get();
        logger.info("Request {}", uri);

        // The scheme is null for a relative URI; treat that as unsupported as well
        if (!"gemini".equals(uri.getScheme())) {
            throw new GeminiUserException("Unsupported protocol");
        }

        servePage(connection, uri);
        logger.info("Done: {}", address);
    }

    /** Offers the URL to each plugin in turn; answers with a temporary error if none serves it. */
    private void servePage(GeminiConnection connection, URI url) throws IOException {
        for (Plugin p : plugins) {
            if (p.serve(url, connection)) {
                return;
            }
        }

        logger.error("FileNotFound {}", url.getPath());
        connection.writeStatusLine(GeminiStatusCode.ERROR_TEMPORARY, "No such file");
    }
}

View File

@ -1,130 +0,0 @@
package nu.marginalia.memex.gemini.client;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSocketFactory;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.security.cert.X509Certificate;
/** Unstable code! */
public class GeminiClient {
private final SSLSocketFactory socketFactory;
// Create a trust manager that does not validate anything
public static final TrustManager[] trustAllCerts = new TrustManager[]{
new X509TrustManager() {
@Override
public void checkClientTrusted(X509Certificate[] chain,
String authType) {
}
@Override
public void checkServerTrusted(X509Certificate[] chain,
String authType) {
}
@Override
public X509Certificate[] getAcceptedIssuers() {
return new X509Certificate[0];
}
}
};
public static SSLSocketFactory buildSocketFactory() throws Exception {
// Install the all-trusting trust manager
final SSLContext sslContext = SSLContext.getInstance("SSL");
sslContext.init(null, trustAllCerts, new java.security.SecureRandom());
return sslContext.getSocketFactory();
}
public GeminiClient() throws Exception {
socketFactory = buildSocketFactory();
}
public Response get(URI uri) throws IOException {
final int port = uri.getPort() == -1 ? 1965 : uri.getPort();
final String host = uri.getHost();
var requestString = String.format("%s\r\n", uri).getBytes(StandardCharsets.UTF_8);
try (var socket = socketFactory.createSocket(host, port)) {
socket.setSoTimeout(10_000);
socket.getOutputStream().write(requestString);
var is = socket.getInputStream();
String statusLine = new GeminiInput(is).get();
int code = Integer.parseInt(statusLine.substring(0,2));
String meta = statusLine.substring(3);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
is.transferTo(baos);
return new Response(code, meta, baos.toByteArray());
}
}
public static class Response {
public final int code;
public final String meta;
public final byte[] data;
Response(int code, String meta, byte[] data) {
this.code = code;
this.meta = meta;
this.data = data;
}
}
public static class GeminiInput {
private final InputStream is;
private final byte[] buffer = new byte[1024];
private int idx;
final String result;
public GeminiInput(InputStream is) throws IOException {
this.is = is;
for (idx = 0; idx < buffer.length; idx++) {
if (hasEndOfLine()) {
result = new String(buffer, 0, idx-2, StandardCharsets.UTF_8);
return;
}
readCharacter();
}
throw new RuntimeException("String too long");
}
public String get() {
return result;
}
private void readCharacter() throws IOException {
int rb = is.read();
if (-1 == rb) {
throw new RuntimeException("URL incomplete (no CR LF)");
}
buffer[idx] = (byte) rb;
}
public boolean hasEndOfLine() {
return idx > 2
&& buffer[idx - 1] == (byte) '\n'
&& buffer[idx - 2] == (byte) '\r';
}
}
}

View File

@ -1,53 +0,0 @@
package nu.marginalia.memex.gemini.gmi;
import lombok.Getter;
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
import nu.marginalia.memex.gemini.gmi.parser.GemtextParser;
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRenderer;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import java.io.IOException;
import java.io.Writer;
import java.util.Arrays;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/** A parsed gemtext file: its location and its lines in document order. */
@Getter
public class Gemtext {
    private final AbstractGemtextLine[] lines;
    private final MemexNodeUrl url;

    /**
     * @param url         location of the document
     * @param lines       raw lines, parsed with {@link GemtextParser}
     * @param headingRoot heading id handed to the parser as root
     */
    public Gemtext(MemexNodeUrl url, String[] lines, MemexNodeHeadingId headingRoot) {
        this.lines = GemtextParser.parse(lines, headingRoot);
        this.url = url;
    }

    /** Same as the three-argument constructor, with heading id 0 as root. */
    public Gemtext(MemexNodeUrl url, String[] lines) {
        this(url, lines, new MemexNodeHeadingId(0));
    }

    /** Renders every line and concatenates the results without any separator. */
    public String render(GemtextRenderer renderer) {
        final StringBuilder rendered = new StringBuilder();

        for (AbstractGemtextLine line : lines) {
            rendered.append(renderer.renderLine(line));
        }

        return rendered.toString();
    }

    /** Renders every line to the writer, each followed by a newline. */
    public void render(GemtextRenderer renderer, Writer w) throws IOException {
        for (int i = 0; i < lines.length; i++) {
            w.write(renderer.renderLine(lines[i]));
            w.write('\n');
        }
    }

    /** @return the lines as a stream, in document order */
    public Stream<AbstractGemtextLine> stream() {
        return Stream.of(lines);
    }

    /** @return the line at the given zero-based index */
    public AbstractGemtextLine get(int idx) {
        return lines[idx];
    }

    /** @return the number of lines */
    public int size() {
        return lines.length;
    }
}

View File

@ -1,71 +0,0 @@
package nu.marginalia.memex.gemini.gmi;
import com.google.common.collect.Sets;
import nu.marginalia.memex.gemini.gmi.line.GemtextLineVisitorAdapter;
import nu.marginalia.memex.gemini.gmi.line.GemtextLink;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import nu.marginalia.memex.memex.model.MemexUrl;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
/**
 * A gemtext file used as a key-value store: every link line is an entry whose
 * key is the link URL and whose data is the link title.
 */
public class GemtextDatabase extends Gemtext {

    /** Maps a link URL to the index of the line that holds the link. */
    public final Map<String, Integer> links;

    public GemtextDatabase(MemexNodeUrl url, String[] lines) {
        super(url, lines);

        links = new HashMap<>();

        final int lineCount = size();
        for (int lineNo = 0; lineNo < lineCount; lineNo++) {
            final int position = lineNo;

            // Only link lines are of interest; a later link with the same URL
            // replaces an earlier one
            get(lineNo).visit(new GemtextLineVisitorAdapter<>() {
                @Override
                public Object visit(GemtextLink g) {
                    links.put(g.getUrl().toString(), position);
                    return null;
                }
            });
        }
    }

    /** @return the URLs of all links in the file */
    public Set<String> keys() {
        return links.keySet();
    }

    /**
     * @return the title of the link to the given URL (empty string if the link
     *         yields no title), or an empty Optional if there is no such link
     */
    public Optional<String> getLinkData(MemexUrl url) {
        final Integer position = links.get(url.getUrl());

        if (position == null) {
            return Optional.empty();
        }

        final String title = get(position).mapLink(GemtextLink::getTitle).orElse("");
        return Optional.of(title);
    }

    public static GemtextDatabase of(MemexNodeUrl url, String[] lines) {
        return new GemtextDatabase(url, lines);
    }

    /** Reads the given file and parses its lines as a database. */
    public static GemtextDatabase of(MemexNodeUrl url, Path file) throws IOException {
        try (var fileLines = Files.lines(file)) {
            final String[] content = fileLines.toArray(String[]::new);
            return new GemtextDatabase(url, content);
        }
    }

    /**
     * @return the URLs that are present here but missing in {@code other},
     *         together with the URLs present in both whose link data differs
     */
    public Set<MemexNodeUrl> difference(GemtextDatabase other) {
        final Set<MemexNodeUrl> differences = new HashSet<>();

        // Keys only found in this database
        for (String key : Sets.difference(keys(), other.keys())) {
            differences.add(new MemexNodeUrl(key));
        }

        // Keys found in both, but with different data
        for (String key : Sets.intersection(keys(), other.keys())) {
            final MemexNodeUrl shared = new MemexNodeUrl(key);

            if (!Objects.equals(getLinkData(shared), other.getLinkData(shared))) {
                differences.add(shared);
            }
        }

        return differences;
    }
}

View File

@ -1,163 +0,0 @@
package nu.marginalia.memex.gemini.gmi;
import lombok.Getter;
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRenderer;
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRendererFactory;
import nu.marginalia.memex.gemini.gmi.line.*;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeTaskId;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import nu.marginalia.memex.memex.model.MemexTaskState;
import org.apache.commons.lang3.tuple.Pair;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
@Getter
public class GemtextDocument extends Gemtext {
private final Map<MemexNodeHeadingId, String> headings;
private final Map<String, List<MemexNodeHeadingId>> headingsByName;
private final Set<String> pragmas;
private final List<GemtextTask> tasks;
private final String title;
private final String date;
private final List<GemtextLink> links;
private final int hashCode;
private static final Pattern datePattern = Pattern.compile(".*(\\d{4}-\\d{2}-\\d{2}).*");
private static final GemtextRenderer rawRenderer = new GemtextRendererFactory().gemtextRendererAsIs();
public GemtextDocument(MemexNodeUrl url, String[] lines, MemexNodeHeadingId headingRoot) {
super(url, lines, headingRoot);
this.hashCode = Arrays.hashCode(lines);
GemtextDataExtractor extractor = new GemtextDataExtractor();
Arrays.stream(this.getLines()).forEach(extractor::take);
this.headings = extractor.getHeadings();
this.links = extractor.getLinks();
this.title = Objects.requireNonNullElse(extractor.getTitle(), url.getUrl());
this.pragmas = extractor.getPragmas();
this.headingsByName = extractor.getHeadingsByName();
this.tasks = extractor.getTasks();
this.date = extractor.getDate();
}
public String getHeadingForElement(AbstractGemtextLine line) {
return headings.getOrDefault(line.getHeading(), "");
}
public List<AbstractGemtextLine> getSection(MemexNodeHeadingId headingId) {
return stream()
.filter(line -> line.getHeading().isChildOf(headingId))
.collect(Collectors.toList());
}
public String getSectionGemtext(MemexNodeHeadingId headingId) {
if (headingId.equals(new MemexNodeHeadingId(0))) {
return stream()
.map(rawRenderer::renderLine)
.collect(Collectors.joining("\n"));
}
return stream()
.filter(line -> line.getHeading().isChildOf(headingId))
.map(rawRenderer::renderLine)
.collect(Collectors.joining("\n"));
}
public Map<MemexNodeTaskId, Pair<String, MemexTaskState>> getOpenTopTasks() {
return tasks.stream()
.filter(task -> MemexTaskState.TODO.equals(task.getState())
|| MemexTaskState.URGENT.equals(task.getState()))
.filter(task -> task.getId().level() == 1)
.collect(Collectors.toMap(GemtextTask::getId, task -> Pair.of(task.getTask(), task.getState())));
}
/**
 * Creates a document from in-memory lines, rooted at heading id {@code 0}.
 *
 * @param url   location of the document
 * @param lines raw gemtext lines
 * @return the parsed document
 */
public static GemtextDocument of(MemexNodeUrl url, String... lines) {
    final MemexNodeHeadingId root = new MemexNodeHeadingId(0);
    return new GemtextDocument(url, lines, root);
}
/**
 * Creates a document from a file on disk, rooted at heading id {@code 0}.
 *
 * @param url  location of the document
 * @param file file to read the gemtext lines from
 * @return the parsed document
 * @throws IOException if the file cannot be opened
 */
public static GemtextDocument of(MemexNodeUrl url, Path file) throws IOException {
    try (var fileLines = Files.lines(file)) {
        final String[] content = fileLines.toArray(String[]::new);
        final MemexNodeHeadingId root = new MemexNodeHeadingId(0);
        return new GemtextDocument(url, content, root);
    }
}
/**
 * @return true when the document's filename is exactly {@code index.gmi}
 */
public boolean isIndex() {
    final String filename = getUrl().getFilename();
    return filename.equals("index.gmi");
}
/**
 * @return the hash of the raw input lines, precomputed in the constructor
 */
@Override
public int hashCode() {
    return this.hashCode;
}
/**
 * Looks up the text of a heading by its id.
 *
 * @param heading heading id
 * @return the heading text, or empty when the id is not present in this document
 */
public Optional<String> getHeading(MemexNodeHeadingId heading) {
    final String headingText = headings.get(heading);
    return Optional.ofNullable(headingText);
}
/**
 * Finds a heading with the given text that is a child of {@code parent}.
 *
 * @param parent heading the result must be a child of
 * @param name   exact heading text
 * @return any matching heading id, or empty when none matches
 */
public Optional<MemexNodeHeadingId> getHeadingByName(MemexNodeHeadingId parent, String name) {
    final List<MemexNodeHeadingId> candidates = headingsByName.get(name);
    if (candidates != null) {
        return candidates.stream()
            .filter(candidate -> candidate.isChildOf(parent))
            .findAny();
    }
    return Optional.empty();
}
/**
 * Visitor that gathers document-level metadata (title, date, headings, links,
 * tasks, pragmas) while the parsed lines are fed through it one by one.
 */
@Getter
private static class GemtextDataExtractor extends GemtextLineVisitorAdapter<Object> {
    private String title;
    private String date;
    private final Map<MemexNodeHeadingId, String> headings = new TreeMap<>((a, b) -> Arrays.compare(a.getIds(), b.getIds()));
    private final Map<String, List<MemexNodeHeadingId>> headingsByName = new HashMap<>();
    private final Set<String> pragmas = new HashSet<>();
    private final List<GemtextLink> links = new ArrayList<>();
    private final List<GemtextTask> tasks = new ArrayList<>();

    /** Indexes the heading; the first heading seen also supplies the title and date. */
    @Override
    public Object visit(GemtextHeading g) {
        headings.put(g.getLevel(), g.getName());
        headingsByName.computeIfAbsent(g.getName(), key -> new ArrayList<>()).add(g.getLevel());

        if (title != null) {
            return null;
        }

        title = g.getName();
        var dateInTitle = datePattern.matcher(title);
        if (dateInTitle.matches()) {
            date = dateInTitle.group(1);
        }
        return null;
    }

    /** Records the link. */
    @Override
    public Object visit(GemtextLink g) {
        links.add(g);
        return null;
    }

    /** Records the task. */
    @Override
    public Object visit(GemtextTask g) {
        tasks.add(g);
        return null;
    }

    /** Records the pragma text. */
    @Override
    public Object visit(GemtextPragma g) {
        pragmas.add(g.getLine());
        return null;
    }
}
}

View File

@ -1,18 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.Optional;
import java.util.function.Function;
/**
 * Base type of every parsed gemtext line.
 *
 * <p>The {@code mapX} methods return empty here; the line type named by each
 * method overrides it to apply the mapper to itself.
 */
public abstract class AbstractGemtextLine {

    /** Applies {@code mapper} if this line is a link; empty by default. */
    public <T> Optional<T> mapLink(Function<GemtextLink, T> mapper) {
        return Optional.empty();
    }

    /** Applies {@code mapper} if this line is a heading; empty by default. */
    public <T> Optional<T> mapHeading(Function<GemtextHeading, T> mapper) {
        return Optional.empty();
    }

    /** Applies {@code mapper} if this line is a task; empty by default. */
    public <T> Optional<T> mapTask(Function<GemtextTask, T> mapper) {
        return Optional.empty();
    }

    /** Dispatches to the visitor overload for the concrete line type. */
    public abstract <T> T visit(GemtextLineVisitor<T> visitor);

    /** Flag consulted by task handling; each line type supplies its own answer. */
    public abstract boolean breaksTask();

    /** Id of the heading this line is filed under. */
    public abstract MemexNodeHeadingId getHeading();
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
/** An aside line; {@code line} holds the text between the parentheses. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextAside extends AbstractGemtextLine {
    private final String line;
    private final MemexNodeHeadingId heading;

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Asides never break a task. */
    @Override
    public boolean breaksTask() {
        return false;
    }
}

View File

@ -1,32 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.Optional;
import java.util.function.Function;
/**
 * A heading line. {@code level} is the heading's own id and {@code heading}
 * the id it is filed under; the heading parser passes the same id for both.
 */
@AllArgsConstructor
@Getter
@ToString
public class GemtextHeading extends AbstractGemtextLine {
    private final MemexNodeHeadingId level;
    private final String name;
    private final MemexNodeHeadingId heading;

    /** Applies {@code mapper} to this heading. */
    @Override
    public <T> Optional<T> mapHeading(Function<GemtextHeading, T> mapper) {
        final T mapped = mapper.apply(this);
        return Optional.of(mapped);
    }

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Headings always break a task. */
    @Override
    public boolean breaksTask() {
        return true;
    }
}

View File

@ -1,18 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
/**
 * Visitor over the concrete gemtext line types.
 *
 * @param <T> result type produced for each visited line
 */
public interface GemtextLineVisitor<T> {

    /** Entry point: lets {@code line} dispatch to the overload matching its type. */
    default T take(AbstractGemtextLine line) {
        return line.visit(this);
    }

    T visit(GemtextHeading g);

    T visit(GemtextLink g);

    T visit(GemtextList g);

    T visit(GemtextPreformat g);

    T visit(GemtextQuote g);

    T visit(GemtextText g);

    T visit(GemtextTextLiteral g);

    T visit(GemtextAside g);

    T visit(GemtextTask g);

    T visit(GemtextPragma g);
}

View File

@ -1,53 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
/**
 * No-op {@link GemtextLineVisitor}: every overload returns {@code null}, so
 * subclasses only override the line types they care about.
 *
 * @param <T> result type produced for each visited line
 */
public class GemtextLineVisitorAdapter<T> implements GemtextLineVisitor<T> {

    @Override
    public T visit(GemtextHeading g) { return null; }

    @Override
    public T visit(GemtextLink g) { return null; }

    @Override
    public T visit(GemtextList g) { return null; }

    @Override
    public T visit(GemtextPreformat g) { return null; }

    @Override
    public T visit(GemtextQuote g) { return null; }

    @Override
    public T visit(GemtextText g) { return null; }

    @Override
    public T visit(GemtextTextLiteral g) { return null; }

    @Override
    public T visit(GemtextAside g) { return null; }

    @Override
    public T visit(GemtextTask g) { return null; }

    @Override
    public T visit(GemtextPragma g) { return null; }
}

View File

@ -1,33 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexUrl;
import javax.annotation.Nullable;
import java.util.Optional;
import java.util.function.Function;
/** A link line: target URL plus an optional title. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextLink extends AbstractGemtextLine {
    private final MemexUrl url;

    @Nullable
    private final String title;

    private final MemexNodeHeadingId heading;

    /** Applies {@code mapper} to this link; a null mapper result yields empty. */
    @Override
    public <T> Optional<T> mapLink(Function<GemtextLink, T> mapper) {
        final T mapped = mapper.apply(this);
        return Optional.ofNullable(mapped);
    }

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Links never break a task. */
    @Override
    public boolean breaksTask() {
        return false;
    }
}

View File

@ -1,23 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.List;
/** A run of consecutive list lines, stored as one element. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextList extends AbstractGemtextLine {
    private final List<String> items;
    private final MemexNodeHeadingId heading;

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Lists always break a task. */
    @Override
    public boolean breaksTask() {
        return true;
    }
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
/** A pragma line; {@code line} holds the text after the pragma marker. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextPragma extends AbstractGemtextLine {
    private final String line;
    private final MemexNodeHeadingId heading;

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Pragmas never break a task. */
    @Override
    public boolean breaksTask() {
        return false;
    }
}

View File

@ -1,23 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.List;
/** A preformatted block; {@code items} are the lines between the fence markers. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextPreformat extends AbstractGemtextLine {
    private final List<String> items;
    private final MemexNodeHeadingId heading;

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Preformatted blocks always break a task. */
    @Override
    public boolean breaksTask() {
        return true;
    }
}

View File

@ -1,23 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.List;
/** A run of consecutive quote lines, stored as one element. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextQuote extends AbstractGemtextLine {
    private final List<String> items;
    private final MemexNodeHeadingId heading;

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Quotes always break a task. */
    @Override
    public boolean breaksTask() {
        return true;
    }
}

View File

@ -1,42 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeTaskId;
import nu.marginalia.memex.memex.model.MemexTaskState;
import nu.marginalia.memex.memex.model.MemexTaskTags;
import java.util.Optional;
import java.util.function.Function;
/** A task line: its id, its text, and the tags built from that text. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextTask extends AbstractGemtextLine {
    private final MemexNodeTaskId id;
    private final String task;
    private final MemexNodeHeadingId heading;
    private final MemexTaskTags tags;

    /** @return the task state derived from the tags */
    public MemexTaskState getState() {
        return MemexTaskState.of(tags);
    }

    /** @return the level reported by the task id */
    public int getLevel() {
        return id.level();
    }

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Tasks always break a task. */
    @Override
    public boolean breaksTask() {
        return true;
    }

    /** Applies {@code mapper} to this task. */
    @Override
    public <T> Optional<T> mapTask(Function<GemtextTask, T> mapper) {
        final T mapped = mapper.apply(this);
        return Optional.of(mapped);
    }
}

View File

@ -1,21 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
/** A plain text line. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextText extends AbstractGemtextLine {
    private final String line;
    private final MemexNodeHeadingId heading;

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Only non-blank text breaks a task. */
    @Override
    public boolean breaksTask() {
        final boolean blank = line.isBlank();
        return !blank;
    }
}

View File

@ -1,23 +0,0 @@
package nu.marginalia.memex.gemini.gmi.line;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.ToString;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.List;
/** A run of consecutive literal lines, kept verbatim as one element. */
@AllArgsConstructor
@Getter
@ToString
public class GemtextTextLiteral extends AbstractGemtextLine {
    private final List<String> items;
    private final MemexNodeHeadingId heading;

    @Override
    public <T> T visit(GemtextLineVisitor<T> visitor) {
        return visitor.visit(this);
    }

    /** Literal blocks never break a task. */
    @Override
    public boolean breaksTask() {
        return false;
    }
}

View File

@ -1,20 +0,0 @@
package nu.marginalia.memex.gemini.gmi.parser;
import nu.marginalia.memex.gemini.gmi.line.GemtextAside;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.regex.Pattern;
/** Parses aside lines of the form {@code (text)}. */
public class GemtextAsideParser {
    // A whole line wrapped in parentheses; group 1 is the text inside.
    private static final Pattern asidePattern = Pattern.compile("^\\((.*)\\)$");

    /**
     * @param s       raw line
     * @param heading heading the line is filed under
     * @return the aside, or {@code null} when the line is not fully parenthesised
     */
    public static GemtextAside parse(String s, MemexNodeHeadingId heading) {
        final var m = asidePattern.matcher(s);
        return m.matches() ? new GemtextAside(m.group(1), heading) : null;
    }
}

View File

@ -1,26 +0,0 @@
package nu.marginalia.memex.gemini.gmi.parser;
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
import nu.marginalia.memex.gemini.gmi.line.GemtextHeading;
import nu.marginalia.memex.gemini.gmi.line.GemtextText;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.regex.Pattern;
/** Parses heading lines ({@code #}, {@code ##}, ...). */
public class GemtextHeadingParser {
    // Group 1: the run of '#' characters; group 2: the heading text (may be empty).
    private static final Pattern headingPattern = Pattern.compile("^(#+)\\s*([^#].*|$)$");

    /**
     * @param s       raw line
     * @param heading heading in effect before this line
     * @return a heading numbered after {@code heading}, or plain text when the
     *         line does not match the heading syntax
     */
    public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading) {
        var m = headingPattern.matcher(s);
        if (m.matches()) {
            // One '#' is depth 0, two are depth 1, and so on.
            final int depth = m.group(1).length() - 1;
            var headingId = heading.next(depth);
            return new GemtextHeading(headingId, m.group(2), headingId);
        }
        return new GemtextText(s, heading);
    }
}

View File

@ -1,42 +0,0 @@
package nu.marginalia.memex.gemini.gmi.parser;
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
import nu.marginalia.memex.gemini.gmi.line.GemtextLink;
import nu.marginalia.memex.gemini.gmi.line.GemtextText;
import nu.marginalia.memex.memex.model.MemexExternalUrl;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import nu.marginalia.memex.memex.model.MemexUrl;
import javax.annotation.Nullable;
import java.util.regex.Pattern;
/** Parses link lines of the form {@code => url [title]}. */
public class GemtextLinkParser {
    // Group 1: the URL (no whitespace); group 2: optional title, null when absent.
    private static final Pattern linkPattern = Pattern.compile("^=>\\s?([^\\s]+)\\s*(.+)?$");

    /**
     * Parses a link line.
     *
     * @param s       raw line
     * @param heading heading the line is filed under
     * @return a {@link GemtextLink}, or a {@link GemtextText} holding the raw
     *         line when it is not a well-formed link; never {@code null}
     */
    public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading) {
        var matcher = linkPattern.matcher(s);
        if (!matcher.matches()) {
            return new GemtextText(s, heading);
        }

        // Matcher.groupCount() reports the number of groups in the pattern, not
        // the number that matched, so it cannot tell titled links from untitled
        // ones. group(2) is simply null when the optional title is absent.
        return new GemtextLink(toMemexUrl(matcher.group(1)), matcher.group(2), heading);
    }

    /** URLs starting with {@code /} are node URLs; everything else is external. */
    private static MemexUrl toMemexUrl(String url) {
        if (url.startsWith("/")) {
            return new MemexNodeUrl(url);
        }
        return new MemexExternalUrl(url);
    }
}

View File

@ -1,17 +0,0 @@
package nu.marginalia.memex.gemini.gmi.parser;
import java.util.regex.Pattern;
/** Extracts the item text from a list line ({@code * item}). */
public class GemtextListParser {
    // '*' followed by at most one whitespace character, then the (possibly empty) item text.
    private static final Pattern listItemPattern = Pattern.compile("^\\*\\s?(.*)$");

    /**
     * Parses a single list line.
     *
     * <p>A bare {@code *} yields an empty item rather than {@code null}, so
     * callers that collect every line starting with {@code *} never store a
     * null item.
     *
     * @param s raw line
     * @return the item text, or {@code null} when the line does not start with {@code *}
     */
    public static String parse(String s) {
        var matcher = listItemPattern.matcher(s);
        if (!matcher.matches()) {
            return null;
        }
        return matcher.group(1);
    }
}

View File

@ -1,135 +0,0 @@
package nu.marginalia.memex.gemini.gmi.parser;
import nu.marginalia.memex.gemini.gmi.line.*;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeTaskId;
import java.util.*;
/**
 * Line-oriented gemtext parser. Each line is classified by its leading marker;
 * consecutive list, quote and literal lines are merged into a single element,
 * and task lines are only recognised after a {@code TASKS} pragma has been seen.
 */
public class GemtextParser {
    private static final String PREFORMAT_MARKER = "```";
    private static final String LITERAL_MARKER = " ";
    private static final String LINK_MARKER = "=>";
    private static final String HEADING_MARKER = "#";
    private static final String LIST_MARKER = "*";
    private static final String QUOTE_MARKER = ">";
    private static final String ASIDE_MARKER = "(";
    private static final String TASK_MARKER = "-";
    private static final String PRAGMA_MARKER = "%%%";

    /**
     * Parses raw lines into line elements.
     *
     * @param lines       raw gemtext lines
     * @param headingRoot heading id in effect before the first heading line
     * @return the parsed elements in document order
     */
    public static AbstractGemtextLine[] parse(String[] lines, MemexNodeHeadingId headingRoot) {
        final List<AbstractGemtextLine> parsed = new ArrayList<>();
        final Set<String> seenPragmas = new HashSet<>();
        MemexNodeHeadingId currentHeading = headingRoot;
        MemexNodeTaskId currentTask = new MemexNodeTaskId(0);

        int pos = 0;
        while (pos < lines.length) {
            final String line = lines[pos];

            // Branch order matters: earlier markers take precedence over later ones.
            if (line.startsWith(PREFORMAT_MARKER)) {
                pos = collectPreformat(parsed, lines, currentHeading, pos);
            } else if (line.startsWith(PRAGMA_MARKER)) {
                final AbstractGemtextLine pragma = GemtextPragmaParser.parse(line, currentHeading);
                if (pragma instanceof GemtextPragma) {
                    seenPragmas.add(((GemtextPragma) pragma).getLine());
                }
                parsed.add(pragma);
            } else if (line.startsWith(LINK_MARKER)) {
                parsed.add(GemtextLinkParser.parse(line, currentHeading));
            } else if (line.startsWith(HEADING_MARKER)) {
                final AbstractGemtextLine tag = GemtextHeadingParser.parse(line, currentHeading);
                // A real heading becomes the heading for the lines that follow.
                currentHeading = tag.mapHeading(GemtextHeading::getHeading).orElse(currentHeading);
                parsed.add(tag);
            } else if (line.startsWith(LIST_MARKER)) {
                pos = collectList(parsed, lines, currentHeading, pos);
            } else if (line.startsWith(LITERAL_MARKER)) {
                pos = collectLiteral(parsed, lines, currentHeading, pos);
            } else if (seenPragmas.contains("TASKS") && line.startsWith(TASK_MARKER)) {
                final AbstractGemtextLine tag = GemtextTaskParser.parse(line, currentHeading, currentTask);
                currentTask = tag.mapTask(GemtextTask::getId).orElse(currentTask);
                parsed.add(tag);
            } else if (line.startsWith(QUOTE_MARKER)) {
                pos = collectQuote(parsed, lines, currentHeading, pos);
            } else if (line.startsWith(ASIDE_MARKER)) {
                final GemtextAside aside = GemtextAsideParser.parse(line, currentHeading);
                final GemtextText fallback = new GemtextText(line, currentHeading);
                if (aside != null) {
                    parsed.add(aside);
                } else {
                    parsed.add(fallback);
                }
            } else {
                parsed.add(new GemtextText(line, currentHeading));
            }

            pos++;
        }

        return parsed.toArray(AbstractGemtextLine[]::new);
    }

    /**
     * Collects the lines after an opening fence up to, but excluding, the closing fence.
     *
     * @return index of the closing fence, or {@code lines.length} when the block is unterminated
     */
    private static int collectPreformat(List<AbstractGemtextLine> items, String[] lines, MemexNodeHeadingId heading, int i) {
        final List<String> body = new ArrayList<>();
        int j = i + 1;
        while (j < lines.length && !lines[j].startsWith(PREFORMAT_MARKER)) {
            body.add(lines[j]);
            j++;
        }
        items.add(new GemtextPreformat(body, heading));
        return j;
    }

    /**
     * Collects the run of list lines starting at {@code i}.
     *
     * @return index of the last line consumed
     */
    private static int collectList(List<AbstractGemtextLine> items, String[] lines, MemexNodeHeadingId heading, int i) {
        final List<String> entries = new ArrayList<>();
        int j = i;
        while (j < lines.length && lines[j].startsWith(LIST_MARKER)) {
            entries.add(GemtextListParser.parse(lines[j]));
            j++;
        }
        items.add(new GemtextList(entries, heading));
        return j - 1;
    }

    /**
     * Collects the run of literal lines starting at {@code i}, verbatim.
     *
     * @return index of the last line consumed
     */
    private static int collectLiteral(List<AbstractGemtextLine> items, String[] lines, MemexNodeHeadingId heading, int i) {
        final List<String> verbatim = new ArrayList<>();
        int j = i;
        while (j < lines.length && lines[j].startsWith(LITERAL_MARKER)) {
            verbatim.add(lines[j]);
            j++;
        }
        items.add(new GemtextTextLiteral(verbatim, heading));
        return j - 1;
    }

    /**
     * Collects the run of quote lines starting at {@code i}.
     *
     * @return index of the last line consumed
     */
    private static int collectQuote(List<AbstractGemtextLine> items, String[] lines, MemexNodeHeadingId heading, int i) {
        final List<String> quoted = new ArrayList<>();
        int j = i;
        while (j < lines.length && lines[j].startsWith(QUOTE_MARKER)) {
            quoted.add(GemtextQuoteParser.parse(lines[j]));
            j++;
        }
        items.add(new GemtextQuote(quoted, heading));
        return j - 1;
    }
}

View File

@ -1,26 +0,0 @@
package nu.marginalia.memex.gemini.gmi.parser;
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
import nu.marginalia.memex.gemini.gmi.line.GemtextPragma;
import nu.marginalia.memex.gemini.gmi.line.GemtextText;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import java.util.regex.Pattern;
/** Parses pragma lines of the form {@code %%% NAME}. */
public class GemtextPragmaParser {
    // Group 1: everything after the marker and any whitespace that follows it.
    private static final Pattern pragmaPattern = Pattern.compile("^%%%\\s*(.*|$)$");

    /**
     * @param s       raw line
     * @param heading heading the line is filed under
     * @return the pragma, or plain text when the line does not match
     */
    public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading) {
        var m = pragmaPattern.matcher(s);
        if (m.matches()) {
            final String pragmaText = m.group(1);
            return new GemtextPragma(pragmaText, heading);
        }
        return new GemtextText(s, heading);
    }
}

View File

@ -1,17 +0,0 @@
package nu.marginalia.memex.gemini.gmi.parser;
import java.util.regex.Pattern;
/** Extracts the quoted text from a quote line ({@code >text}). */
public class GemtextQuoteParser {
    // '>' followed by the (possibly empty) quoted text; leading whitespace is kept.
    private static final Pattern quotePattern = Pattern.compile("^>(.*)$");

    /**
     * Parses a single quote line.
     *
     * <p>A bare {@code >} yields an empty string rather than {@code null}, so
     * callers that collect every line starting with {@code >} never store a
     * null entry.
     *
     * @param s raw line
     * @return the text after {@code >}, or {@code null} when the line does not start with {@code >}
     */
    public static String parse(String s) {
        var matcher = quotePattern.matcher(s);
        if (!matcher.matches()) {
            return null;
        }
        return matcher.group(1);
    }
}

View File

@ -1,31 +0,0 @@
package nu.marginalia.memex.gemini.gmi.parser;
import nu.marginalia.memex.gemini.gmi.line.AbstractGemtextLine;
import nu.marginalia.memex.gemini.gmi.line.GemtextTask;
import nu.marginalia.memex.gemini.gmi.line.GemtextText;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeTaskId;
import nu.marginalia.memex.memex.model.MemexTaskTags;
import java.util.regex.Pattern;
/** Parses task lines ({@code - task}, {@code -- subtask}, ...). */
public class GemtextTaskParser {
    // Group 1: the run of '-' characters; group 2: the task text (may be empty).
    private static final Pattern taskPattern = Pattern.compile("^(-+)\\s*([^-].*|$)$");

    /**
     * @param s       raw line
     * @param heading heading the line is filed under
     * @param taskId  id of the previous task, used to derive the next id
     * @return the task, or plain text when the line does not match the task syntax
     */
    public static AbstractGemtextLine parse(String s, MemexNodeHeadingId heading,
                                            MemexNodeTaskId taskId) {
        var m = taskPattern.matcher(s);
        if (m.matches()) {
            // One '-' is depth 0, two are depth 1, and so on.
            final int depth = m.group(1).length() - 1;
            final String text = m.group(2);
            return new GemtextTask(taskId.next(depth), text, heading, new MemexTaskTags(text));
        }
        return new GemtextText(s, heading);
    }
}

View File

@ -1,91 +0,0 @@
package nu.marginalia.memex.gemini.gmi.renderer;
import nu.marginalia.memex.gemini.gmi.line.*;
import java.util.function.Function;
/**
 * Renders gemtext lines to strings by delegating each line type to the
 * converter function supplied at construction time.
 */
public class GemtextRenderer implements GemtextLineVisitor<String> {
    private final Function<GemtextHeading, String> headingConverter;
    private final Function<GemtextLink, String> linkConverter;
    private final Function<GemtextList, String> listConverter;
    private final Function<GemtextPreformat, String> preformatConverter;
    private final Function<GemtextQuote, String> quoteConverter;
    private final Function<GemtextText, String> textConverter;
    private final Function<GemtextAside, String> asideConverter;
    private final Function<GemtextTask, String> taskConverter;
    private final Function<GemtextTextLiteral, String> literalConverter;
    private final Function<GemtextPragma, String> pragmaConverter;

    /** Creates a renderer from one converter per line type. */
    public GemtextRenderer(Function<GemtextHeading, String> headingConverter,
                           Function<GemtextLink, String> linkConverter,
                           Function<GemtextList, String> listConverter,
                           Function<GemtextPreformat, String> preformatConverter,
                           Function<GemtextQuote, String> quoteConverter,
                           Function<GemtextText, String> textConverter,
                           Function<GemtextAside, String> asideConverter,
                           Function<GemtextTask, String> taskConverter,
                           Function<GemtextTextLiteral, String> literalConverter,
                           Function<GemtextPragma, String> pragmaConverter) {
        this.headingConverter = headingConverter;
        this.linkConverter = linkConverter;
        this.listConverter = listConverter;
        this.preformatConverter = preformatConverter;
        this.quoteConverter = quoteConverter;
        this.textConverter = textConverter;
        this.asideConverter = asideConverter;
        this.taskConverter = taskConverter;
        this.literalConverter = literalConverter;
        this.pragmaConverter = pragmaConverter;
    }

    /** Renders one line using the converter that matches its concrete type. */
    public String renderLine(AbstractGemtextLine line) {
        return line.visit(this);
    }

    @Override
    public String visit(GemtextHeading g) { return headingConverter.apply(g); }

    @Override
    public String visit(GemtextLink g) { return linkConverter.apply(g); }

    @Override
    public String visit(GemtextList g) { return listConverter.apply(g); }

    @Override
    public String visit(GemtextPreformat g) { return preformatConverter.apply(g); }

    @Override
    public String visit(GemtextQuote g) { return quoteConverter.apply(g); }

    @Override
    public String visit(GemtextText g) { return textConverter.apply(g); }

    @Override
    public String visit(GemtextTextLiteral g) { return literalConverter.apply(g); }

    @Override
    public String visit(GemtextAside g) { return asideConverter.apply(g); }

    @Override
    public String visit(GemtextTask g) { return taskConverter.apply(g); }

    @Override
    public String visit(GemtextPragma g) { return pragmaConverter.apply(g); }
}

View File

@ -1,227 +0,0 @@
package nu.marginalia.memex.gemini.gmi.renderer;
import nu.marginalia.memex.gemini.gmi.line.*;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import nu.marginalia.memex.memex.model.MemexUrl;
import org.apache.logging.log4j.util.Strings;
import java.util.Objects;
import java.util.stream.Collectors;
/**
 * Builds {@link GemtextRenderer}s for HTML output and for raw gemtext output.
 *
 * <p>HTML link rendering needs {@code urlBase}; editable heading rendering
 * also needs {@code docUrl}. The no-arg constructor is enough for the raw
 * gemtext renderers.
 */
public class GemtextRendererFactory {
    public final String urlBase;
    public final String docUrl;

    /** Factory able to produce every renderer. */
    public GemtextRendererFactory(String urlBase, String docUrl) {
        this.urlBase = Objects.requireNonNull(urlBase, "urlBase must not be null");
        this.docUrl = Objects.requireNonNull(docUrl, "docUrl must not be null");
    }

    /** Factory for read-only HTML and raw renderers. */
    public GemtextRendererFactory(String urlBase) {
        this.urlBase = Objects.requireNonNull(urlBase, "urlBase must not be null");
        this.docUrl = null;
    }

    /** Factory for raw gemtext renderers only. */
    public GemtextRendererFactory() {
        this.urlBase = null;
        this.docUrl = null;
    }

    /** HTML renderer that uses the editable heading converter. */
    public GemtextRenderer htmlRendererEditable() {
        return new GemtextRenderer(this::htmlHeadingEditable,
                this::htmlLink, this::htmlList,
                this::htmlPre, this::htmlQuote,
                this::htmlText, this::htmlAside,
                this::htmlTask, this::htmlLiteral,
                this::htmlPragma);
    }

    /** HTML renderer that uses the read-only heading converter. */
    public GemtextRenderer htmlRendererReadOnly() {
        return new GemtextRenderer(this::htmlHeadingReadOnly,
                this::htmlLink, this::htmlList,
                this::htmlPre, this::htmlQuote,
                this::htmlText, this::htmlAside,
                this::htmlTask, this::htmlLiteral,
                this::htmlPragma);
    }

    /** Gemtext renderer that reproduces every line, pragmas included. */
    public GemtextRenderer gemtextRendererAsIs() {
        return new GemtextRenderer(this::rawHeading,
                this::rawLink, this::rawList,
                this::rawPre, this::rawQuote,
                this::rawText, this::rawAside,
                this::rawTask, this::rawLiteral,
                this::rawPragma);
    }

    /** Gemtext renderer that renders pragma lines as empty strings. */
    public GemtextRenderer gemtextRendererPublic() {
        return new GemtextRenderer(this::rawHeading,
                this::rawLink, this::rawList,
                this::rawPre, this::rawQuote,
                this::rawText, this::rawAside,
                this::rawTask, this::rawLiteral,
                this::rawSupressPragma);
    }

    private String htmlPragma(GemtextPragma gemtextPragma) {
        return "<!-- pragma: " + sanitizeText(gemtextPragma.getLine()) + " -->\n";
    }

    /**
     * Renders a heading for an editable page.
     *
     * @throws UnsupportedOperationException if the factory was built without a docUrl
     */
    public String htmlHeadingEditable(GemtextHeading g) {
        if (docUrl == null) {
            throw new UnsupportedOperationException("Wrong constructor used, need urlBase and docUrl");
        }
        // Per-heading edit links are currently disabled, so the output matches the read-only form.
        return htmlHeadingReadOnly(g);
    }

    /** Renders a heading as h1 to h3 by level; any other level becomes h4. */
    public String htmlHeadingReadOnly(GemtextHeading g) {
        final int depth = g.getLevel().getLevel();
        final String tag = (depth >= 1 && depth <= 3) ? "h" + depth : "h4";
        return String.format("<%s id=\"%s\">%s</%s>\n", tag, g.getLevel(), sanitizeText(g.getName()), tag);
    }

    /**
     * Renders a link. {@code gemini://} targets are rewritten to go through an HTTPS proxy.
     *
     * @throws UnsupportedOperationException if the factory was built without a urlBase
     */
    public String htmlLink(GemtextLink g) {
        if (urlBase == null) {
            throw new UnsupportedOperationException("Wrong constructor used, need urlBase");
        }

        final String linkClass = getLinkClass(g.getUrl());
        // The URL lands inside an attribute value and inside element text, so escape it for both.
        final String linkUrl = sanitizeText(
                getLinkUrl(g.getUrl()).replaceFirst("^gemini://", "https://proxy.vulpes.one/gemini/"));
        final String linkText = sanitizeText(String.valueOf(g.getUrl()));

        if (g.getTitle() != null) {
            return String.format("<dl class=\"link\"><dt><a class=\"%s\" href=\"%s\">%s</a></dt><dd>%s</dd></dl>\n",
                    linkClass, linkUrl, linkText, sanitizeText(g.getTitle()));
        }
        return String.format("<a class=\"%s\" href=\"%s\">%s</a><br>\n",
                linkClass, linkUrl, linkText);
    }

    /** Prefixes node URLs and root-relative URLs with {@code urlBase}. */
    private String getLinkUrl(MemexUrl url) {
        if (url instanceof MemexNodeUrl || url.getUrl().startsWith("/")) {
            return urlBase + url;
        }
        return url.toString();
    }

    private String getLinkClass(MemexUrl url) {
        if (url instanceof MemexNodeUrl) {
            return "internal";
        }
        return "external";
    }

    public String htmlList(GemtextList g) {
        return g.getItems()
                .stream()
                .map(s -> "<li>" + sanitizeText(s) + "</li>")
                .collect(Collectors.joining("\n", "<ul>\n", "</ul>\n"));
    }

    public String htmlPre(GemtextPreformat g) {
        return g.getItems().stream()
                .map(this::sanitizeText)
                .collect(Collectors.joining("\n", "<pre>\n", "</pre>\n"));
    }

    public String htmlLiteral(GemtextTextLiteral g) {
        return g.getItems().stream()
                .map(this::sanitizeText)
                .collect(Collectors.joining("\n", "<pre class=\"literal\">\n", "</pre>\n"));
    }

    public String htmlQuote(GemtextQuote g) {
        return g.getItems().stream()
                .map(this::sanitizeText)
                .collect(Collectors.joining("<br>\n", "<blockquote>\n", "</blockquote>\n"));
    }

    public String htmlText(GemtextText g) {
        return sanitizeText(g.getLine()) + "<br>\n";
    }

    public String htmlAside(GemtextAside g) {
        return "<aside>" + sanitizeText(g.getLine()) + "</aside>\n";
    }

    /**
     * Escapes text for use in HTML element content or a double-quoted attribute.
     *
     * @param s plain text
     * @return {@code s} with {@code &}, {@code <}, {@code >} and {@code "} replaced by entities
     */
    public String sanitizeText(String s) {
        // '&' must be replaced first so the entities produced below are not escaped again.
        return s.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;")
                .replace("\"", "&quot;");
    }

    public String htmlTask(GemtextTask g) {
        return String.format("<a class=\"task-pointer\" name=\"t%s\"></a><div class=\"task %s\" id=\"%s\">%s %s</div>\n",
                g.getId(),
                g.getState().style,
                g.getId(),
                "-".repeat(g.getLevel()),
                sanitizeText(g.getTask()));
    }

    /** Renders a heading as gemtext; levels other than 1 and 2 use {@code ###}. */
    public String rawHeading(GemtextHeading g) {
        final int depth = g.getLevel().getLevel();
        if (depth == 1) {
            return "# " + g.getName();
        }
        if (depth == 2) {
            return "## " + g.getName();
        }
        return "### " + g.getName();
    }

    public String rawLink(GemtextLink g) {
        if (g.getTitle() != null && !g.getTitle().isBlank()) {
            return "=> " + g.getUrl().getUrl() + "\t" + g.getTitle();
        }
        return "=> " + g.getUrl().getUrl();
    }

    public String rawList(GemtextList g) {
        return g.getItems()
                .stream()
                .map(s -> "* " + s)
                .collect(Collectors.joining("\n"));
    }

    public String rawPre(GemtextPreformat g) {
        return g.getItems().stream()
                .collect(Collectors.joining("\n", "```\n", "\n```"));
    }

    /** Renders each quoted line on its own line, so multi-line quotes stay separate. */
    public String rawQuote(GemtextQuote g) {
        return g.getItems().stream()
                .map(s -> "> " + s)
                .collect(Collectors.joining("\n"));
    }

    public String rawText(GemtextText g) {
        return g.getLine();
    }

    public String rawLiteral(GemtextTextLiteral g) {
        return Strings.join(g.getItems(), '\n');
    }

    public String rawAside(GemtextAside g) {
        return "(" + g.getLine() + ")";
    }

    public String rawTask(GemtextTask g) {
        return "-".repeat(Math.max(0, g.getLevel())) + " " + g.getTask();
    }

    private String rawPragma(GemtextPragma gemtextPragma) {
        return "%%% " + gemtextPragma.getLine();
    }

    private String rawSupressPragma(GemtextPragma gemtextPragma) {
        return "";
    }
}

View File

@ -1,185 +0,0 @@
package nu.marginalia.memex.gemini.io;
import nu.marginalia.memex.gemini.BadBotList;
import nu.marginalia.memex.gemini.plugins.FileType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.net.ssl.SSLSocket;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Optional;
import java.util.stream.Stream;
/**
 * One Gemini request/response exchange over an {@link SSLSocket}: reads the
 * request line and writes the status line and body.
 */
public class GeminiConnection {
    private final SSLSocket connection;
    private final Logger logger = LoggerFactory.getLogger("Server");

    private final OutputStream os;
    private final InputStream is;
    private static final BadBotList badBotList = BadBotList.INSTANCE;

    public GeminiConnection(SSLSocket connection) throws IOException {
        this.connection = connection;

        this.os = connection.getOutputStream();
        this.is = connection.getInputStream();
    }

    /** @return the host address of the socket's peer */
    public String getAddress() {
        return connection.getInetAddress().getHostAddress();
    }

    /**
     * Reads the request line and parses it as a URI.
     *
     * @return the requested URI, or empty when the bad-bot list rejects the request
     * @throws GeminiUserException if the request line is blank, too long, or not CRLF-terminated
     */
    public Optional<URI> readUrl() throws Exception {
        var str = new GeminiInput().get();

        if (!badBotList.isQueryPermitted(connection.getInetAddress(), str)) {
            return Optional.empty();
        }

        if (!str.isBlank()) {
            return Optional.of(new URI(str));
        }
        throw new GeminiUserException("Bad URI");
    }

    public void redirect(String address) throws IOException {
        writeStatusLine(GeminiStatusCode.REDIRECT, address);
    }

    public void redirectPermanent(String address) throws IOException {
        writeStatusLine(GeminiStatusCode.REDIRECT_PERMANENT, address);
    }

    /** Writes the response header line {@code <code> <meta>} followed by CRLF. */
    public GeminiConnection writeStatusLine(int code, String meta) throws IOException {
        write(String.format("%2d %s", code, meta));
        return this;
    }

    /** Writes raw bytes with no line terminator. */
    public GeminiConnection writeBytes(byte[] data) throws IOException {
        write(data);
        return this;
    }

    public GeminiConnection printf(String pattern, Object...args) throws IOException {
        write(String.format(pattern, args));
        return this;
    }

    /** Writes each string followed by CRLF. */
    public GeminiConnection writeLines(String... lines) throws IOException {
        for (String s : lines) {
            write(s);
        }
        return this;
    }

    /**
     * Streams a text file to the client line by line, each line followed by CRLF.
     *
     * @throws IOException if the file cannot be opened or a write to the client
     *                     fails; writing stops at the first failed write
     */
    public GeminiConnection writeLinesFromFile(Path file) throws IOException {
        try (Stream<String> lines = Files.lines(file)) {
            // A plain iterator lets the checked IOException from write() propagate.
            // Once one write fails, the remaining ones would fail as well.
            for (var it = lines.iterator(); it.hasNext(); ) {
                write(it.next());
            }
        }
        return this;
    }

    /**
     * Writes each line of the stream, each followed by CRLF. Write failures
     * are logged and not propagated, as this method declares no checked exception.
     */
    public GeminiConnection acceptLines(Stream<String> lines) {
        lines.forEach(line -> {
            try {
                write(line);
            } catch (IOException e) {
                logger.error("IO exception", e);
            }
        });
        return this;
    }

    private void write(String s) throws IOException {
        os.write(s.getBytes(StandardCharsets.UTF_8));
        os.write(new byte[] { '\r', '\n'});
    }

    private void write(byte[] bs) throws IOException {
        os.write(bs);
    }

    // This is a weird pattern but it makes the listing code very much cleaner
    /** Logs the message and always throws {@link GeminiUserException}. */
    public void error(String message) {
        logger.error("{}", message);
        throw new GeminiUserException(message);
    }

    /** Shuts down output, then closes the socket even if the shutdown failed. */
    public void close() {
        try {
            connection.shutdownOutput();
        } catch (IOException e) {
            logger.error("Failed to shut down connection output", e);
        } finally {
            try {
                connection.close();
            } catch (IOException e) {
                logger.error("Failed to close connection", e);
            }
        }
    }

    public boolean isConnected() {
        return connection.isConnected();
    }

    /** Sends a success status line followed by the file, as raw bytes or as text lines. */
    public void respondWithFile(Path serverPath, FileType fileType) throws IOException {
        if (fileType.binary) {
            writeStatusLine(GeminiStatusCode.SUCCESS, fileType.mime)
                    .writeBytes(Files.readAllBytes(serverPath));
        }
        else {
            writeStatusLine(GeminiStatusCode.SUCCESS, fileType.mime)
                    .writeLinesFromFile(serverPath);
        }
    }

    /** Reads a single CRLF-terminated request line from the connection's input stream. */
    public class GeminiInput {
        // A Gemini request is a URL of at most 1024 bytes followed by CR LF.
        private final byte[] buffer = new byte[1024 + 2];
        private int idx = 0;
        final String result;

        public GeminiInput() throws IOException {
            result = readRequestLine();
        }

        /** @return the request line without its trailing CRLF */
        public String get() {
            return result;
        }

        /** Reads bytes until CRLF is seen; the buffer filling up first is reported as an error. */
        private String readRequestLine() throws IOException {
            while (true) {
                if (hasEndOfLine()) {
                    return new String(buffer, 0, idx - 2, StandardCharsets.UTF_8);
                }
                if (idx == buffer.length) {
                    error("String too long");
                    return ""; // unreachable: error() always throws
                }
                readCharacter();
                idx++;
            }
        }

        private void readCharacter() throws IOException {
            int rb = is.read();
            if (-1 == rb) {
                error("URL incomplete (no CR LF)");
            }
            buffer[idx] = (byte) rb;
        }

        /** @return true when the last two bytes read are CR LF, including an otherwise empty line */
        public boolean hasEndOfLine() {
            return idx >= 2
                    && buffer[idx - 1] == (byte) '\n'
                    && buffer[idx - 2] == (byte) '\r';
        }
    }
}

View File

@ -1,49 +0,0 @@
package nu.marginalia.memex.gemini.io;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import javax.net.ssl.*;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.KeyStore;
import java.security.SecureRandom;
/**
 * Builds the TLS server socket factory for the Gemini service from a JKS key
 * store file and a separate file holding the key store password.
 */
public class GeminiSSLSetUp {
    private final Path certPasswordFile;
    private final Path certFile;

    /**
     * @param certFile         location of the JKS key store
     * @param certPasswordFile location of the file containing the key store password
     */
    @Inject
    public GeminiSSLSetUp(
            @Named("gemini-cert-file") Path certFile,
            @Named("gemini-cert-password-file") Path certPasswordFile) {
        this.certFile = certFile;
        this.certPasswordFile = certPasswordFile;
    }

    /**
     * Reads the key store password from the password file.
     * <p>
     * NOTE(review): the file content is returned verbatim, so a trailing
     * newline in the file becomes part of the password — confirm the file is
     * written without one.
     *
     * @return the complete content of the password file
     * @throws IOException if the file cannot be read
     */
    public String getCertPassword() throws IOException {
        return Files.readString(certPasswordFile);
    }

    /**
     * Loads the key store and creates a TLSv1.3 context that uses it for both
     * key and trust material.
     */
    private SSLContext getContext() throws Exception {
        KeyStore ks = KeyStore.getInstance("JKS", "SUN");

        final char[] password;
        // KeyStore.load does not close the stream it is given, so close it here.
        try (var certStream = Files.newInputStream(certFile)) {
            // Read the password once and reuse it for the key manager below.
            password = getCertPassword().toCharArray();
            ks.load(certStream, password);
        }

        KeyManagerFactory kmf = KeyManagerFactory.getInstance("SunX509");
        kmf.init(ks, password);
        KeyManager[] keyManagers = kmf.getKeyManagers();

        TrustManagerFactory tmf = TrustManagerFactory.getInstance("X509");
        tmf.init(ks);
        TrustManager[] trustManagers = tmf.getTrustManagers();

        var ctx = SSLContext.getInstance("TLSv1.3");
        ctx.init(keyManagers, trustManagers, new SecureRandom());
        return ctx;
    }

    /**
     * @return a server socket factory backed by a freshly created TLS context
     * @throws Exception if the key store cannot be loaded or the context cannot be initialised
     */
    public SSLServerSocketFactory getServerSocketFactory() throws Exception {
        return getContext().getServerSocketFactory();
    }
}

View File

@ -1,11 +0,0 @@
package nu.marginalia.memex.gemini.io;
/**
 * Numeric status codes written in the status line of a Gemini response,
 * listed in ascending numeric order.
 */
public class GeminiStatusCode {
    /** Asks the client to supply input; the status line carries the prompt. */
    public static final int INPUT = 10;

    /** The request succeeded; the status line carries the MIME type of the body. */
    public static final int SUCCESS = 20;

    /** Redirect. */
    public static final int REDIRECT = 30;
    /** Permanent redirect. */
    public static final int REDIRECT_PERMANENT = 31;

    /** Temporary failure. */
    public static final int ERROR_TEMPORARY = 40;
    /** A backend the server relies on could not be used. */
    public static final int PROXY_ERROR = 43;

    /** Permanent failure. */
    public static final int ERROR_PERMANENT = 50;
}

View File

@ -1,8 +0,0 @@
package nu.marginalia.memex.gemini.io;
/**
 * Throw to report message to user.
 * <p>
 * Unchecked, so it can be raised from anywhere in request handling without
 * being declared; the exception message is the text intended for the user.
 */
public class GeminiUserException extends RuntimeException {
/** @param message the text to report to the user */
public GeminiUserException(String message) {
super(message);
}
}

View File

@ -1,52 +0,0 @@
package nu.marginalia.memex.gemini.plugins;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import nu.marginalia.memex.gemini.GeminiService;
import nu.marginalia.memex.gemini.io.GeminiConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Serves regular files found below the Gemini server root. A request for a
 * directory is answered with that directory's default file when one exists.
 */
public class BareStaticPagePlugin implements Plugin {
    private final Logger logger = LoggerFactory.getLogger(getClass());
    private final Path geminiServerRoot;

    /** @param geminiServerRoot directory whose content is served */
    @Inject
    public BareStaticPagePlugin(@Named("gemini-server-root") Path geminiServerRoot) {
        this.geminiServerRoot = geminiServerRoot;
    }

    /**
     * Serves the file addressed by the path component of {@code url}.
     * <p>
     * The requested location is checked against the server root before the
     * file system is consulted, so requests that point outside the root are
     * rejected without revealing whether a file exists there.
     *
     * @return true if a file was served; false if the URL has no path or does
     *         not address a regular file
     * @throws IOException if sending the file fails
     */
    @Override
    public boolean serve(URI url, GeminiConnection connection) throws IOException {
        final String requestPath = url.getPath();
        if (requestPath == null) {
            // Opaque URIs have no path component, so there is nothing to look up.
            return false;
        }

        // Verify the untrusted path before any file system probe.
        final Path requestedPath = Path.of(geminiServerRoot + requestPath);
        verifyPath(geminiServerRoot, requestedPath);

        final Path serverPath = getServerPath(requestedPath);
        if (!Files.isRegularFile(serverPath)) {
            return false;
        }

        logger.info("Serving {}", serverPath);
        connection.respondWithFile(serverPath, FileType.match(serverPath));
        return true;
    }

    /** Maps a directory to its default file when that file exists; other paths are returned unchanged. */
    private Path getServerPath(Path requestedPath) {
        if (Files.isDirectory(requestedPath)) {
            final Path defaultFile = requestedPath.resolve(GeminiService.DEFAULT_FILENAME);
            if (Files.isRegularFile(defaultFile)) {
                return defaultFile;
            }
        }
        return requestedPath;
    }
}

View File

@ -1,58 +0,0 @@
package nu.marginalia.memex.gemini.plugins;
import java.nio.file.Path;
/**
 * File types known to the Gemini file server, each with the file name suffix
 * it is recognised by, the MIME type it is served as, a display icon, and a
 * flag telling whether it must be transferred as raw bytes.
 */
public enum FileType {
    GMI("gmi", "text/gemini", FileIcons.DOCUMENT, false),
    GEM("gem", "text/gemini", FileIcons.DOCUMENT, false),
    TXT("txt", "text/plain", FileIcons.DOCUMENT, false),
    MARKDOWN("md", "text/markdown", FileIcons.DOCUMENT, false),
    JAVA("java", "text/java", FileIcons.JAVA, false),
    PROPERTIES("properties", "text/properties", FileIcons.SETTINGS, false),
    GRADLE("gradle", "text/gradle", FileIcons.SETTINGS, false),
    ZIP("zip", "application/zip", FileIcons.ZIP, true),
    PNG("png", "image/png", FileIcons.IMAGE, true),
    // "image/jpeg" is the registered media type; "image/jpg" is not.
    JPG("jpg", "image/jpeg", FileIcons.IMAGE, true),
    JPEG("jpeg", "image/jpeg", FileIcons.IMAGE, true),
    BIN("bin", "application/binary", FileIcons.BINARY, true),
    SH("sh", "text/sh", FileIcons.SETTINGS, false),
    XML("xml", "text/xml", FileIcons.DOCUMENT, false),
    DOCKERFILE("Dockerfile", "text/dockerfile", FileIcons.SETTINGS, false)
    ;

    public final String suffix;
    public final String mime;
    public final String icon;
    public final boolean binary;

    FileType(String suffix, String mime, String icon, boolean binary) {
        this.suffix = suffix;
        this.mime = mime;
        this.icon = icon;
        this.binary = binary;
    }

    /**
     * Determines the type of a file from its name.
     * <p>
     * A type matches when the last path component either equals the type's
     * suffix (as for {@code Dockerfile}) or ends with {@code "." + suffix}.
     * Requiring the dot keeps names that merely end in the same letters, such
     * as {@code trash} for {@code sh}, from being misclassified.
     *
     * @param fileName a file name or path
     * @return the first matching type in declaration order, or {@link #BIN} if none matches
     */
    public static FileType match(String fileName) {
        final String baseName = baseName(fileName);

        for (var type : values()) {
            if (baseName.equals(type.suffix) || baseName.endsWith("." + type.suffix)) {
                return type;
            }
        }

        return BIN;
    }

    /**
     * Determines the type of a file from its path.
     *
     * @see #match(String)
     */
    public static FileType match(Path path) {
        return match(path.toString());
    }

    /** Returns the part of {@code fileName} after the last {@code /} or {@code \}. */
    private static String baseName(String fileName) {
        final int lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        return fileName.substring(lastSeparator + 1);
    }
}

/** Icon strings used when listing files of the various {@link FileType}s. */
class FileIcons {
    public static final String DOCUMENT = "🗒";
    public static final String JAVA = "";
    public static final String SETTINGS = "💻";
    public static final String ZIP = "🗜";
    public static final String IMAGE = "🖼";
    public static final String DIRECTORY = "🗂";
    public static final String BINARY = "📚";
}

View File

@ -1,19 +0,0 @@
package nu.marginalia.memex.gemini.plugins;
import nu.marginalia.memex.gemini.io.GeminiConnection;
import nu.marginalia.memex.gemini.io.GeminiUserException;
import java.io.IOException;
import java.net.URI;
import java.nio.file.Path;
/** A handler that may answer a Gemini request. */
public interface Plugin {
    /**
     * Attempts to answer the request for {@code url} on {@code connection}.
     *
     * @return true if content served
     * @throws IOException if writing the response fails
     */
    boolean serve(URI url, GeminiConnection connection) throws IOException;

    /**
     * Rejects paths that, once {@code .} and {@code ..} elements are
     * resolved, do not lie below {@code root}.
     * <p>
     * Both paths are normalised before the comparison. Comparing a normalised
     * path against an un-normalised root such as {@code ./gemini} would
     * otherwise never match and every request would be rejected.
     *
     * @param root the directory requests are confined to
     * @param p    the requested path
     * @throws GeminiUserException if {@code p} is outside {@code root}
     */
    default void verifyPath(Path root, Path p) {
        if (!p.normalize().startsWith(root.normalize())) {
            throw new GeminiUserException("ಠ_ಠ That path is off limits!");
        }
    }
}

View File

@ -1,78 +0,0 @@
package nu.marginalia.memex.gemini.plugins;
import com.google.inject.Inject;
import nu.marginalia.memex.gemini.io.GeminiConnection;
import nu.marginalia.memex.gemini.io.GeminiStatusCode;
import org.apache.http.HttpHost;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.conn.routing.HttpRoute;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class SearchPlugin implements Plugin {
private final PoolingHttpClientConnectionManager connectionManager;
private final Logger logger = LoggerFactory.getLogger(getClass());
@Inject
public SearchPlugin() {
connectionManager = new PoolingHttpClientConnectionManager();
connectionManager.setMaxTotal(200);
connectionManager.setDefaultMaxPerRoute(20);
HttpHost host = new HttpHost("https://search.marginalia.nu/");
connectionManager.setMaxPerRoute(new HttpRoute(host), 20);
}
@Override
public boolean serve(URI url, GeminiConnection connection) throws IOException {
var client = HttpClients.custom()
.setConnectionManager(connectionManager)
.build();
if (!"/search".equals(url.getPath())) {
return false;
}
String query = url.getRawQuery();
if (null == query || "".equals(query)) {
logger.info("Requesting search terms");
connection.writeStatusLine(GeminiStatusCode.INPUT, "Please enter a search query");
}
else {
logger.info("Delegating search query '{}'", query);
final HttpGet get = new HttpGet(createSearchUri(query));
final byte[] binaryResponse;
try (var rsp = client.execute(get)) {
binaryResponse = rsp.getEntity().getContent().readAllBytes();
}
catch (IOException ex) {
logger.error("backend error", ex);
connection.writeStatusLine(GeminiStatusCode.PROXY_ERROR, "Failed to reach backend server");
return true;
}
connection
.writeStatusLine(GeminiStatusCode.SUCCESS, "text/gemini")
.writeBytes(binaryResponse);
}
return true;
}
private URI createSearchUri(String query) {
try {
return new URI("https://search.marginalia.nu/search?format=gmi&query="+query);
} catch (URISyntaxException e) {
throw new RuntimeException(e);
}
}
}

View File

@ -1,244 +0,0 @@
package nu.marginalia.memex.memex;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import io.reactivex.rxjava3.schedulers.Schedulers;
import nu.marginalia.memex.gemini.GeminiService;
import nu.marginalia.memex.gemini.gmi.GemtextDatabase;
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
import nu.marginalia.memex.util.dithering.FloydSteinbergDither;
import nu.marginalia.memex.util.dithering.Palettes;
import nu.marginalia.memex.memex.change.GemtextTombstoneUpdateCaclulator;
import nu.marginalia.memex.memex.model.MemexImage;
import nu.marginalia.memex.memex.model.MemexNode;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import nu.marginalia.memex.memex.renderer.MemexRendererers;
import nu.marginalia.memex.memex.system.MemexFileSystemMonitor;
import nu.marginalia.memex.memex.system.MemexFileWriter;
import nu.marginalia.memex.memex.system.git.MemexGitRepo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import javax.imageio.ImageIO;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
/**
 * Central facade of the memex: loads the document store, keeps the rendered
 * output up to date when documents, images, tombstones or redirects change,
 * and offers the operations for creating, updating, renaming and deleting nodes.
 */
@Singleton
public class Memex {

    private final MemexData data;
    private final MemexFileSystemMonitor monitor;
    private final MemexGitRepo gitRepo;
    private final MemexLoader loader;
    private final MemexFileWriter resources;
    private final GemtextTombstoneUpdateCaclulator tombstoneUpdateCaclulator;
    private final FloydSteinbergDither ditherer = new FloydSteinbergDither(Palettes.MARGINALIA_PALETTE, 640, 480);
    private final MemexRendererers renderers;

    private static final Logger logger = LoggerFactory.getLogger(Memex.class);

    /**
     * Wires up the collaborators and starts the background work: the initial
     * load, a once-per-second refresh of changed files (only when a file
     * system monitor is available), and the Gemini service on its own thread.
     *
     * @param monitor may be null, in which case no periodic refresh is scheduled
     */
    @Inject
    public Memex(MemexData data,
                 @Nullable MemexFileSystemMonitor monitor,
                 MemexGitRepo gitRepo, MemexLoader loader,
                 @Named("html") MemexFileWriter htmlFiles,
                 GemtextTombstoneUpdateCaclulator tombstoneUpdateCaclulator,
                 MemexRendererers renderers,
                 GeminiService geminiService) {
        this.data = data;
        this.monitor = monitor;
        this.gitRepo = gitRepo;
        this.loader = loader;
        this.resources = htmlFiles;
        this.tombstoneUpdateCaclulator = tombstoneUpdateCaclulator;
        this.renderers = renderers;

        Schedulers.io().scheduleDirect(this::load);

        if (monitor != null) {
            Schedulers.io().schedulePeriodicallyDirect(this::refreshUpdatedUrls, 1, 1, TimeUnit.SECONDS);
        }

        Schedulers.newThread().scheduleDirect(geminiService::run);
    }

    /**
     * Reloads and re-renders everything the monitor reports as changed.
     * A failure on one URL is logged and does not stop the remaining ones.
     */
    private void refreshUpdatedUrls() {
        var updatedUrls = monitor.getUpdatedUrls();
        for (var url : updatedUrls) {
            try {
                if (url.toString().endsWith(".gmi")) {
                    var updates = loader.reloadNode(url);
                    updates.forEach(renderers::render);
                    if (!updates.isEmpty()) {
                        renderers.render(url.getParentUrl());
                    }
                } else if (url.toString().endsWith(".png")) {
                    var updates = loader.reloadImage(url);
                    renderers.render(url);
                    if (!updates.isEmpty()) {
                        renderers.render(url.getParentUrl());
                    }
                }

                if (tombstoneUpdateCaclulator.isTombstoneFile(url)) {
                    loader.loadTombstones().forEach(renderers::render);
                }
                if (tombstoneUpdateCaclulator.isRedirectFile(url)) {
                    loader.loadRedirects().forEach(renderers::render);
                }
            }
            catch (Exception ex) {
                logger.error("Failed to refresh URL {}", url, ex);
            }
        }
    }

    /** Copies the static resources, then loads all source files and renders them. */
    private void load() {
        copyStylesheet();

        try {
            loader.load();
            renderAll();
        }
        catch (IOException ex) {
            logger.error("Failed to load", ex);
        }
    }

    /** Copies the stylesheet and icon files from the class path to the HTML output. */
    private void copyStylesheet() {
        copyResource("static/memex/style-new.css", "/style-new.css");
        copyResource("static/memex/ico/dir.png", "/ico/dir.png");
        copyResource("static/memex/ico/file.png", "/ico/file.png");
        copyResource("static/memex/ico/root.png", "/ico/root.png");
        copyResource("static/memex/ico/pic16.png", "/ico/pic16.png");
    }

    /**
     * Copies one class path resource to the HTML output. A missing resource
     * or a failed write is logged; it does not prevent the other copies.
     *
     * @param resourcePath class path location of the resource
     * @param targetUrl    node URL the resource is written to
     */
    private void copyResource(String resourcePath, String targetUrl) {
        try (var resource = Objects.requireNonNull(
                ClassLoader.getSystemResourceAsStream(resourcePath),
                "Could not load resource " + resourcePath)) {
            resources.write(new MemexNodeUrl(targetUrl), resource.readAllBytes());
        }
        catch (Exception ex) {
            logger.error("Failed to copy resource {}", resourcePath, ex);
        }
    }

    /** Renders every document, directory and image, plus tombstone and redirect entries without a document. */
    private void renderAll() {
        data.forEach((url, doc) -> {
            renderers.render(url);
        });
        data.getDirectories().forEach(renderers::render);
        data.getImages().forEach(img -> renderers.render(img.path));
        data.getTombstones().ifPresent(this::renderTombstoneFromGemtextDb);
        data.getRedirects().ifPresent(this::renderTombstoneFromGemtextDb);
    }

    /** Renders each key of {@code db} for which no document exists. */
    private void renderTombstoneFromGemtextDb(GemtextDatabase db) {
        db.keys()
                .stream()
                .map(MemexNodeUrl::new)
                .filter(url -> getDocument(url) == null)
                .forEach(renderers::render);
    }

    /** Replaces the text of an existing node and re-renders the affected nodes and the parent. */
    public void updateNode(MemexNodeUrl node, String text) throws IOException {
        var nodes = loader.updateNode(node, text);
        nodes.forEach(renderers::render);
        renderers.render(node.getParentUrl());
    }

    /** @return the document stored under {@code url}, or null if there is none */
    public GemtextDocument getDocument(MemexNodeUrl url) {
        return data.getDocument(url);
    }

    /** @return the image stored under {@code url}, or null if there is none */
    public MemexImage getImage(MemexNodeUrl url) {
        return data.getImage(url);
    }

    /** Creates a new node with the given text and re-renders the affected nodes and the parent. */
    public void createNode(MemexNodeUrl node, String text) throws IOException {
        var nodes = loader.createNode(node, text);
        nodes.forEach(renderers::render);
        renderers.render(node.getParentUrl());
    }

    /**
     * Dithers the uploaded image, stores it as PNG under {@code url} and
     * re-renders the image and its parent.
     *
     * @param bytes the encoded image as uploaded
     * @throws IOException if the bytes are not an image format that can be
     *         decoded, or if storing the converted image fails
     */
    public void uploadImage(MemexNodeUrl url, byte[] bytes) throws IOException {
        var image = ImageIO.read(new ByteArrayInputStream(bytes));
        if (image == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("Could not decode image data for " + url);
        }

        var convertedImage = ditherer.convert(image);
        var baosOut = new ByteArrayOutputStream();
        ImageIO.write(convertedImage, "png", baosOut);

        loader.uploadImage(url, baosOut.toByteArray());

        renderers.render(url);
        renderers.render(url.getParentUrl());
    }

    /** Records a tombstone with {@code message} for the node, deletes it and re-renders what was affected. */
    public void delete(MemexNode node, String message) throws IOException {
        tombstoneUpdateCaclulator.addTombstone(node.getUrl(), message)
                .visit(this);
        loader.loadTombstones();
        loader.delete(node).forEach(renderers::render);
    }

    /** @return the documents located at the given path */
    public List<GemtextDocument> getDocumentsByPath(MemexNodeUrl url) {
        return data.getDocumentsByPath(url);
    }

    /** Pulls the latest changes into the backing git repository. */
    public void gitPull() {
        gitRepo.pull();
    }

    /** Records a redirect from {@code src} to {@code dst}, renames the node and re-renders what was affected. */
    public void rename(MemexNode src, MemexNodeUrl dst) throws IOException {
        tombstoneUpdateCaclulator.addRedirect(src.getUrl(), dst.toString())
                .visit(this);
        loader.loadRedirects();
        loader.rename(src, dst).forEach(renderers::render);
    }

    /** @return the raw source bytes of the node at {@code url} */
    public byte[] getRaw(MemexNodeUrl url) throws IOException {
        return loader.getRaw(url);
    }
}

View File

@ -1,87 +0,0 @@
package nu.marginalia.memex.memex;
import com.google.inject.AbstractModule;
import com.google.inject.Inject;
import com.google.inject.Provider;
import com.google.inject.name.Named;
import com.google.inject.name.Names;
import lombok.SneakyThrows;
import nu.marginalia.memex.gemini.GeminiService;
import nu.marginalia.memex.gemini.GeminiServiceDummy;
import nu.marginalia.memex.gemini.GeminiServiceImpl;
import nu.marginalia.memex.memex.system.MemexFileWriter;
import nu.marginalia.memex.memex.system.git.MemexGitRepo;
import nu.marginalia.memex.memex.system.git.MemexGitRepoDummy;
import nu.marginalia.memex.memex.system.git.MemexGitRepoImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.file.Path;
public class MemexConfigurationModule extends AbstractModule {
private static final Logger logger = LoggerFactory.getLogger(MemexConfigurationModule.class);
private static final String MEMEX_ROOT_PROPERTY = System.getProperty("memex-root", "/var/lib/wmsa/memex");
private static final String MEMEX_HTML_PROPERTY = System.getProperty("memex-html-resources", "/var/lib/wmsa/memex-html");
private static final String MEMEX_GMI_PROPERTY = System.getProperty("memex-gmi-resources", "/var/lib/wmsa/memex-gmi");
private static final boolean MEMEX_DISABLE_GIT = Boolean.getBoolean("memex-disable-git");
private static final boolean MEMEX_DISABLE_GEMINI = Boolean.getBoolean("memex-disable-gemini");
@SneakyThrows
public MemexConfigurationModule() {
Thread.sleep(100);
}
public void configure() {
bind(Path.class).annotatedWith(Names.named("memex-root")).toInstance(Path.of(MEMEX_ROOT_PROPERTY));
bind(Path.class).annotatedWith(Names.named("memex-html-resources")).toInstance(Path.of(MEMEX_HTML_PROPERTY));
bind(Path.class).annotatedWith(Names.named("memex-gmi-resources")).toInstance(Path.of(MEMEX_GMI_PROPERTY));
bind(String.class).annotatedWith(Names.named("tombestone-special-file")).toInstance("/special/tombstone.gmi");
bind(String.class).annotatedWith(Names.named("redirects-special-file")).toInstance("/special/redirect.gmi");
switchImpl(MemexGitRepo.class, MEMEX_DISABLE_GIT, MemexGitRepoDummy.class, MemexGitRepoImpl.class);
switchImpl(GeminiService.class, MEMEX_DISABLE_GEMINI, GeminiServiceDummy.class, GeminiServiceImpl.class);
bind(MemexFileWriter.class).annotatedWith(Names.named("html")).toProvider(MemexHtmlWriterProvider.class);
bind(MemexFileWriter.class).annotatedWith(Names.named("gmi")).toProvider(MemexGmiWriterProvider.class);
}
<T> void switchImpl(Class<T> impl, boolean param, Class<? extends T> ifEnabled, Class<? extends T> ifDisabled) {
final Class<? extends T> choice;
if (param) {
choice = ifEnabled;
}
else {
choice = ifDisabled;
}
bind(impl).to(choice).asEagerSingleton();
}
public static class MemexHtmlWriterProvider implements Provider<MemexFileWriter> {
private final Path path;
@Inject
public MemexHtmlWriterProvider(@Named("memex-html-resources") Path resources) {
this.path = resources;
}
@Override
public MemexFileWriter get() {
return new MemexFileWriter(path);
}
}
public static class MemexGmiWriterProvider implements Provider<MemexFileWriter> {
private final Path path;
@Inject
public MemexGmiWriterProvider(@Named("memex-gmi-resources") Path resources) {
this.path = resources;
}
@Override
public MemexFileWriter get() {
return new MemexFileWriter(path);
}
}
}

View File

@ -1,150 +0,0 @@
package nu.marginalia.memex.memex;
import com.google.inject.Singleton;
import nu.marginalia.memex.gemini.gmi.GemtextDatabase;
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
import nu.marginalia.memex.memex.model.MemexImage;
import nu.marginalia.memex.memex.model.MemexLink;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import nu.marginalia.memex.memex.model.fs.MemexFileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.function.BiConsumer;
/**
 * In-memory state of the memex: documents and images by URL, the link graph
 * between documents, the directory structure, and the optional tombstone and
 * redirect databases.
 * <p>
 * Access to the mutable state is serialised on this object's monitor. The
 * tombstone and redirect accessors take the monitor as well, so a reader
 * always sees the database most recently stored by the corresponding setter.
 */
@Singleton
public class MemexData {
    private final MemexLinks links = new MemexLinks();
    private final Map<MemexNodeUrl, GemtextDocument> documents = new HashMap<>();
    private final Map<MemexNodeUrl, MemexImage> images = new HashMap<>();
    private final MemexFileSystem fileSystem = new MemexFileSystem();

    private final Logger logger = LoggerFactory.getLogger(getClass());

    private GemtextDatabase tombstones = null;
    private GemtextDatabase redirects = null;

    /** @return a snapshot copy of all known images */
    public synchronized Collection<MemexImage> getImages() {
        return new ArrayList<>(images.values());
    }

    /** @return a snapshot copy of all known documents */
    public synchronized Collection<GemtextDocument> getDocuments() { return new ArrayList<>(documents.values()); }

    public synchronized void setTombstones(GemtextDatabase tombstones) {
        this.tombstones = tombstones;
    }

    public synchronized void setRedirects(GemtextDatabase redirects) {
        this.redirects = redirects;
    }

    /** Stores {@code doc} under {@code url} and registers it with the file system model. */
    public synchronized void addDocument(MemexNodeUrl url, GemtextDocument doc) {
        logger.debug("addDocument({})", url);
        documents.put(url, doc);
        fileSystem.register(doc);
    }

    /** Stores {@code img} under {@code url} and registers it with the file system model. */
    public synchronized void addImage(MemexNodeUrl url, MemexImage img) {
        images.put(url, img);
        fileSystem.register(img);
    }

    /** @return the tombstone database, empty if none has been set */
    public synchronized Optional<GemtextDatabase> getTombstones() {
        return Optional.ofNullable(tombstones);
    }

    /** @return the redirect database, empty if none has been set */
    public synchronized Optional<GemtextDatabase> getRedirects() {
        return Optional.ofNullable(redirects);
    }

    /**
     * Replaces the outgoing links recorded for a document with the links
     * found in {@code doc}. Links of an {@code index.gmi} file are recorded
     * under the URL of its parent directory.
     */
    public synchronized void updateOutlinks(MemexNodeUrl url, GemtextDocument doc) {
        var linksForNode = new TreeSet<>(Comparator.comparing(MemexLink::getDest));

        MemexNodeUrl srcUrl = "index.gmi".equals(url.getFilename()) ? url.getParentUrl() : url;

        for (var link : doc.getLinks()) {
            link.getUrl().visitNodeUrl(nodeUrl ->
                    linksForNode.add(new MemexLink(nodeUrl, srcUrl, doc.getTitle(), doc.getHeadingForElement(link), link.getHeading()))
            );
        }

        links.setOutlinks(srcUrl, linksForNode);
    }

    /** @return the URLs linked from or linking to {@code url} */
    public synchronized Set<MemexNodeUrl> getNeighbors(MemexNodeUrl url) {
        return links.getNeighbors(url);
    }

    /** Invokes {@code consumer} for every document while holding this object's monitor. */
    public synchronized void forEach(BiConsumer<MemexNodeUrl, GemtextDocument> consumer) {
        documents.forEach(consumer);
    }

    /** @return the document stored under {@code url}, or null if there is none */
    public synchronized GemtextDocument getDocument(MemexNodeUrl url) {
        return documents.get(url);
    }

    /** @return the image stored under {@code url}, or null if there is none */
    public synchronized MemexImage getImage(MemexNodeUrl url) {
        return images.get(url);
    }

    public synchronized List<MemexLink> getBacklinks(MemexNodeUrl... urls) {
        return links.getBacklinks(urls);
    }

    public synchronized List<GemtextDocument> getDocumentsByPath(MemexNodeUrl url) {
        return fileSystem.getDocuments(url);
    }

    public synchronized List<MemexImage> getImagesByPath(MemexNodeUrl url) {
        return fileSystem.getImages(url);
    }

    public synchronized List<MemexNodeUrl> getSubdirsByPath(MemexNodeUrl url) {
        return fileSystem.getSubdirs(url);
    }

    public MemexFileSystem getFilesystem() {
        return fileSystem;
    }

    public List<MemexNodeUrl> getDirectories() {
        return fileSystem.getAllDirectories();
    }

    public boolean isDirectory(MemexNodeUrl url) {
        return fileSystem.isDirectory(url);
    }

    /**
     * Removes an image.
     *
     * @return the URLs affected by the removal: the image itself and its parent
     */
    public synchronized Set<MemexNodeUrl> deleteImage(MemexNodeUrl url) {
        images.remove(url);
        fileSystem.remove(url);

        Set<MemexNodeUrl> affectedUrls = new HashSet<>();
        affectedUrls.add(url);
        affectedUrls.add(url.getParentUrl());
        return affectedUrls;
    }

    /**
     * Removes a document together with the outgoing links recorded under its URL.
     * <p>
     * NOTE(review): updateOutlinks records the links of an index.gmi under the
     * parent URL, whereas this method looks them up under {@code url} —
     * confirm index files are cleaned up as intended.
     *
     * @return the URLs affected by the removal: the document, its parent and
     *         the destinations of its recorded outgoing links
     */
    public synchronized Set<MemexNodeUrl> deleteDocument(MemexNodeUrl url) {
        Set<MemexNodeUrl> affectedUrls = new HashSet<>();

        affectedUrls.add(url);
        affectedUrls.add(url.getParentUrl());

        links.getOutlinks(url)
                .stream()
                .map(MemexLink::getDest)
                .forEach(affectedUrls::add);

        documents.remove(url);
        fileSystem.remove(url);
        links.remove(url);

        return affectedUrls;
    }

    /** @return true if the tombstone or the redirect database has an entry for {@code url} */
    public synchronized boolean hasTombstone(MemexNodeUrl url) {
        if (tombstones != null && tombstones.getLinkData(url).isPresent()) {
            return true;
        }
        return redirects != null && redirects.getLinkData(url).isPresent();
    }
}

View File

@ -1,54 +0,0 @@
package nu.marginalia.memex.memex;
import nu.marginalia.memex.memex.model.MemexLink;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Link graph of the memex: the outgoing links of each node, plus an index of
 * incoming links that is rebuilt whenever the outgoing links change.
 */
public class MemexLinks {
    private Map<MemexNodeUrl, List<MemexLink>> backLinks = new HashMap<>();
    private final Map<MemexNodeUrl, Set<MemexLink>> links = new HashMap<>();

    /** Rebuilds the incoming-link index from all recorded outgoing links, grouped by destination. */
    public void updateBacklinks() {
        backLinks.clear();

        final var allLinks = links.values().stream().flatMap(Set::stream);
        backLinks = allLinks.collect(Collectors.groupingBy(MemexLink::getDest));
    }

    /** @return a new set holding the destinations of the node's outgoing links and the sources of its incoming links */
    public Set<MemexNodeUrl> getNeighbors(MemexNodeUrl url) {
        final Set<MemexNodeUrl> neighbors = new HashSet<>();

        for (MemexLink outgoing : links.getOrDefault(url, Collections.emptySet())) {
            neighbors.add(outgoing.getDest());
        }
        for (MemexLink incoming : backLinks.getOrDefault(url, Collections.emptyList())) {
            neighbors.add(incoming.getSrc());
        }

        return neighbors;
    }

    /** Replaces the outgoing links of {@code url} and rebuilds the incoming-link index. */
    public void setOutlinks(MemexNodeUrl url, TreeSet<MemexLink> linksForNode) {
        links.put(url, linksForNode);
        updateBacklinks();
    }

    /** @return the incoming links of all given URLs in one list, sorted by source */
    public List<MemexLink> getBacklinks(MemexNodeUrl... urls) {
        final List<MemexLink> found = new ArrayList<>();

        for (MemexNodeUrl url : urls) {
            final List<MemexLink> incoming = backLinks.get(url);
            if (incoming != null) {
                found.addAll(incoming);
            }
        }

        found.sort(Comparator.comparing(MemexLink::getSrc));
        return found;
    }

    /** @return the recorded outgoing links of {@code url}, or an empty set if there are none */
    public Set<MemexLink> getOutlinks(MemexNodeUrl url) {
        return links.getOrDefault(url, Collections.emptySet());
    }

    /** Forgets the outgoing links of {@code url} and rebuilds the incoming-link index. */
    public void remove(MemexNodeUrl url) {
        links.remove(url);
        updateBacklinks();
    }
}

View File

@ -1,265 +0,0 @@
package nu.marginalia.memex.memex;
import com.google.common.collect.Sets;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import nu.marginalia.memex.gemini.gmi.GemtextDatabase;
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
import nu.marginalia.memex.memex.model.MemexImage;
import nu.marginalia.memex.memex.model.MemexNode;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import nu.marginalia.memex.memex.system.MemexFileSystemModifiedTimes;
import nu.marginalia.memex.memex.system.MemexSourceFileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.CheckReturnValue;
import java.io.File;
import java.io.IOException;
import java.nio.file.*;
import java.util.*;
/**
 * Loads gemtext documents, images and the tombstone/redirect databases from
 * the memex root directory into {@link MemexData}, and applies create,
 * update, rename and delete operations to both the source files and the
 * in-memory state.
 * <p>
 * Most operations return the set of node URLs whose rendered output is
 * affected by the change.
 */
public class MemexLoader {
    private final MemexData data;
    private final MemexFileSystemModifiedTimes modifiedTimes;
    private final Path root;
    private final MemexSourceFileSystem sourceFileSystem;
    private final String tombstonePath;
    private final String redirectsPath;

    private static final Logger logger = LoggerFactory.getLogger(MemexLoader.class);

    /**
     * @param root          directory containing the memex source files
     * @param tombstonePath location of the tombstone database, appended to the root
     * @param redirectsPath location of the redirect database, appended to the root
     */
    @Inject
    public MemexLoader(MemexData data,
                       MemexFileSystemModifiedTimes modifiedTimes,
                       MemexSourceFileSystem sourceFileSystem,
                       @Named("memex-root") Path root,
                       @Named("tombestone-special-file") String tombstonePath,
                       @Named("redirects-special-file") String redirectsPath) {
        this.data = data;
        this.modifiedTimes = modifiedTimes;
        this.sourceFileSystem = sourceFileSystem;
        this.root = root;
        this.tombstonePath = tombstonePath;
        this.redirectsPath = redirectsPath;
    }

    /** Loads the tombstone and redirect databases and then every file below the root. */
    public void load() throws IOException {
        loadTombstones();
        loadRedirects();

        try (var files = Files.walk(root)) {
            files.forEach(this::loadFile);
        }

        data.getFilesystem().recalculateDirectories();
    }

    /**
     * Loads a single path found while walking the root: directories are
     * registered, {@code .gmi} and {@code .png} files are loaded. Paths
     * containing ".git" are skipped. Failures are logged, not propagated.
     */
    private void loadFile(Path p) {
        var file = p.toFile();

        try {
            if (p.toString().contains(".git")) {
                return;
            }

            if (file.isDirectory() && !file.getName().startsWith(".")) {
                data.getFilesystem().registerDir(MemexNodeUrl.ofRelativePath(root, p));
            } else if (isGemtext(file)) {
                loadNode(p);
            } else if (isImage(file)) {
                loadImage(p);
            }
        }
        catch (IOException ex) {
            logger.error("Failed to load file {}", p, ex);
        }
    }

    /** Registers the image at {@code p}, unless the modification-time check reports it as not freshly updated. */
    public void loadImage(Path p) throws IOException {
        if (!modifiedTimes.isFreshUpdate(p)) {
            return;
        }

        var url = MemexNodeUrl.ofRelativePath(root, p);
        data.addImage(url, new MemexImage(url, p));
        logger.info("Loading {}", p);
    }

    /**
     * Reloads the tombstone database.
     *
     * @return the difference between the previous and the new database, or an
     *         empty set when either of them is unavailable
     */
    public Set<MemexNodeUrl> loadTombstones() {
        var oldValues = data.getTombstones();
        var newValues = loadGemtextDb(Path.of(root + tombstonePath));

        newValues.ifPresent(data::setTombstones);

        if (newValues.isPresent() && oldValues.isPresent()) {
            return oldValues.get().difference(newValues.get());
        }
        return Collections.emptySet();
    }

    /**
     * Reloads the redirect database.
     * <p>
     * The new database is compared with the previous <em>redirect</em>
     * database; comparing it with the tombstones, as was done before,
     * reported unrelated differences.
     *
     * @return the difference between the previous and the new database, or an
     *         empty set when either of them is unavailable
     */
    public Set<MemexNodeUrl> loadRedirects() {
        var oldValues = data.getRedirects();
        var newValues = loadGemtextDb(Path.of(root + redirectsPath));

        newValues.ifPresent(data::setRedirects);

        if (newValues.isPresent() && oldValues.isPresent()) {
            return oldValues.get().difference(newValues.get());
        }
        return Collections.emptySet();
    }

    /** Loads a gemtext database file; returns empty if the file is missing or cannot be read. */
    private Optional<GemtextDatabase> loadGemtextDb(Path p) {
        if (Files.exists(p)) {
            try {
                return Optional.of(GemtextDatabase.of(MemexNodeUrl.ofRelativePath(root, p), p));
            } catch (IOException e) {
                logger.error("Failed to load database {}", p, e);
            }
        }
        return Optional.empty();
    }

    private boolean isGemtext(File f) {
        return f.isFile() && f.getName().endsWith(".gmi");
    }

    private boolean isImage(File f) {
        return f.isFile() && f.getName().endsWith(".png");
    }

    /** Replaces the source file of an existing node and reloads it. */
    @CheckReturnValue
    public Collection<MemexNodeUrl> updateNode(MemexNodeUrl url, String contents) throws IOException {
        sourceFileSystem.replaceFile(url, contents);
        return loadNode(url);
    }

    /** Creates the source file of a new node and loads it. */
    @CheckReturnValue
    public Collection<MemexNodeUrl> createNode(MemexNodeUrl url, String contents) throws IOException {
        sourceFileSystem.createFile(url, contents);
        return loadNode(url);
    }

    /** Stores the image bytes as a source file and registers the image. */
    public MemexImage uploadImage(MemexNodeUrl url, byte[] bytes) throws IOException {
        sourceFileSystem.createFile(url, bytes);
        var img = new MemexImage(url, url.asAbsolutePath(root));
        data.addImage(url, img);
        return img;
    }

    /**
     * Synchronises the in-memory state with the image file at {@code url}:
     * the image is removed if the file no longer exists, reloaded otherwise.
     *
     * @return the affected URLs; for a reload, the image and all its ancestors
     */
    public Set<MemexNodeUrl> reloadImage(MemexNodeUrl url) throws IOException {
        var path = url.asAbsolutePath(root);

        if (!Files.exists(path)) {
            return data.deleteImage(url);
        }

        loadImage(path);

        Set<MemexNodeUrl> affectedUrls = new HashSet<>();
        affectedUrls.add(url);
        for (var u = url.getParentUrl(); u != null; u = u.getParentUrl()) {
            affectedUrls.add(u);
        }
        return affectedUrls;
    }

    /**
     * Synchronises the in-memory state with the document file at {@code url}:
     * the document is removed if the file no longer exists, reloaded otherwise.
     */
    public Set<MemexNodeUrl> reloadNode(MemexNodeUrl url) throws IOException {
        var path = url.asAbsolutePath(root);

        if (!Files.exists(path)) {
            return data.deleteDocument(url);
        }
        return loadNode(path);
    }

    /**
     * Loads the document at {@code path}. When the modification-time check
     * reports no fresh update, nothing is loaded and only the document's own
     * URL is returned.
     */
    public Set<MemexNodeUrl> loadNode(Path path) throws IOException {
        if (!modifiedTimes.isFreshUpdate(path)) {
            return Set.of(MemexNodeUrl.ofRelativePath(root, path));
        }

        logger.info("Loading {}", path);

        return loadNode(MemexNodeUrl.ofRelativePath(root, path));
    }

    /**
     * Parses the document at {@code url}, stores it and updates its outgoing links.
     *
     * @return the document itself, its neighbours before and after the update
     *         that have a document, and all ancestors of the document
     */
    public Set<MemexNodeUrl> loadNode(MemexNodeUrl url) throws IOException {
        var doc = GemtextDocument.of(url, url.asAbsolutePath(root));
        data.addDocument(url, doc);

        // Neighbours are collected both before and after the link update so
        // that nodes that lost a link are included as well.
        Set<MemexNodeUrl> urlsAffected = data.getNeighbors(url);
        data.updateOutlinks(url, doc);
        urlsAffected.addAll(data.getNeighbors(url));
        urlsAffected.add(url);

        urlsAffected.removeIf(u -> null == data.getDocument(u));

        for (var u = url.getParentUrl(); u != null; u = u.getParentUrl()) {
            urlsAffected.add(u);
        }

        return urlsAffected;
    }

    /** Deletes the node's source file and removes the node from the in-memory state. */
    public Set<MemexNodeUrl> delete(MemexNode node) throws IOException {
        sourceFileSystem.delete(node.getUrl());

        return node.visit(new MemexNode.MemexNodeVisitor<>() {
            @Override
            public Set<MemexNodeUrl> onDocument(MemexNodeUrl url) {
                return data.deleteDocument(url);
            }

            @Override
            public Set<MemexNodeUrl> onImage(MemexNodeUrl url) {
                return data.deleteImage(url);
            }
        });
    }

    /**
     * Renames the node's source file, removes the node under its old URL and
     * loads it under the new one.
     *
     * @return the URLs affected by the removal and by the reload
     */
    public Set<MemexNodeUrl> rename(MemexNode src, MemexNodeUrl dst) throws IOException {
        sourceFileSystem.renameFile(src.getUrl(), dst);

        return src.visit(new MemexNode.MemexNodeVisitor<Set<MemexNodeUrl>>() {
            @Override
            public Set<MemexNodeUrl> onDocument(MemexNodeUrl url) throws IOException {
                var changes = data.deleteDocument(url);
                return Sets.union(changes, reloadNode(dst));
            }

            @Override
            public Set<MemexNodeUrl> onImage(MemexNodeUrl url) throws IOException {
                var changes = data.deleteImage(url);
                return Sets.union(changes, reloadImage(dst));
            }
        });
    }

    /** @return the raw bytes of the source file at {@code url} */
    public byte[] getRaw(MemexNodeUrl url) throws IOException {
        return sourceFileSystem.getRaw(url);
    }
}

View File

@ -1,31 +0,0 @@
package nu.marginalia.memex.memex;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import nu.marginalia.memex.MemexServiceDescriptors;
import nu.marginalia.memex.gemini.GeminiConfigurationModule;
import nu.marginalia.service.MainClass;
import nu.marginalia.service.id.ServiceId;
import nu.marginalia.service.module.ConfigurationModule;
import nu.marginalia.service.server.Initialization;
/** Entry point of the memex service. */
public class MemexMain extends MainClass {
    private final MemexService service;

    /** @param service the memex service; requesting this class from the injector causes it to be created */
    @Inject
    public MemexMain(MemexService service) {
        this.service = service;
    }

    /**
     * Initialises the process as the memex service, builds the injector from
     * the memex, Gemini and service configuration modules, instantiates the
     * service and finally marks initialisation as ready.
     */
    public static void main(String... args) {
        MainClass.init(ServiceId.Other_Memex, args);

        final var memexModule = new MemexConfigurationModule();
        final var geminiModule = new GeminiConfigurationModule();
        final var serviceModule = new ConfigurationModule(MemexServiceDescriptors.descriptors, ServiceId.Other_Memex);

        final Injector injector = Guice.createInjector(memexModule, geminiModule, serviceModule);

        injector.getInstance(MemexMain.class);
        injector.getInstance(Initialization.class).setReady();
    }
}

View File

@ -1,292 +0,0 @@
package nu.marginalia.memex.memex;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import lombok.SneakyThrows;
import nu.marginalia.client.Context;
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRendererFactory;
import nu.marginalia.memex.auth.client.AuthClient;
import nu.marginalia.memex.memex.model.render.*;
import nu.marginalia.memex.memex.change.GemtextMutation;
import nu.marginalia.memex.memex.change.update.GemtextDocumentUpdateCalculator;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import nu.marginalia.memex.memex.renderer.MemexHtmlRenderer;
import nu.marginalia.service.server.Initialization;
import nu.marginalia.service.server.MetricsServer;
import nu.marginalia.service.server.Service;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import spark.Request;
import spark.Response;
import spark.Spark;
import javax.servlet.MultipartConfigElement;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Collections;
import java.util.Objects;
import static spark.Spark.*;
/**
 * Spark-based HTTP service for editing the memex.
 *
 * <p>Registers a {@code git-pull} endpoint and, under {@code public/api}, form (GET)
 * and action (POST) endpoints for creating, uploading, updating, renaming and
 * deleting nodes, plus a {@code /raw} download endpoint. Form endpoints return a
 * model object that is turned into HTML by {@link #renderModel(Object)}.
 */
public class MemexService extends Service {
    private final GemtextDocumentUpdateCalculator updateCalculator;
    private final Memex memex;
    private final MemexHtmlRenderer renderer;
    private final AuthClient authClient;

    private final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Wires up the service and registers all routes.
     *
     * @param ip               host address, injected as {@code service-host}
     * @param port             port, injected as {@code service-port}
     * @param updateCalculator computes the mutations needed to apply an edited section
     * @param memex            the memex being served and modified
     * @param renderer         renderer handed to form models in {@link #renderModel(Object)}
     * @param authClient       used to gate every editing endpoint
     * @param initialization   passed through to the {@link Service} constructor
     * @param metricsServer    passed through to the {@link Service} constructor
     * @param memexHtmlDir     directory served as external static files,
     *                         injected as {@code memex-html-resources}
     */
    @Inject
    public MemexService(@Named("service-host") String ip,
                        @Named("service-port") Integer port,
                        GemtextDocumentUpdateCalculator updateCalculator,
                        Memex memex,
                        MemexHtmlRenderer renderer,
                        AuthClient authClient,
                        Initialization initialization,
                        MetricsServer metricsServer,
                        @Named("memex-html-resources") Path memexHtmlDir
    ) {
        // The lambda holds the static-file configuration and is handed to the superclass;
        // presumably it runs before any route is mapped -- confirm against Service.
        super(ip, port, initialization, metricsServer, () -> {
            staticFiles.externalLocation(memexHtmlDir.toString());
            staticFiles.disableMimeTypeGuessing();
            // NOTE(review): both .gmi and .png are served as text/html; presumably the
            // directory holds rendered HTML pages named after the node -- confirm.
            staticFiles.registerMimeType("gmi", "text/html");
            staticFiles.registerMimeType("png", "text/html");
            staticFiles.expireTime(60);
            staticFiles.header("Cache-control", "public,proxy-revalidate");
        });

        this.updateCalculator = updateCalculator;
        this.memex = memex;
        this.renderer = renderer;
        this.authClient = authClient;

        Spark.get("git-pull", this::gitPull);

        Spark.path("public/api", () -> {
            before((req, rsp) -> {
                logger.info("{} {}", req.requestMethod(), req.pathInfo());
            });
            after((req, rsp) -> {
                rsp.header("Cache-control", "no-cache");
            });

            post("/create", this::create);
            get("/create", this::createForm, this::renderModel);
            post("/upload", this::upload);
            get("/upload", this::uploadForm, this::renderModel);
            post("/update", this::update);
            get("/update", this::updateForm, this::renderModel);
            post("/rename", this::rename);
            get("/rename", this::renameForm, this::renderModel);
            post("/delete", this::delete);
            get("/delete", this::deleteForm, this::renderModel);
            get("/raw", this::raw);
        });
    }

    /**
     * Streams the raw bytes of the node named by the {@code url} query parameter
     * as an attachment, using the node type's MIME type.
     */
    private Object raw(Request request, Response response) throws IOException {
        final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));

        response.type(url.toNode().getType().mime);
        response.header("Content-Disposition", "attachment; filename=" + url.getFilename());
        response.raw().getOutputStream().write(memex.getRaw(url));

        return "";
    }

    /**
     * Builds the model for the rename form.
     *
     * <p>Requires {@code type} ({@code gmi} or {@code img}) and {@code url} query
     * parameters. Halts with 404 when the node does not exist and with 400 for any
     * other {@code type}.
     */
    private Object renameForm(Request request, Response response) {
        final String type = Objects.requireNonNull(request.queryParams("type"));
        final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));

        authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);

        if ("gmi".equals(type)) {
            var doc = memex.getDocument(url);
            if (null == doc) {
                Spark.halt(404);
            }

            final String docHtml = doc.render(new GemtextRendererFactory("", url.toString()).htmlRendererEditable());
            return new MemexRendererRenameFormModel(docHtml,
                    null, url, "gmi");
        }
        else if ("img".equals(type)) {
            var img = memex.getImage(url);
            if (null == img) {
                Spark.halt(404);
            }

            return new MemexRendererRenameFormModel(null,
                    new MemexRendererImageModel(img, Collections.emptyList(), null),
                    url, "img");
        }

        Spark.halt(HttpStatus.SC_BAD_REQUEST);
        return null;
    }

    /**
     * Renames the node at {@code url} to {@code name} and redirects to the new location.
     *
     * <p>Requires {@code url}, {@code name}, {@code type} and {@code confirm} query
     * parameters; halts with 400 unless {@code confirm} is {@code "on"}.
     */
    private Object rename(Request request, Response response) throws IOException {
        // NOTE(review): the other POST handlers use authClient.requireLogIn(...);
        // this one uses the redirecting variant -- confirm that is intended.
        authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);

        var url = Objects.requireNonNull(request.queryParams("url"));
        var name = Objects.requireNonNull(request.queryParams("name"));
        // "type" is not used by the rename itself, but its presence is still required
        Objects.requireNonNull(request.queryParams("type"));
        var confirm = Objects.requireNonNull(request.queryParams("confirm"));

        if (!"on".equals(confirm)) {
            logger.error("Confirm dialog not checked, was {}", confirm);
            Spark.halt(HttpStatus.SC_BAD_REQUEST, "Confirm was not checked");
        }

        memex.rename(new MemexNodeUrl(url).toNode(), new MemexNodeUrl(name));

        response.redirect("https://memex.marginalia.nu/"+name);
        return null;
    }

    /** Triggers {@code memex.gitPull()} and answers {@code "Ok"}. */
    private Object gitPull(Request request, Response response) {
        logger.info("Git pull by request");
        memex.gitPull();
        return "Ok";
    }

    /**
     * Response transformer for the form routes: renders the model with the HTML renderer.
     *
     * @param model a model produced by one of the form handlers; must be a
     *              {@code MemexRendererableDirect}
     */
    private String renderModel(Object model) {
        return ((MemexRendererableDirect)model).render(renderer);
    }

    /**
     * Builds the model for the delete form.
     *
     * <p>Requires {@code type} ({@code gmi} or {@code img}) and {@code url} query
     * parameters. Halts with 404 when the node does not exist and with 400 for any
     * other {@code type}.
     */
    private MemexRendererDeleteFormModel deleteForm(Request request, Response response) {
        final String type = Objects.requireNonNull(request.queryParams("type"));
        final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));

        authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);

        if ("gmi".equals(type)) {
            var doc = memex.getDocument(url);
            if (null == doc) {
                Spark.halt(404);
            }

            final String docHtml = doc.render(new GemtextRendererFactory("", url.toString()).htmlRendererEditable());
            return new MemexRendererDeleteFormModel(docHtml,
                    null, url, "gmi");
        }
        else if ("img".equals(type)) {
            var img = memex.getImage(url);
            if (null == img) {
                Spark.halt(404);
            }

            return new MemexRendererDeleteFormModel(null,
                    new MemexRendererImageModel(img, Collections.emptyList(), null),
                    url, "img");
        }

        Spark.halt(HttpStatus.SC_BAD_REQUEST);
        return null;
    }

    /**
     * Deletes the node at {@code url} with the message given in {@code note},
     * then redirects to the node's URL.
     *
     * <p>Requires {@code url}, {@code note}, {@code type} and {@code confirm} query
     * parameters; halts with 400 unless {@code confirm} is {@code "on"}.
     */
    private Object delete(Request request, Response response) throws IOException {
        authClient.requireLogIn(Context.fromRequest(request));

        var url = Objects.requireNonNull(request.queryParams("url"));
        var message = Objects.requireNonNull(request.queryParams("note"));
        // "type" is not used by the delete itself, but its presence is still required
        Objects.requireNonNull(request.queryParams("type"));
        var confirm = Objects.requireNonNull(request.queryParams("confirm"));

        if (!"on".equals(confirm)) {
            logger.error("Confirm dialog not checked, was {}", confirm);
            Spark.halt(HttpStatus.SC_BAD_REQUEST, "Confirm was not checked");
        }

        memex.delete(new MemexNodeUrl(url).toNode(), message);

        response.redirect("https://memex.marginalia.nu/"+url);
        return null;
    }

    /**
     * Replaces one section of a document with the submitted text.
     *
     * <p>Requires {@code url}, {@code section} and {@code text} query parameters.
     * The text is split into lines, the update calculator derives the mutations
     * against the current document, and each mutation is applied to the memex.
     */
    private Object update(Request request, Response response) throws IOException {
        authClient.requireLogIn(Context.fromRequest(request));

        String extUrl = Objects.requireNonNull(request.queryParams("url"));
        String extSection = Objects.requireNonNull(request.queryParams("section"));
        String newSectionText = Objects.requireNonNull(request.queryParams("text"));

        var url = new MemexNodeUrl(extUrl);
        var section = MemexNodeHeadingId.parse(extSection);

        // Accept both LF and CRLF line endings from the form
        String[] lines = newSectionText.split("\r?\n");

        var sectionGemtext = new GemtextDocument(url, lines, section);
        var updates = updateCalculator.calculateUpdates(memex.getDocument(url), section, sectionGemtext);

        for (GemtextMutation mutation : updates) {
            mutation.visit(memex);
        }

        response.redirect("https://memex.marginalia.nu/"+extUrl);
        return "";
    }

    /**
     * Creates a new node named {@code filename} in {@code directory} with the
     * submitted {@code text}, then redirects to it.
     */
    private Object create(Request request, Response response) throws IOException {
        authClient.requireLogIn(Context.fromRequest(request));

        String directory = Objects.requireNonNull(request.queryParams("directory"));
        String filename = Objects.requireNonNull(request.queryParams("filename"));
        String text = Objects.requireNonNull(request.queryParams("text"));

        var url = new MemexNodeUrl(Path.of(directory).resolve(filename).toString());
        memex.createNode(url, text);

        response.redirect("https://memex.marginalia.nu/"+directory + "/" + filename);
        return "";
    }

    /** Builds the model for the create form from the {@code url} query parameter. */
    private Object createForm(Request request, Response response) {
        final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));

        authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);

        return new MemexRenderCreateFormModel(url, memex.getDocumentsByPath(url));
    }

    /** Builds the model for the upload form from the {@code url} query parameter. */
    private Object uploadForm(Request request, Response response) {
        final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));

        authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);

        return new MemexRenderUploadFormModel(url, memex.getDocumentsByPath(url));
    }

    /**
     * Builds the model for the update form, pre-filled with the document's root
     * section. Halts with 404 when the document does not exist.
     */
    private Object updateForm(Request request, Response response) {
        final MemexNodeUrl url = new MemexNodeUrl(Objects.requireNonNull(request.queryParams("url")));

        authClient.redirectToLoginIfUnauthenticated("MEMEX", request, response);

        var doc = memex.getDocument(url);
        if (null == doc) {
            // Same handling as the rename and delete forms, instead of a NullPointerException
            Spark.halt(404);
        }

        return new MemexRenderUpdateFormModel(url, doc.getTitle(), "0", doc.getSectionGemtext(MemexNodeHeadingId.ROOT));
    }

    /**
     * Stores the uploaded multipart part named {@code file} as an image node called
     * {@code filename} in {@code directory}, then redirects to it.
     *
     * <p>Halts with 400 when the request carries no {@code file} part.
     */
    @SneakyThrows
    private Object upload(Request request, Response response) {
        authClient.requireLogIn(Context.fromRequest(request));

        // Must be set before any parameter or part of the multipart request is read
        request.attribute("org.eclipse.jetty.multipartConfig", new MultipartConfigElement("/temp", 50*1024*1024, 50*1024*1024, 25*1024*1024));

        String directory = Objects.requireNonNull(request.queryParams("directory"));
        String filename = Objects.requireNonNull(request.queryParams("filename"));
        var url = new MemexNodeUrl(Path.of(directory).resolve(filename).toString());

        // getPart returns null when the multipart request has no part with that name
        var filePart = request.raw().getPart("file");
        if (null == filePart) {
            Spark.halt(HttpStatus.SC_BAD_REQUEST, "Missing file part");
        }

        try (InputStream input = filePart.getInputStream()) {
            byte[] data = input.readAllBytes();
            memex.uploadImage(url, data);
        }

        response.redirect("https://memex.marginalia.nu/"+directory + "/" + filename);
        return "";
    }
}

View File

@ -1,70 +0,0 @@
package nu.marginalia.memex.memex.change;
import lombok.AllArgsConstructor;
import lombok.ToString;
import nu.marginalia.memex.memex.Memex;
import nu.marginalia.memex.gemini.gmi.GemtextDocument;
import nu.marginalia.memex.gemini.gmi.renderer.GemtextRendererFactory;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import java.io.IOException;
/**
 * Mutation that appends lines to the end of a heading's section in a gemtext document.
 */
@AllArgsConstructor @ToString
public class GemtextAppend implements GemtextMutation {
    /** URL of the document to modify. */
    public final MemexNodeUrl doc;
    /** Heading whose section receives the new lines. */
    public final MemexNodeHeadingId id;
    /** Lines to insert after the section's existing content. */
    public final String[] lines;

    /**
     * Loads the document, computes the text with the lines appended,
     * and writes it back through {@code memex.updateNode}.
     */
    @Override
    public void visit(Memex memex) throws IOException {
        final GemtextDocument current = memex.getDocument(doc);
        memex.updateNode(doc, calculateAppend(current));
    }

    /**
     * Renders {@code document} as-is, with {@link #lines} inserted right after the
     * last consecutive line belonging to heading {@link #id}.
     *
     * <p>If no line belongs to the heading, the new lines end up at the very end.
     * Every emitted line is terminated with {@code '\n'}.
     *
     * @param document the document to render
     * @return the full document text including the appended lines
     */
    public String calculateAppend(GemtextDocument document) {
        StringBuilder out = new StringBuilder();

        var renderer = new GemtextRendererFactory().gemtextRendererAsIs();
        var existing = document.getLines();

        int pos = 0;

        // Everything ahead of the target section
        while (pos < existing.length && !existing[pos].getHeading().isChildOf(id)) {
            out.append(existing[pos].visit(renderer)).append('\n');
            pos++;
        }

        // The target section itself
        while (pos < existing.length && existing[pos].getHeading().isChildOf(id)) {
            out.append(existing[pos].visit(renderer)).append('\n');
            pos++;
        }

        // The appended lines go directly after the section
        for (String addition : this.lines) {
            out.append(addition).append('\n');
        }

        // Whatever follows the section
        while (pos < existing.length) {
            out.append(existing[pos].visit(renderer)).append('\n');
            pos++;
        }

        return out.toString();
    }
}

View File

@ -1,19 +0,0 @@
package nu.marginalia.memex.memex.change;
import lombok.AllArgsConstructor;
import lombok.ToString;
import nu.marginalia.memex.memex.Memex;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import java.io.IOException;
/**
 * Mutation that creates a new node with a given text.
 */
@AllArgsConstructor @ToString
public class GemtextCreate implements GemtextMutation {
    /** URL of the node to create. */
    public final MemexNodeUrl doc;
    /** Text passed to {@code createNode} as the new node's content. */
    public final String text;

    /** Applies the mutation by calling {@code createNode} on the given memex. */
    @Override
    public void visit(Memex memex) throws IOException {
        final MemexNodeUrl target = this.doc;
        final String content = this.text;

        memex.createNode(target, content);
    }
}

View File

@ -1,26 +0,0 @@
package nu.marginalia.memex.memex.change;
import lombok.AllArgsConstructor;
import lombok.ToString;
import nu.marginalia.memex.memex.Memex;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import java.io.IOException;
/**
 * Mutation that makes sure a document exists before applying another mutation to it.
 */
@AllArgsConstructor @ToString
public class GemtextCreateOrMutate implements GemtextMutation {
    /** URL of the document that must exist. */
    public final MemexNodeUrl doc;
    /** Text used to create the document when it is missing. */
    public final String text;
    /** Mutation applied once the document is known to exist. */
    public final GemtextMutation mutation;

    /**
     * Creates the document from {@link #text} if {@code memex.getDocument(doc)} returns
     * {@code null}, then applies {@link #mutation}.
     *
     * @throws IllegalStateException if the document is still missing after the create step
     * @throws IOException if creating the node or applying the mutation fails
     */
    @Override
    public void visit(Memex memex) throws IOException {
        if (memex.getDocument(doc) == null) {
            memex.createNode(doc, text);
        }

        // Guard the wrapped mutation against running on a document that could not be created
        if (memex.getDocument(doc) == null) {
            throw new IllegalStateException("Document " + doc + " is missing and could not be created");
        }

        mutation.visit(memex);
    }
}

View File

@ -1,18 +0,0 @@
package nu.marginalia.memex.memex.change;
import nu.marginalia.memex.memex.Memex;
import nu.marginalia.memex.memex.model.MemexNodeHeadingId;
import nu.marginalia.memex.memex.model.MemexNodeUrl;
import java.io.IOException;
/**
 * A change that can be applied to a {@link Memex}.
 */
public interface GemtextMutation {
    /**
     * Applies this mutation to the given memex.
     *
     * @throws IOException if the memex fails to carry out the change
     */
    void visit(Memex memex) throws IOException;

    /**
     * Builds a mutation that creates {@code url} from {@code template} if needed and
     * then applies a {@link GemtextAppend} of {@code lines} under {@code heading}.
     */
    static GemtextMutation createOrAppend(MemexNodeUrl url, String template, MemexNodeHeadingId heading, String... lines) {
        final GemtextMutation append = new GemtextAppend(url, heading, lines);
        return new GemtextCreateOrMutate(url, template, append);
    }

    /**
     * Builds a mutation that creates {@code url} from {@code template} if needed and
     * then applies a {@link GemtextPrepend} of {@code lines} under {@code heading}.
     */
    static GemtextMutation createOrPrepend(MemexNodeUrl url, String template, MemexNodeHeadingId heading, String... lines) {
        final GemtextMutation prepend = new GemtextPrepend(url, heading, lines);
        return new GemtextCreateOrMutate(url, template, prepend);
    }
}

Some files were not shown because too many files have changed in this diff Show More