Preparation for conversion

This commit is contained in:
vlofgren 2022-09-02 14:51:11 +02:00
parent a04d27692e
commit ccf79f47b0
10 changed files with 85 additions and 82 deletions

View File

@ -7,7 +7,7 @@ import nu.marginalia.util.language.conf.LanguageModels;
import nu.marginalia.util.language.processing.SentenceExtractor; import nu.marginalia.util.language.processing.SentenceExtractor;
import nu.marginalia.util.language.processing.model.DocumentLanguageData; import nu.marginalia.util.language.processing.model.DocumentLanguageData;
import nu.marginalia.wmsa.configuration.WmsaHome; import nu.marginalia.wmsa.configuration.WmsaHome;
import nu.marginalia.wmsa.edge.converting.processor.logic.DomPruner; import nu.marginalia.wmsa.edge.converting.processor.logic.DomPruningFilter;
import nu.marginalia.wmsa.edge.crawling.CrawlPlanLoader; import nu.marginalia.wmsa.edge.crawling.CrawlPlanLoader;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
@ -87,7 +87,6 @@ public class TermFrequencyDict {
var plan = new CrawlPlanLoader().load(Path.of(args[0])); var plan = new CrawlPlanLoader().load(Path.of(args[0]));
ThreadLocal<SentenceExtractor> se = ThreadLocal.withInitial(() -> new SentenceExtractor(WmsaHome.getLanguageModels())); ThreadLocal<SentenceExtractor> se = ThreadLocal.withInitial(() -> new SentenceExtractor(WmsaHome.getLanguageModels()));
DomPruner pruner = new DomPruner();
LanguageFilter lf = new LanguageFilter(); LanguageFilter lf = new LanguageFilter();
TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1); TLongIntHashMap counts = new TLongIntHashMap(100_000_000, 0.7f, -1, -1);
@ -108,7 +107,7 @@ public class TermFrequencyDict {
docCount.incrementAndGet(); docCount.incrementAndGet();
Document parsed = Jsoup.parse(doc.documentBody); Document parsed = Jsoup.parse(doc.documentBody);
pruner.prune(parsed, 0.5); parsed.body().filter(new DomPruningFilter(0.5));
DocumentLanguageData dld = se.get().extractSentences(parsed); DocumentLanguageData dld = se.get().extractSentences(parsed);

View File

@ -171,16 +171,15 @@ public class DocumentProcessor {
throw new DisqualifiedException(DisqualificationReason.FORBIDDEN); throw new DisqualifiedException(DisqualificationReason.FORBIDDEN);
} }
DomPruner domPruner = new DomPruner();
Document prunedDoc = doc.clone(); Document prunedDoc = doc.clone();
domPruner.prune(prunedDoc, 0.5); prunedDoc.body().filter(new DomPruningFilter(0.5));
var dld = sentenceExtractor.extractSentences(prunedDoc); var dld = sentenceExtractor.extractSentences(prunedDoc);
checkDocumentLanguage(dld); checkDocumentLanguage(dld);
var ret = new ProcessedDocumentDetails(); var ret = new ProcessedDocumentDetails();
ret.length = getLength(doc); ret.length = getLength(doc);
ret.standard = getHtmlStandard(doc); ret.standard = getHtmlStandard(doc);
ret.title = titleExtractor.getTitleAbbreviated(doc, dld, crawledDocument.url); ret.title = titleExtractor.getTitleAbbreviated(doc, dld, crawledDocument.url);
@ -246,12 +245,11 @@ public class DocumentProcessor {
if (linkParser.shouldIndexLink(atag)) { if (linkParser.shouldIndexLink(atag)) {
linkOpt.ifPresent(lp::accept); linkOpt.ifPresent(lp::accept);
} }
else if (linkOpt.isPresent()) { else {
if (linkParser.hasBinarySuffix(linkOpt.get().toString())) { linkOpt
linkOpt.ifPresent(lp::acceptNonIndexable); .filter(url -> linkParser.hasBinarySuffix(url.path.toLowerCase()))
} .ifPresent(lp::acceptNonIndexable);
} }
} }
for (var frame : doc.getElementsByTag("frame")) { for (var frame : doc.getElementsByTag("frame")) {
linkParser.parseFrame(baseUrl, frame).ifPresent(lp::accept); linkParser.parseFrame(baseUrl, frame).ifPresent(lp::accept);
@ -271,21 +269,20 @@ public class DocumentProcessor {
linkTerms.add("links:"+fd.toString().toLowerCase()); linkTerms.add("links:"+fd.toString().toLowerCase());
linkTerms.add("links:"+fd.getDomain().toLowerCase()); linkTerms.add("links:"+fd.getDomain().toLowerCase());
} }
words.append(IndexBlock.Meta, linkTerms); words.append(IndexBlock.Meta, linkTerms);
Set<String> fileKeywords = new HashSet<>(100); Set<String> fileKeywords = new HashSet<>(100);
for (var link : lp.getNonIndexableUrls()) { for (var link : lp.getNonIndexableUrls()) {
if (!Objects.equals(domain, link.domain)) { if (!domain.hasSameTopDomain(link.domain)) {
continue; continue;
} }
synthesizeFilenameKeyword(fileKeywords, link); synthesizeFilenameKeyword(fileKeywords, link);
} }
words.append(IndexBlock.Artifacts, fileKeywords); words.append(IndexBlock.Artifacts, fileKeywords);
} }
private void synthesizeFilenameKeyword(Set<String> fileKeywords, EdgeUrl link) { private void synthesizeFilenameKeyword(Set<String> fileKeywords, EdgeUrl link) {

View File

@ -1,6 +1,5 @@
package nu.marginalia.wmsa.edge.converting.processor.logic; package nu.marginalia.wmsa.edge.converting.processor.logic;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node; import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode; import org.jsoup.nodes.TextNode;
@ -9,22 +8,14 @@ import org.jsoup.select.NodeFilter;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
public class DomPruner { public class DomPruningFilter implements NodeFilter {
public void prune(Document document, double pruneThreshold) { private final double pruneThreshold;
document.filter(new PruningFilter(pruneThreshold));
}
}
class PruningFilter implements NodeFilter {
private final Map<Node, NodeData> data = new HashMap<>(); private final Map<Node, NodeData> data = new HashMap<>();
private final NodeData dummy = new NodeData(Integer.MAX_VALUE, 1, 0); private final NodeData dummy = new NodeData(Integer.MAX_VALUE, 1, 0);
private double pruneThreshold;
public PruningFilter(double pruneThreshold) { public DomPruningFilter(double pruneThreshold) {
this.pruneThreshold = pruneThreshold; this.pruneThreshold = pruneThreshold;
} }

View File

@ -19,10 +19,14 @@ import java.util.regex.Pattern;
public class LinkParser { public class LinkParser {
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
private final List<String> blockPrefixList = List.of( private final List<String> blockPrefixList = List.of(
"mailto:", "javascript:", "tel:", "itpc:", "#", "file:"); "mailto:", "javascript:", "tel:", "itpc:", "#", "file:");
private final List<String> blockSuffixList = List.of(
private final List<String> binarySuffixList = List.of(
".pdf", ".mp3", ".wmv", ".avi", ".zip", ".7z", ".pdf", ".mp3", ".wmv", ".avi", ".zip", ".7z",
".mpv", ".mp4", ".avi", ".mkv", ".tiff", ".dat", ".tar",
".com", ".bat", ".sh",
".bin", ".exe", ".tar.gz", ".tar.bz2", ".xml", ".swf", ".bin", ".exe", ".tar.gz", ".tar.bz2", ".xml", ".swf",
".wav", ".ogg", ".jpg", ".jpeg", ".png", ".gif", ".webp", ".wav", ".ogg", ".jpg", ".jpeg", ".png", ".gif", ".webp",
".webm", ".bmp", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx", ".webm", ".bmp", ".doc", ".docx", ".ppt", ".pptx", ".xls", ".xlsx",
@ -33,7 +37,7 @@ public class LinkParser {
return Optional.of(l) return Optional.of(l)
.filter(this::shouldIndexLink) .filter(this::shouldIndexLink)
.map(this::getUrl) .map(this::getUrl)
.map(link -> resolveUrl(relativeBaseUrl, link)) .map(link -> resolveRelativeUrl(relativeBaseUrl, link))
.flatMap(this::createURI) .flatMap(this::createURI)
.map(URI::normalize) .map(URI::normalize)
.map(this::renormalize) .map(this::renormalize)
@ -44,7 +48,7 @@ public class LinkParser {
public Optional<EdgeUrl> parseLinkPermissive(EdgeUrl relativeBaseUrl, Element l) { public Optional<EdgeUrl> parseLinkPermissive(EdgeUrl relativeBaseUrl, Element l) {
return Optional.of(l) return Optional.of(l)
.map(this::getUrl) .map(this::getUrl)
.map(link -> resolveUrl(relativeBaseUrl, link)) .map(link -> resolveRelativeUrl(relativeBaseUrl, link))
.flatMap(this::createURI) .flatMap(this::createURI)
.map(URI::normalize) .map(URI::normalize)
.map(this::renormalize) .map(this::renormalize)
@ -74,7 +78,7 @@ public class LinkParser {
@Contract(pure=true) @Contract(pure=true)
public Optional<EdgeUrl> parseLink(EdgeUrl baseUrl, String str) { public Optional<EdgeUrl> parseLink(EdgeUrl baseUrl, String str) {
return Optional.of(str) return Optional.of(str)
.map(link -> resolveUrl(baseUrl, link)) .map(link -> resolveRelativeUrl(baseUrl, link))
.flatMap(this::createURI) .flatMap(this::createURI)
.map(URI::normalize) .map(URI::normalize)
.map(this::renormalize) .map(this::renormalize)
@ -85,7 +89,7 @@ public class LinkParser {
public Optional<EdgeUrl> parseFrame(EdgeUrl baseUrl, Element frame) { public Optional<EdgeUrl> parseFrame(EdgeUrl baseUrl, Element frame) {
return Optional.of(frame) return Optional.of(frame)
.map(l -> l.attr("src")) .map(l -> l.attr("src"))
.map(link -> resolveUrl(baseUrl, link)) .map(link -> resolveRelativeUrl(baseUrl, link))
.flatMap(this::createURI) .flatMap(this::createURI)
.map(URI::normalize) .map(URI::normalize)
.map(this::renormalize) .map(this::renormalize)
@ -95,10 +99,10 @@ public class LinkParser {
@SneakyThrows @SneakyThrows
private URI renormalize(URI uri) { private URI renormalize(URI uri) {
if (uri.getPath() == null) { if (uri.getPath() == null) {
return renormalize(new URI(uri.getScheme(), uri.getHost(), "/", uri.getFragment())); return renormalize(new URI(uri.getScheme(), uri.getHost(), "/", uri.getQuery(), uri.getFragment()));
} }
if (uri.getPath().startsWith("/../")) { if (uri.getPath().startsWith("/../")) {
return renormalize(new URI(uri.getScheme(), uri.getHost(), uri.getPath().substring(3), uri.getFragment())); return renormalize(new URI(uri.getScheme(), uri.getHost(), uri.getPath().substring(3), uri.getQuery(), uri.getFragment()));
} }
return uri; return uri;
} }
@ -117,10 +121,10 @@ public class LinkParser {
private static final Pattern paramSeparatorPattern = Pattern.compile("\\?"); private static final Pattern paramSeparatorPattern = Pattern.compile("\\?");
@SneakyThrows @SneakyThrows
private String resolveUrl(EdgeUrl baseUrl, String s) { private String resolveRelativeUrl(EdgeUrl baseUrl, String s) {
// url looks like http://www.marginalia.nu/ // url looks like http://www.marginalia.nu/
if (isAbsoluteDomain(s)) { if (doesUrlStringHaveProtocol(s)) {
return s; return s;
} }
@ -154,8 +158,15 @@ public class LinkParser {
return url.path.substring(0, lastSlash+1); return url.path.substring(0, lastSlash+1);
} }
private boolean isAbsoluteDomain(String s) { private boolean doesUrlStringHaveProtocol(String s) {
return s.matches("^[a-zA-Z]+:.*$"); int i = 0;
for (; i < s.length(); i++) {
if (!Character.isAlphabetic(s.charAt(i)))
break;
}
if (i == 0 || i == s.length())
return false;
return ':' == s.charAt(i);
} }
public boolean shouldIndexLink(Element link) { public boolean shouldIndexLink(Element link) {
@ -168,26 +179,29 @@ public class LinkParser {
return !"noindex".equalsIgnoreCase(rel); return !"noindex".equalsIgnoreCase(rel);
} }
public boolean hasBinarySuffix(String href) {
return blockSuffixList.stream().anyMatch(href::endsWith);
}
private boolean isUrlRelevant(String href) { private boolean isUrlRelevant(String href) {
if (null == href || "".equals(href)) { if (null == href || "".equals(href)) {
return false; return false;
} }
if (href.length() > 128) {
return false;
}
href = href.toLowerCase();
if (blockPrefixList.stream().anyMatch(href::startsWith)) { if (blockPrefixList.stream().anyMatch(href::startsWith)) {
return false; return false;
} }
if (hasBinarySuffix(href)) { if (hasBinarySuffix(href)) {
return false; return false;
} }
if (href.length() > 128) {
return false;
}
return true; return true;
} }
/**
 * Checks whether {@code str} ends with any of the entries in
 * {@code binarySuffixList}. The comparison is case-sensitive.
 *
 * @param str the string to test; must not be null
 * @return true as soon as one suffix matches, false if none do
 */
public boolean hasBinarySuffix(String str) {
    for (String suffix : binarySuffixList) {
        if (str.endsWith(suffix)) {
            return true;
        }
    }
    return false;
}
@Nullable @Nullable
public EdgeUrl getBaseLink(Document parsed, EdgeUrl documentUrl) { public EdgeUrl getBaseLink(Document parsed, EdgeUrl documentUrl) {
var baseTags = parsed.getElementsByTag("base"); var baseTags = parsed.getElementsByTag("base");
@ -196,7 +210,7 @@ public class LinkParser {
for (var tag : baseTags) { for (var tag : baseTags) {
String href = tag.attr("href"); String href = tag.attr("href");
if (!Strings.isNullOrEmpty(href)) { if (!Strings.isNullOrEmpty(href)) {
return new EdgeUrl(resolveUrl(documentUrl, href)); return new EdgeUrl(resolveRelativeUrl(documentUrl, href));
} }
} }
} }

View File

@ -9,7 +9,7 @@ import java.util.regex.Pattern;
@AllArgsConstructor @AllArgsConstructor
@Getter @Setter @Builder @Getter @Setter @Builder
public class EdgeDomain implements WideHashable { public class EdgeDomain {
private static final Predicate<String> ipPatternTest = Pattern.compile("[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}").asMatchPredicate(); private static final Predicate<String> ipPatternTest = Pattern.compile("[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}").asMatchPredicate();
private static final Predicate<String> govListTest = Pattern.compile(".*\\.(ac|co|org|gov|edu|com)\\.[a-z]{2}").asMatchPredicate(); private static final Predicate<String> govListTest = Pattern.compile(".*\\.(ac|co|org|gov|edu|com)\\.[a-z]{2}").asMatchPredicate();
@ -23,6 +23,8 @@ public class EdgeDomain implements WideHashable {
public EdgeDomain(String host) { public EdgeDomain(String host) {
Objects.requireNonNull(host, "domain name must not be null"); Objects.requireNonNull(host, "domain name must not be null");
host = host.toLowerCase();
var dot = host.lastIndexOf('.'); var dot = host.lastIndexOf('.');
if (dot < 0 || ipPatternTest.test(host)) { // IPV6 >.> if (dot < 0 || ipPatternTest.test(host)) { // IPV6 >.>
@ -99,9 +101,11 @@ public class EdgeDomain implements WideHashable {
return ret.toString().toLowerCase(); return ret.toString().toLowerCase();
} }
@Override
public long wideHash() { public boolean hasSameTopDomain(EdgeDomain other) {
return ((long) Objects.hash(domain, subDomain) << 32) | toString().hashCode(); if (other == null) return false;
return domain.equalsIgnoreCase(other.domain);
} }
public boolean equals(final Object o) { public boolean equals(final Object o) {

View File

@ -9,33 +9,16 @@ import java.util.List;
import java.util.stream.Collectors; import java.util.stream.Collectors;
public enum EdgeSearchProfile { public enum EdgeSearchProfile {
DEFAULT("default",
List.of(IndexBlock.Title, IndexBlock.Tfidf_Top, IndexBlock.Tfidf_Middle, IndexBlock.Link, DEFAULT("default", SearchOrder.DEFAULT_ORDER, 0, 1),
IndexBlock.Words_1, IndexBlock.Words_2, IndexBlock.Words_4, IndexBlock.Words_8, IndexBlock.Words_16Plus MODERN("modern", SearchOrder.DEFAULT_ORDER, 2),
), CORPO("corpo", SearchOrder.DEFAULT_ORDER, 4, 5, 7),
0, 1), YOLO("yolo", SearchOrder.DEFAULT_ORDER, 0, 2, 1, 3, 4, 6),
MODERN("modern", CORPO_CLEAN("corpo-clean", SearchOrder.DEFAULT_ORDER, 4, 5),
List.of(IndexBlock.Title, IndexBlock.Tfidf_Top, IndexBlock.Tfidf_Middle, IndexBlock.Link, IndexBlock.NamesWords, ACADEMIA("academia", SearchOrder.DEFAULT_ORDER, 3),
IndexBlock.Words_1, IndexBlock.Words_2, IndexBlock.Words_4, IndexBlock.Words_8, IndexBlock.Words_16Plus
), FOOD("food", SearchOrder.DEFAULT_ORDER, 2, 0),
2), CRAFTS("crafts", SearchOrder.DEFAULT_ORDER, 2, 0),
CORPO("corpo",
List.of(IndexBlock.Title, IndexBlock.Tfidf_Top, IndexBlock.Tfidf_Middle, IndexBlock.Tfidf_Lower, IndexBlock.Link, IndexBlock.NamesWords,
IndexBlock.Words_1, IndexBlock.Words_2, IndexBlock.Words_4, IndexBlock.Words_8, IndexBlock.Words_16Plus),
4, 5, 7),
YOLO("yolo",
List.of(IndexBlock.Title, IndexBlock.Tfidf_Top, IndexBlock.Tfidf_Lower, IndexBlock.Link, IndexBlock.NamesWords,
IndexBlock.Words_1, IndexBlock.Words_2, IndexBlock.Words_4, IndexBlock.Words_8, IndexBlock.Words_16Plus),
0, 2, 1, 3, 4, 6),
CORPO_CLEAN("corpo-clean",
List.of(IndexBlock.Title, IndexBlock.Tfidf_Top, IndexBlock.Tfidf_Middle, IndexBlock.Tfidf_Lower, IndexBlock.Link, IndexBlock.NamesWords),
4, 5),
ACADEMIA("academia",
List.of( IndexBlock.Title, IndexBlock.Tfidf_Top, IndexBlock.Tfidf_Middle, IndexBlock.Tfidf_Lower, IndexBlock.Link, IndexBlock.NamesWords),
3),
FOOD("food",
List.of(IndexBlock.Title, IndexBlock.Tfidf_Top, IndexBlock.Tfidf_Middle, IndexBlock.Tfidf_Lower, IndexBlock.Link, IndexBlock.Words_1, IndexBlock.NamesWords),
2, 0),
; ;
@ -55,12 +38,14 @@ public enum EdgeSearchProfile {
if (null == param) { if (null == param) {
return YOLO; return YOLO;
} }
return switch (param) { return switch (param) {
case "modern" -> MODERN; case "modern" -> MODERN;
case "default" -> DEFAULT; case "default" -> DEFAULT;
case "corpo" -> CORPO; case "corpo" -> CORPO;
case "academia" -> ACADEMIA; case "academia" -> ACADEMIA;
case "food" -> FOOD; case "food" -> FOOD;
case "crafts" -> CRAFTS;
default -> YOLO; default -> YOLO;
}; };
} }
@ -69,6 +54,14 @@ public enum EdgeSearchProfile {
if (this == FOOD) { if (this == FOOD) {
subquery.searchTermsInclude.add(HtmlFeature.CATEGORY_FOOD.getKeyword()); subquery.searchTermsInclude.add(HtmlFeature.CATEGORY_FOOD.getKeyword());
} }
if (this == CRAFTS) {
subquery.searchTermsInclude.add(HtmlFeature.CATEGORY_CRAFTS.getKeyword());
}
} }
} }
/**
 * Holder for the index-block order referenced by the search profiles.
 * NOTE(review): presumably kept in its own class because enum constant
 * initializers cannot refer to static fields of the enum being declared — confirm.
 */
final class SearchOrder {
    /** Index blocks in the order they are listed here; the list is unmodifiable. */
    static final List<IndexBlock> DEFAULT_ORDER = List.of(
            IndexBlock.Title, IndexBlock.Tfidf_Top, IndexBlock.Tfidf_Middle, IndexBlock.Link,
            IndexBlock.Words_1, IndexBlock.Words_2, IndexBlock.Words_4, IndexBlock.Words_8, IndexBlock.Words_16Plus);

    /** Constants only; not meant to be instantiated. */
    private SearchOrder() {
    }
}

View File

@ -7,7 +7,7 @@ import nu.marginalia.util.language.processing.SentenceExtractor;
import nu.marginalia.wmsa.configuration.WmsaHome; import nu.marginalia.wmsa.configuration.WmsaHome;
import nu.marginalia.wmsa.edge.converting.ConverterModule; import nu.marginalia.wmsa.edge.converting.ConverterModule;
import nu.marginalia.wmsa.edge.converting.processor.DomainProcessor; import nu.marginalia.wmsa.edge.converting.processor.DomainProcessor;
import nu.marginalia.wmsa.edge.converting.processor.logic.DomPruner; import nu.marginalia.wmsa.edge.converting.processor.logic.DomPruningFilter;
import nu.marginalia.wmsa.edge.converting.processor.logic.topic.RecipeDetector; import nu.marginalia.wmsa.edge.converting.processor.logic.topic.RecipeDetector;
import nu.marginalia.wmsa.edge.converting.processor.logic.topic.TextileCraftDetector; import nu.marginalia.wmsa.edge.converting.processor.logic.topic.TextileCraftDetector;
import nu.marginalia.wmsa.edge.converting.processor.logic.topic.WoodworkingDetector; import nu.marginalia.wmsa.edge.converting.processor.logic.topic.WoodworkingDetector;
@ -25,7 +25,6 @@ public class ConverterLogicTestTool {
private final Logger logger = LoggerFactory.getLogger(getClass()); private final Logger logger = LoggerFactory.getLogger(getClass());
DomPruner domPruner = new DomPruner();
RecipeDetector recipeDetector = new RecipeDetector(); RecipeDetector recipeDetector = new RecipeDetector();
WoodworkingDetector woodworkingDetector = new WoodworkingDetector(); WoodworkingDetector woodworkingDetector = new WoodworkingDetector();
TextileCraftDetector textileCraftDetector = new TextileCraftDetector(); TextileCraftDetector textileCraftDetector = new TextileCraftDetector();
@ -64,7 +63,7 @@ public class ConverterLogicTestTool {
Runnable task = () -> { Runnable task = () -> {
var parsed = Jsoup.parse(doc.documentBody); var parsed = Jsoup.parse(doc.documentBody);
domPruner.prune(parsed, 0.5); parsed.body().filter(new DomPruningFilter(0.5));
var dld = se.extractSentences(parsed); var dld = se.extractSentences(parsed);
if (dld.totalNumWords() < 250) if (dld.totalNumWords() < 250)

View File

@ -7,12 +7,18 @@
</div> </div>
<div class="settings"> <div class="settings">
<select name="profile" id="profile"> <select name="profile" id="profile">
<optgroup label="General Search">
<option {{#eq profile "default"}}selected{{/eq}} value="default">Popular Sites</option> <option {{#eq profile "default"}}selected{{/eq}} value="default">Popular Sites</option>
<option {{#eq profile "modern"}}selected{{/eq}} value="modern">Blogs and Personal Websites</option> <option {{#eq profile "modern"}}selected{{/eq}} value="modern">Blogs and Personal Websites</option>
<option {{#eq profile "academia"}}selected{{/eq}} value="academia">Academia, Forums, Big Websites</option> <option {{#eq profile "academia"}}selected{{/eq}} value="academia">Academia, Forums, Big Websites</option>
<option {{#eq profile "yolo"}}selected{{/eq}} value="yolo">Default Ranking Algorithm</option> <option {{#eq profile "yolo"}}selected{{/eq}} value="yolo">Default Ranking Algorithm</option>
<option {{#eq profile "corpo"}}selected{{/eq}} value="corpo">Everything</option>
</optgroup>
<optgroup label="Topics Search">
<option {{#eq profile "food"}}selected{{/eq}} value="food">Recipes &#127859;</option> <option {{#eq profile "food"}}selected{{/eq}} value="food">Recipes &#127859;</option>
<option {{#eq profile "corpo"}}selected{{/eq}} value="corpo">Experimental</option> <option {{#eq profile "crafts"}}selected{{/eq}} value="crafts">Crafts &#129697;&#128296; (WIP; mostly textile-craft)</option>
</optgroup>
</select> </select>
<select name="js" id="js"> <select name="js" id="js">
<option {{#eq js "default"}}selected{{/eq}} value="default">Allow JS</option> <option {{#eq js "default"}}selected{{/eq}} value="default">Allow JS</option>

View File

@ -1,7 +1,7 @@
{{#if scripts}}<abbr title="scripts" class="meta">🏭️</abbr>{{/if}} {{#if scripts}}<abbr title="scripts" class="meta">🏭️</abbr>{{/if}}
{{#if tracking}}<abbr title="analytics or tracking" class="meta">🕵️️</abbr>{{/if}} {{#if tracking}}<abbr title="analytics or tracking" class="meta">🕵️️</abbr>{{/if}}
{{#if media}}<abbr title="audio or video" class="meta">🎞️</abbr>{{/if}} {{#if media}}<abbr title="audio or video" class="meta">🎞️</abbr>{{/if}}
{{#if affiliate}}<abbr title="possible amazon affiliate link (experimental; unreliable)" class="meta">💳️</abbr>{{/if}} {{#if affiliate}}<abbr title="possible amazon affiliate link" class="meta">💳️</abbr>{{/if}}
{{#if cookies}}<abbr title="cookies" class="meta">👁️️</abbr>{{/if}} {{#if cookies}}<abbr title="cookies" class="meta">👁️️</abbr>{{/if}}
{{#if ads}}<abbr title="ads (experimental)" class="meta">⚠️️️</abbr>{{/if}} {{#if ads}}<abbr title="ads (experimental)" class="meta">⚠️️️</abbr>{{/if}}
<span class="meta">{{format}}</span> <span class="meta">{{format}}</span>

View File

@ -4,7 +4,7 @@ import org.junit.jupiter.api.Test;
import java.io.IOException; import java.io.IOException;
class DomPrunerTest { class DomPruningFilterTest {
@Test @Test
public void test() throws IOException { public void test() throws IOException {