mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Add specializations for the Steam store and GOG
This commit is contained in:
parent
e65d75a0f9
commit
0a53ac68a0
@ -150,7 +150,7 @@ public class HtmlDocumentProcessorPlugin extends AbstractDocumentProcessorPlugin
|
||||
|
||||
ret.length = length;
|
||||
ret.standard = standard;
|
||||
ret.title = titleExtractor.getTitleAbbreviated(doc, dld, crawledDocument.url);
|
||||
ret.title = specialization.getTitle(doc, dld, crawledDocument.url);
|
||||
|
||||
documentLengthLogic.validateLength(dld, specialization.lengthModifier() * documentClass.lengthLimitModifier());
|
||||
|
||||
|
@ -3,6 +3,7 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
import ca.rmen.porterstemmer.PorterStemmer;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.keyword.model.DocumentKeywordsBuilder;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
@ -29,8 +30,8 @@ import java.util.stream.Collectors;
|
||||
public class BlogSpecialization extends DefaultSpecialization {
|
||||
|
||||
@Inject
|
||||
public BlogSpecialization(SummaryExtractor summaryExtractor) {
|
||||
super(summaryExtractor);
|
||||
public BlogSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -2,8 +2,12 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.logic.dom.DomPruningFilter;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.keyword.model.DocumentKeywordsBuilder;
|
||||
import nu.marginalia.language.model.DocumentLanguageData;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -14,10 +18,12 @@ import java.util.Set;
|
||||
public class DefaultSpecialization implements HtmlProcessorSpecializations.HtmlProcessorSpecializationIf {
|
||||
|
||||
private final SummaryExtractor summaryExtractor;
|
||||
private final TitleExtractor titleExtractor;
|
||||
|
||||
@Inject
|
||||
public DefaultSpecialization(SummaryExtractor summaryExtractor) {
|
||||
public DefaultSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
this.summaryExtractor = summaryExtractor;
|
||||
this.titleExtractor = titleExtractor;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -46,4 +52,14 @@ public class DefaultSpecialization implements HtmlProcessorSpecializations.HtmlP
|
||||
|
||||
return summaryExtractor.extractSummary(doc, cleanedWords);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getTitle(Document original, DocumentLanguageData dld, String url) {
|
||||
return titleExtractor.getTitleAbbreviated(original, dld, url);
|
||||
}
|
||||
|
||||
public boolean shouldIndex(EdgeUrl url) { return true; }
|
||||
public double lengthModifier() { return 1.0; }
|
||||
|
||||
public void amendWords(Document doc, DocumentKeywordsBuilder words) {}
|
||||
}
|
||||
|
@ -0,0 +1,53 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.language.model.DocumentLanguageData;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
@Singleton
|
||||
public class GogStoreSpecialization extends DefaultSpecialization {
|
||||
|
||||
@Inject
|
||||
public GogStoreSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document prune(Document original) {
|
||||
var pruned = super.prune(original);
|
||||
pruned.select(".age-gate").remove();
|
||||
return pruned;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public String getSummary(Document original,
|
||||
Set<String> importantWords) {
|
||||
var desc = original.select(".description").first();
|
||||
if (desc != null)
|
||||
return StringUtils.truncate(desc.text(), 255);
|
||||
return super.getSummary(original, importantWords);
|
||||
}
|
||||
|
||||
public String getTitle(Document original, DocumentLanguageData dld, String url) {
|
||||
var appHubName = original.select(".productcard-basics__title").first();
|
||||
if (appHubName != null) {
|
||||
return StringUtils.truncate(appHubName.text(), 128);
|
||||
}
|
||||
|
||||
return super.getTitle(original, dld, url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldIndex(EdgeUrl url) {
|
||||
return url.path.startsWith("/en/game/");
|
||||
}
|
||||
|
||||
}
|
@ -6,6 +6,7 @@ import nu.marginalia.converting.model.GeneratorType;
|
||||
import nu.marginalia.converting.processor.ConverterDomainTypes;
|
||||
import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor;
|
||||
import nu.marginalia.keyword.model.DocumentKeywordsBuilder;
|
||||
import nu.marginalia.language.model.DocumentLanguageData;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
@ -19,8 +20,10 @@ public class HtmlProcessorSpecializations {
|
||||
private final PhpBBSpecialization phpBBSpecialization;
|
||||
private final JavadocSpecialization javadocSpecialization;
|
||||
private final MariadbKbSpecialization mariadbKbSpecialization;
|
||||
private final SteamStoreSpecialization steamStoreSpecialization;
|
||||
private final WikiSpecialization wikiSpecialization;
|
||||
private final BlogSpecialization blogSpecialization;
|
||||
private final GogStoreSpecialization gogStoreSpecialization;
|
||||
private final DefaultSpecialization defaultSpecialization;
|
||||
|
||||
@Inject
|
||||
@ -30,8 +33,10 @@ public class HtmlProcessorSpecializations {
|
||||
PhpBBSpecialization phpBBSpecialization,
|
||||
JavadocSpecialization javadocSpecialization,
|
||||
MariadbKbSpecialization mariadbKbSpecialization,
|
||||
SteamStoreSpecialization steamStoreSpecialization,
|
||||
WikiSpecialization wikiSpecialization,
|
||||
BlogSpecialization blogSpecialization,
|
||||
GogStoreSpecialization gogStoreSpecialization,
|
||||
DefaultSpecialization defaultSpecialization) {
|
||||
this.domainTypes = domainTypes;
|
||||
this.lemmySpecialization = lemmySpecialization;
|
||||
@ -39,8 +44,10 @@ public class HtmlProcessorSpecializations {
|
||||
this.phpBBSpecialization = phpBBSpecialization;
|
||||
this.javadocSpecialization = javadocSpecialization;
|
||||
this.mariadbKbSpecialization = mariadbKbSpecialization;
|
||||
this.steamStoreSpecialization = steamStoreSpecialization;
|
||||
this.wikiSpecialization = wikiSpecialization;
|
||||
this.blogSpecialization = blogSpecialization;
|
||||
this.gogStoreSpecialization = gogStoreSpecialization;
|
||||
this.defaultSpecialization = defaultSpecialization;
|
||||
}
|
||||
|
||||
@ -59,6 +66,14 @@ public class HtmlProcessorSpecializations {
|
||||
return mariadbKbSpecialization;
|
||||
}
|
||||
|
||||
if (url.domain.toString().equals("store.steampowered.com")) {
|
||||
return steamStoreSpecialization;
|
||||
}
|
||||
|
||||
if (url.domain.toString().equals("www.gog.com") && url.path.contains("/game/")) {
|
||||
return gogStoreSpecialization;
|
||||
}
|
||||
|
||||
if (generator.keywords().contains("lemmy")) {
|
||||
return lemmySpecialization;
|
||||
}
|
||||
@ -86,11 +101,11 @@ public class HtmlProcessorSpecializations {
|
||||
Document prune(Document original);
|
||||
String getSummary(Document original,
|
||||
Set<String> importantWords);
|
||||
String getTitle(Document original, DocumentLanguageData dld, String url);
|
||||
|
||||
default boolean shouldIndex(EdgeUrl url) { return true; }
|
||||
default double lengthModifier() { return 1.0; }
|
||||
|
||||
default void amendWords(Document doc, DocumentKeywordsBuilder words) {}
|
||||
boolean shouldIndex(EdgeUrl url);
|
||||
double lengthModifier();
|
||||
void amendWords(Document doc, DocumentKeywordsBuilder words);
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jsoup.nodes.Document;
|
||||
@ -15,8 +16,8 @@ public class JavadocSpecialization extends DefaultSpecialization {
|
||||
private static final Logger logger = LoggerFactory.getLogger(JavadocSpecialization.class);
|
||||
|
||||
@Inject
|
||||
public JavadocSpecialization(SummaryExtractor summaryExtractor) {
|
||||
super(summaryExtractor);
|
||||
public JavadocSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.slf4j.Logger;
|
||||
@ -11,12 +12,13 @@ import java.util.Set;
|
||||
|
||||
/** This class is used to specify how to process a website running Lemmy */
|
||||
@Singleton
|
||||
public class LemmySpecialization implements HtmlProcessorSpecializations.HtmlProcessorSpecializationIf {
|
||||
public class LemmySpecialization extends DefaultSpecialization {
|
||||
private static final Logger logger = LoggerFactory.getLogger(LemmySpecialization.class);
|
||||
private final SummaryExtractor summaryExtractor;
|
||||
|
||||
@Inject
|
||||
public LemmySpecialization(SummaryExtractor summaryExtractor) {
|
||||
public LemmySpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
this.summaryExtractor = summaryExtractor;
|
||||
}
|
||||
|
||||
|
@ -2,22 +2,25 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.keyword.model.DocumentKeywordsBuilder;
|
||||
import nu.marginalia.model.idx.WordFlags;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
@Singleton
|
||||
public class MariadbKbSpecialization extends DefaultSpecialization {
|
||||
private static final Logger logger = LoggerFactory.getLogger(MariadbKbSpecialization.class);
|
||||
|
||||
@Inject
|
||||
public MariadbKbSpecialization(SummaryExtractor summaryExtractor) {
|
||||
super(summaryExtractor);
|
||||
public MariadbKbSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -2,8 +2,9 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
@ -12,8 +13,8 @@ public class PhpBBSpecialization extends DefaultSpecialization {
|
||||
private static final Logger logger = LoggerFactory.getLogger(PhpBBSpecialization.class);
|
||||
|
||||
@Inject
|
||||
public PhpBBSpecialization(SummaryExtractor summaryExtractor) {
|
||||
super(summaryExtractor);
|
||||
public PhpBBSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -0,0 +1,75 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.language.model.DocumentLanguageData;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
@Singleton
|
||||
public class SteamStoreSpecialization extends DefaultSpecialization {
|
||||
|
||||
@Inject
|
||||
public SteamStoreSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Document prune(Document original) {
|
||||
var glanceCtn = original.select(".glance_ctn").clone().first();
|
||||
var gameAreaDesc= original.select("#game_area_description").clone().first();
|
||||
var appHubName = original.select("#appHubAppName").clone().first();
|
||||
|
||||
var newDoc = new Document(original.baseUri());
|
||||
var title = newDoc.head().appendElement("title");
|
||||
var bodyTag = newDoc.appendElement("body");
|
||||
if (appHubName != null) {
|
||||
title.appendText(appHubName.text());
|
||||
bodyTag.appendChild(appHubName);
|
||||
}
|
||||
if (glanceCtn != null) {
|
||||
bodyTag.appendChild(glanceCtn);
|
||||
}
|
||||
if (gameAreaDesc != null) {
|
||||
bodyTag.appendChild(gameAreaDesc);
|
||||
}
|
||||
|
||||
return newDoc;
|
||||
}
|
||||
|
||||
public String getTitle(Document original, DocumentLanguageData dld, String url) {
|
||||
var appHubName = original.select("#appHubAppName").first();
|
||||
if (appHubName != null) {
|
||||
return StringUtils.truncate(appHubName.text(), 128);
|
||||
}
|
||||
|
||||
return super.getTitle(original, dld, url);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getSummary(Document original, Set<String> importantWords) {
|
||||
// Trust wikis to generate a useful summary
|
||||
var gameDesc = original.select(".game_description_snippet");
|
||||
gameDesc = gameDesc.clone();
|
||||
|
||||
gameDesc.select("h2").remove();
|
||||
String desc = gameDesc.text();
|
||||
if (!desc.isBlank()) {
|
||||
return StringUtils.truncate(desc, 255);
|
||||
}
|
||||
else {
|
||||
return super.getSummary(original, importantWords);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean shouldIndex(EdgeUrl url) {
|
||||
return url.path.startsWith("/app/");
|
||||
}
|
||||
|
||||
}
|
@ -2,9 +2,10 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.keyword.model.DocumentKeywordsBuilder;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
||||
@ -15,8 +16,8 @@ import java.util.Set;
|
||||
public class WikiSpecialization extends DefaultSpecialization {
|
||||
|
||||
@Inject
|
||||
public WikiSpecialization(SummaryExtractor summaryExtractor) {
|
||||
super(summaryExtractor);
|
||||
public WikiSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.slf4j.Logger;
|
||||
@ -10,12 +11,13 @@ import org.slf4j.LoggerFactory;
|
||||
import java.util.Set;
|
||||
|
||||
@Singleton
|
||||
public class XenForoSpecialization implements HtmlProcessorSpecializations.HtmlProcessorSpecializationIf {
|
||||
public class XenForoSpecialization extends DefaultSpecialization {
|
||||
private static final Logger logger = LoggerFactory.getLogger(XenForoSpecialization.class);
|
||||
private final SummaryExtractor summaryExtractor;
|
||||
|
||||
@Inject
|
||||
public XenForoSpecialization(SummaryExtractor summaryExtractor) {
|
||||
public XenForoSpecialization(SummaryExtractor summaryExtractor, TitleExtractor titleExtractor) {
|
||||
super(summaryExtractor, titleExtractor);
|
||||
this.summaryExtractor = summaryExtractor;
|
||||
}
|
||||
|
||||
|
4684
code/processes/converting-process/test-resources/html/gog-store.html
Normal file
4684
code/processes/converting-process/test-resources/html/gog-store.html
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -3,13 +3,13 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
|
||||
class BlogSpecializationTest {
|
||||
|
||||
@Test
|
||||
void shouldIndex() throws Exception {
|
||||
var spec = new BlogSpecialization(null);
|
||||
var spec = new BlogSpecialization(null, null);
|
||||
assertFalse(spec.shouldIndex(new EdgeUrl("https://blog.marginalia.nu/2023/00/22/")));
|
||||
assertFalse(spec.shouldIndex(new EdgeUrl("https://blog.marginalia.nu/2023/00/")));
|
||||
assertFalse(spec.shouldIndex(new EdgeUrl("https://blog.marginalia.nu/00/22/")));
|
||||
|
@ -0,0 +1,53 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import nu.marginalia.converting.model.DocumentHeaders;
|
||||
import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.test.CommonTestData;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
class GogSpecializationTest {
|
||||
|
||||
static GogStoreSpecialization specialization;
|
||||
static DocumentGeneratorExtractor generatorExtractor = new DocumentGeneratorExtractor();
|
||||
|
||||
String storePage = CommonTestData.loadTestData("html/gog-store.html");
|
||||
|
||||
@BeforeAll
|
||||
public static void setUpAll() {
|
||||
specialization = new GogStoreSpecialization(
|
||||
new SummaryExtractor(255,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null),
|
||||
new TitleExtractor(128)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
void prune() {
|
||||
System.out.println(specialization.prune(Jsoup.parse(storePage)));
|
||||
}
|
||||
|
||||
@Test
|
||||
void generatorExtraction() throws Exception {
|
||||
var gen = generatorExtractor.detectGenerator(new EdgeUrl("https://www.example.com/"), Jsoup.parse(storePage), new DocumentHeaders(""));
|
||||
|
||||
System.out.println(gen);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getSummary() {
|
||||
String summary = specialization.getSummary(Jsoup.parse(storePage), Set.of(""));
|
||||
|
||||
System.out.println(summary);
|
||||
}
|
||||
}
|
@ -2,6 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import nu.marginalia.converting.model.DocumentHeaders;
|
||||
import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.test.CommonTestData;
|
||||
@ -26,7 +27,8 @@ class JavadocSpecializationTest {
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null));
|
||||
null),
|
||||
new TitleExtractor(128));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import nu.marginalia.converting.model.DocumentHeaders;
|
||||
import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.test.CommonTestData;
|
||||
@ -29,7 +30,8 @@ class LemmySpecializationTest {
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null));
|
||||
null),
|
||||
new TitleExtractor(128));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -0,0 +1,53 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import nu.marginalia.converting.model.DocumentHeaders;
|
||||
import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.test.CommonTestData;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
class SteamSpecializationTest {
|
||||
|
||||
static SteamStoreSpecialization specialization;
|
||||
static DocumentGeneratorExtractor generatorExtractor = new DocumentGeneratorExtractor();
|
||||
|
||||
String storePage = CommonTestData.loadTestData("html/steam-store.html");
|
||||
|
||||
@BeforeAll
|
||||
public static void setUpAll() {
|
||||
specialization = new SteamStoreSpecialization(
|
||||
new SummaryExtractor(255,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null),
|
||||
new TitleExtractor(128)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
void prune() {
|
||||
System.out.println(specialization.prune(Jsoup.parse(storePage)));
|
||||
}
|
||||
|
||||
@Test
|
||||
void generatorExtraction() throws Exception {
|
||||
var gen = generatorExtractor.detectGenerator(new EdgeUrl("https://www.example.com/"), Jsoup.parse(storePage), new DocumentHeaders(""));
|
||||
|
||||
System.out.println(gen);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getSummary() {
|
||||
String summary = specialization.getSummary(Jsoup.parse(storePage), Set.of(""));
|
||||
|
||||
System.out.println(summary);
|
||||
}
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.test.CommonTestData;
|
||||
import org.jsoup.Jsoup;
|
||||
@ -23,7 +24,9 @@ class WikiSpecializationTest {
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null));
|
||||
null),
|
||||
new TitleExtractor(128)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -2,6 +2,7 @@ package nu.marginalia.converting.processor.plugin.specialization;
|
||||
|
||||
import nu.marginalia.converting.model.DocumentHeaders;
|
||||
import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor;
|
||||
import nu.marginalia.converting.processor.logic.TitleExtractor;
|
||||
import nu.marginalia.converting.processor.summary.SummaryExtractor;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.test.CommonTestData;
|
||||
@ -27,7 +28,9 @@ class XenForoSpecializationTest {
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null));
|
||||
null),
|
||||
new TitleExtractor(128)
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Loading…
Reference in New Issue
Block a user