diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java index 71d2731c..dff5197e 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/CrawlerMain.java @@ -2,13 +2,14 @@ package nu.marginalia.crawl; import nu.marginalia.UserAgent; import nu.marginalia.WmsaHome; +import nu.marginalia.crawl.retreival.fetcher.HttpFetcherImpl; import nu.marginalia.process.log.WorkLog; import plan.CrawlPlanLoader; import plan.CrawlPlan; import nu.marginalia.crawling.io.CrawledDomainWriter; import nu.marginalia.crawling.model.spec.CrawlingSpecification; import nu.marginalia.crawl.retreival.CrawlerRetreiver; -import nu.marginalia.crawl.retreival.HttpFetcher; +import nu.marginalia.crawl.retreival.fetcher.HttpFetcher; import okhttp3.ConnectionPool; import okhttp3.Dispatcher; import okhttp3.internal.Util; @@ -102,8 +103,8 @@ public class CrawlerMain implements AutoCloseable { if (workLog.isJobFinished(specification.id)) return; + HttpFetcher fetcher = new HttpFetcherImpl(userAgent.uaString(), dispatcher, connectionPool); - HttpFetcher fetcher = new HttpFetcher(userAgent.uaString(), dispatcher, connectionPool); try (CrawledDomainWriter writer = new CrawledDomainWriter(crawlDataDir, specification.domain, specification.id)) { var retreiver = new CrawlerRetreiver(fetcher, specification, writer::accept); diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java index 76e605a1..0c6c2d4d 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/CrawlerRetreiver.java @@ -3,11 +3,12 
@@ package nu.marginalia.crawl.retreival; import com.google.common.hash.HashFunction; import com.google.common.hash.Hashing; import lombok.SneakyThrows; +import nu.marginalia.crawl.retreival.fetcher.FetchResult; +import nu.marginalia.crawl.retreival.fetcher.FetchResultState; +import nu.marginalia.crawl.retreival.fetcher.HttpFetcher; import nu.marginalia.crawling.model.spec.CrawlingSpecification; import nu.marginalia.link_parser.LinkParser; import nu.marginalia.crawling.model.*; -import nu.marginalia.ip_blocklist.GeoIpBlocklist; -import nu.marginalia.ip_blocklist.IpBlockList; import nu.marginalia.ip_blocklist.UrlBlocklist; import nu.marginalia.model.EdgeDomain; import nu.marginalia.model.EdgeUrl; @@ -20,10 +21,9 @@ import java.net.InetAddress; import java.net.UnknownHostException; import java.time.LocalDateTime; import java.util.ArrayList; -import java.util.HashSet; -import java.util.LinkedList; import java.util.Optional; import java.util.function.Consumer; +import java.util.function.Predicate; import static java.lang.Math.max; import static java.lang.Math.min; @@ -32,16 +32,18 @@ public class CrawlerRetreiver { private static final long DEFAULT_CRAWL_DELAY_MIN_MS = Long.getLong("defaultCrawlDelay", 1000); private static final long DEFAULT_CRAWL_DELAY_MAX_MS = Long.getLong("defaultCrawlDelaySlow", 2500); - private static final int MAX_ERRORS = 10; + private static final int MAX_ERRORS = 20; - private final LinkedList queue = new LinkedList<>(); private final HttpFetcher fetcher; - private final HashSet visited; - private final HashSet known; + + /** Flag to indicate that the crawler should slow down, e.g. 
from 429s */ private boolean slowDown = false; - private final int depth; + + /** Testing flag to disable crawl delay (otherwise crawler tests take several minutes) */ + private boolean testFlagIgnoreDelay = false; + private final String id; private final String domain; private final Consumer crawledDomainWriter; @@ -50,118 +52,120 @@ public class CrawlerRetreiver { private static final Logger logger = LoggerFactory.getLogger(CrawlerRetreiver.class); private static final HashFunction hashMethod = Hashing.murmur3_128(0); - private static final IpBlockList ipBlocklist; private static final UrlBlocklist urlBlocklist = new UrlBlocklist(); + private static final LinkFilterSelector linkFilterSelector = new LinkFilterSelector(); + + private static final DomainProber domainProber = new DomainProber(); + private final DomainCrawlFrontier crawlFrontier; + int errorCount = 0; - static { - try { - ipBlocklist = new IpBlockList(new GeoIpBlocklist()); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - public CrawlerRetreiver(HttpFetcher fetcher, CrawlingSpecification specs, Consumer writer) { this.fetcher = fetcher; - visited = new HashSet<>((int)(specs.urls.size() * 1.5)); - known = new HashSet<>(specs.urls.size() * 10); - depth = specs.crawlDepth; id = specs.id; domain = specs.domain; crawledDomainWriter = writer; - for (String urlStr : specs.urls) { - EdgeUrl.parse(urlStr).ifPresent(this::addToQueue); - } + this.crawlFrontier = new DomainCrawlFrontier(new EdgeDomain(domain), specs.urls, specs.crawlDepth); - if (queue.peek() != null) { - var fst = queue.peek(); + var fst = crawlFrontier.peek(); + if (fst != null) { + // Ensure the index page is always crawled var root = fst.withPathAndParam("/", null); - if (known.add(root.toString())) - queue.addFirst(root); + if (crawlFrontier.addKnown(root)) + crawlFrontier.addFirst(root); } else { - addToQueue(new EdgeUrl("http", new EdgeDomain(domain), null, "/", null)); - addToQueue(new EdgeUrl("https", new 
EdgeDomain(domain), null, "/", null)); + // We know nothing about this domain, so we'll start with the index, trying both HTTP and HTTPS + crawlFrontier.addToQueue(new EdgeUrl("http", new EdgeDomain(domain), null, "/", null)); + crawlFrontier.addToQueue(new EdgeUrl("https", new EdgeDomain(domain), null, "/", null)); } } + public CrawlerRetreiver withNoDelay() { + testFlagIgnoreDelay = true; + return this; + } + public int fetch() { - Optional probeResult = probeDomainForProblems(domain); + final DomainProber.ProbeResult probeResult = domainProber.probeDomain(fetcher, domain, crawlFrontier.peek()); - if (probeResult.isPresent()) { - crawledDomainWriter.accept(probeResult.get()); - return 1; - } - else { + if (probeResult instanceof DomainProber.ProbeResultOk) { return crawlDomain(); } - } - private Optional probeDomainForProblems(String domain) { - EdgeUrl fst = queue.peek(); + // handle error cases for probe + var ip = findIp(domain); - if (fst == null) { - logger.warn("No URLs for domain {}", domain); - - return Optional.of(CrawledDomain.builder() - .crawlerStatus(CrawlerDomainStatus.ERROR.name()) - .crawlerStatusDesc("No known URLs") - .id(id) - .domain(domain) - .build()); + if (probeResult instanceof DomainProber.ProbeResultError err) { + crawledDomainWriter.accept( + CrawledDomain.builder() + .crawlerStatus(err.status().name()) + .crawlerStatusDesc(err.desc()) + .id(id) + .domain(domain) + .ip(ip) + .build() + ); + return 1; } - if (!ipBlocklist.isAllowed(fst.domain)) { - return Optional.of(CrawledDomain.builder() - .crawlerStatus(CrawlerDomainStatus.BLOCKED.name()) - .id(id) - .domain(domain) - .ip(findIp(domain)) - .build()); + if (probeResult instanceof DomainProber.ProbeResultRedirect redirect) { + crawledDomainWriter.accept( + CrawledDomain.builder() + .crawlerStatus(CrawlerDomainStatus.REDIRECT.name()) + .crawlerStatusDesc("Redirected to different domain") + .redirectDomain(redirect.domain().toString()) + .id(id) + .domain(domain) + .ip(ip) + .build() + 
); + return 1; } - var fetchResult = fetcher.probeDomain(fst.withPathAndParam("/", null)); - if (!fetchResult.ok()) { - logger.debug("Bad status on {}", domain); - return Optional.of(createErrorPostFromStatus(fetchResult)); - } - return Optional.empty(); - } + throw new IllegalStateException("Unknown probe result: " + probeResult); + }; private int crawlDomain() { String ip = findIp(domain); - assert !queue.isEmpty(); + assert !crawlFrontier.isEmpty(); - var robotsRules = fetcher.fetchRobotRules(queue.peek().domain); + var robotsRules = fetcher.fetchRobotRules(crawlFrontier.peek().domain); long crawlDelay = robotsRules.getCrawlDelay(); CrawledDomain ret = new CrawledDomain(id, domain, null, CrawlerDomainStatus.OK.name(), null, ip, new ArrayList<>(), null); int fetchedCount = 0; - while (!queue.isEmpty() && visited.size() < depth && errorCount < MAX_ERRORS ) { - var top = queue.removeFirst(); + configureLinkFilter(); + + while (!crawlFrontier.isEmpty() + && !crawlFrontier.isCrawlDepthReached() + && errorCount < MAX_ERRORS) + { + var top = crawlFrontier.takeNextUrl(); if (!robotsRules.isAllowed(top.toString())) { crawledDomainWriter.accept(createRobotsError(top)); continue; } + if (!crawlFrontier.filterLink(top)) + continue; if (urlBlocklist.isUrlBlocked(top)) continue; if (!isAllowedProtocol(top.proto)) continue; if (top.toString().length() > 255) continue; - if (!visited.add(top.toString())) + if (!crawlFrontier.addVisited(top)) continue; if (fetchDocument(top, crawlDelay)) { @@ -176,8 +180,22 @@ public class CrawlerRetreiver { return fetchedCount; } + private void configureLinkFilter() { + try { + logger.info("Configuring link filter"); + + fetchUrl(crawlFrontier.peek()) + .map(linkFilterSelector::selectFilter) + .ifPresent(crawlFrontier::setLinkFilter); + } + catch (Exception ex) { + logger.error("Error configuring link filter", ex); + } + } + private boolean fetchDocument(EdgeUrl top, long crawlDelay) { logger.debug("Fetching {}", top); + long startTime = 
System.currentTimeMillis(); var doc = fetchUrl(top); @@ -186,10 +204,10 @@ public class CrawlerRetreiver { crawledDomainWriter.accept(d); if (d.url != null) { - EdgeUrl.parse(d.url).map(EdgeUrl::toString).ifPresent(visited::add); + EdgeUrl.parse(d.url).ifPresent(crawlFrontier::addVisited); } - if ("ERROR".equals(d.crawlerStatus)) { + if ("ERROR".equals(d.crawlerStatus) && d.httpStatus != 404) { errorCount++; } @@ -211,7 +229,6 @@ public class CrawlerRetreiver { var doc = fetchContent(top); if (doc.documentBody != null) { - doc.documentBodyHash = createHash(doc.documentBody.decode()); Optional parsedDoc = parseDoc(doc); @@ -260,37 +277,23 @@ public class CrawlerRetreiver { return Optional.of(Jsoup.parse(doc.documentBody.decode())); } - public boolean isSameDomain(EdgeUrl url) { - return domain.equalsIgnoreCase(url.domain.toString()); - } - private void findLinks(EdgeUrl baseUrl, Document parsed) { baseUrl = linkParser.getBaseLink(parsed, baseUrl); for (var link : parsed.getElementsByTag("a")) { - linkParser.parseLink(baseUrl, link).ifPresent(this::addToQueue); + linkParser.parseLink(baseUrl, link).ifPresent(crawlFrontier::addToQueue); } for (var link : parsed.getElementsByTag("frame")) { - linkParser.parseFrame(baseUrl, link).ifPresent(this::addToQueue); + linkParser.parseFrame(baseUrl, link).ifPresent(crawlFrontier::addToQueue); } for (var link : parsed.getElementsByTag("iframe")) { - linkParser.parseFrame(baseUrl, link).ifPresent(this::addToQueue); + linkParser.parseFrame(baseUrl, link).ifPresent(crawlFrontier::addToQueue); } - } - - private void addToQueue(EdgeUrl url) { - if (!isSameDomain(url)) - return; - if (urlBlocklist.isUrlBlocked(url)) - return; - if (urlBlocklist.isMailingListLink(url)) - return; - // reduce memory usage by not growing queue huge when crawling large sites - if (queue.size() + visited.size() >= depth + 100) - return; - - if (known.add(url.toString())) { - queue.addLast(url); + for (var link : parsed.getElementsByTag("link")) { + String 
rel = link.attr("rel"); + if (rel.equalsIgnoreCase("next") || rel.equalsIgnoreCase("prev")) { + linkParser.parseLink(baseUrl, link).ifPresent(crawlFrontier::addToQueue); + } } } @@ -314,6 +317,9 @@ public class CrawlerRetreiver { @SneakyThrows private void delay(long sleepTime, long spentTime) { + if (testFlagIgnoreDelay) + return; + if (sleepTime >= 1) { if (spentTime > sleepTime) return; @@ -355,17 +361,17 @@ public class CrawlerRetreiver { .crawlerStatus(CrawlerDocumentStatus.ERROR.name()) .build(); } - private CrawledDomain createErrorPostFromStatus(HttpFetcher.FetchResult ret) { + private CrawledDomain createErrorPostFromStatus(FetchResult ret) { String ip = findIp(domain); - if (ret.state == HttpFetcher.FetchResultState.ERROR) { + if (ret.state == FetchResultState.ERROR) { return CrawledDomain.builder() .crawlerStatus(CrawlerDomainStatus.ERROR.name()) .id(id).domain(domain) .ip(ip) .build(); } - if (ret.state == HttpFetcher.FetchResultState.REDIRECT) { + if (ret.state == FetchResultState.REDIRECT) { return CrawledDomain.builder() .crawlerStatus(CrawlerDomainStatus.REDIRECT.name()) .id(id) @@ -377,4 +383,5 @@ public class CrawlerRetreiver { throw new AssertionError("Unexpected case"); } + } diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainCrawlFrontier.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainCrawlFrontier.java new file mode 100644 index 00000000..5b6a35df --- /dev/null +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainCrawlFrontier.java @@ -0,0 +1,99 @@ +package nu.marginalia.crawl.retreival; + +import nu.marginalia.ip_blocklist.UrlBlocklist; +import nu.marginalia.model.EdgeDomain; +import nu.marginalia.model.EdgeUrl; + +import java.util.Collection; +import java.util.HashSet; +import java.util.LinkedList; +import java.util.Objects; +import java.util.function.Predicate; + +public class DomainCrawlFrontier { + private final 
LinkedList queue = new LinkedList<>(); + private final HashSet visited; + private final HashSet known; + + private final EdgeDomain thisDomain; + private final UrlBlocklist urlBlocklist; + + private Predicate linkFilter = url -> true; + + final int depth; + + public DomainCrawlFrontier(EdgeDomain thisDomain, Collection urls, int depth) { + this.thisDomain = thisDomain; + this.urlBlocklist = new UrlBlocklist(); + this.depth = depth; + + visited = new HashSet<>((int)(urls.size() * 1.5)); + known = new HashSet<>(urls.size() * 10); + + for (String urlStr : urls) { + EdgeUrl.parse(urlStr).ifPresent(this::addToQueue); + } + } + + public void setLinkFilter(Predicate linkFilter) { + this.linkFilter = linkFilter; + } + + public boolean isCrawlDepthReached() { + return visited.size() >= depth; + } + + public boolean isEmpty() { + return queue.isEmpty(); + } + public boolean addKnown(EdgeUrl url) { + return known.add(url.toString()); /* records the URL; true only when it was not already known */ + } + public void addFirst(EdgeUrl url) { + queue.addFirst(url); + } + + public EdgeUrl takeNextUrl() { + return queue.removeFirst(); + } + + public EdgeUrl peek() { + return queue.peek(); + } + + public boolean addVisited(EdgeUrl url) { + return visited.add(url.toString()); + } + + public boolean filterLink(EdgeUrl url) { + return linkFilter.test(url); + } + + public void addToQueue(EdgeUrl url) { + if (!isSameDomain(url)) + return; + if (urlBlocklist.isUrlBlocked(url)) + return; + if (urlBlocklist.isMailingListLink(url)) + return; + if (!linkFilter.test(url)) + return; + + // reduce memory usage by not growing queue huge when crawling large sites + if (queue.size() + visited.size() >= depth + 100) + return; + + if (known.add(url.toString())) { + queue.addLast(url); + } + + + } + + + public boolean isSameDomain(EdgeUrl url) { + return Objects.equals(thisDomain, url.domain); + } + + +} diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainProber.java 
b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainProber.java new file mode 100644 index 00000000..4b1c6413 --- /dev/null +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/DomainProber.java @@ -0,0 +1,59 @@ +package nu.marginalia.crawl.retreival; + +import nu.marginalia.crawl.retreival.fetcher.FetchResultState; +import nu.marginalia.crawl.retreival.fetcher.HttpFetcher; +import nu.marginalia.crawling.model.CrawlerDomainStatus; +import nu.marginalia.ip_blocklist.GeoIpBlocklist; +import nu.marginalia.ip_blocklist.IpBlockList; +import nu.marginalia.model.EdgeDomain; +import nu.marginalia.model.EdgeUrl; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +public class DomainProber { + private final Logger logger = LoggerFactory.getLogger(DomainProber.class); + private static IpBlockList ipBlockList; + + static { + try { + ipBlockList = new IpBlockList(new GeoIpBlocklist()); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + /** To detect problems early we do a probing request to the domain before we start crawling it properly. + * This is a HEAD, typically to the root path. We check the IP against the blocklist, we check that it + * doesn't immediately redirect to another domain (which should be crawled separately, not under the name + * of this domain). 
+ */ + public ProbeResult probeDomain(HttpFetcher fetcher, String domain, @Nullable EdgeUrl firstUrlInQueue) { + + if (firstUrlInQueue == null) { + logger.warn("No valid URLs for domain {}", domain); + + return new ProbeResultError(CrawlerDomainStatus.ERROR, "No known URLs"); + } + + if (!ipBlockList.isAllowed(firstUrlInQueue.domain)) + return new ProbeResultError(CrawlerDomainStatus.BLOCKED, "IP not allowed"); + + var fetchResult = fetcher.probeDomain(firstUrlInQueue.withPathAndParam("/", null)); + + if (fetchResult.ok()) + return new ProbeResultOk(); + + if (fetchResult.state == FetchResultState.REDIRECT) + return new ProbeResultRedirect(fetchResult.domain); + + return new ProbeResultError(CrawlerDomainStatus.ERROR, "Bad status"); + } + + interface ProbeResult {}; + + record ProbeResultError(CrawlerDomainStatus status, String desc) implements ProbeResult {} + record ProbeResultRedirect(EdgeDomain domain) implements ProbeResult {} + record ProbeResultOk() implements ProbeResult {} +} diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/HttpRedirectResolver.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/HttpRedirectResolver.java deleted file mode 100644 index dab54670..00000000 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/HttpRedirectResolver.java +++ /dev/null @@ -1,108 +0,0 @@ -package nu.marginalia.crawl.retreival; - -import com.google.inject.Inject; -import com.google.inject.Singleton; -import com.google.inject.name.Named; -import io.reactivex.rxjava3.core.Observable; -import lombok.SneakyThrows; -import nu.marginalia.link_parser.LinkParser; -import nu.marginalia.model.EdgeUrl; -import nu.marginalia.client.exception.NetworkException; -import okhttp3.OkHttpClient; -import okhttp3.Request; -import okhttp3.Response; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.net.ssl.X509TrustManager; -import java.io.IOException; -import 
java.util.concurrent.TimeUnit; - -// TODO: Is this used? -@Singleton -public class HttpRedirectResolver { - private static final LinkParser linkParser = new LinkParser(); - - private final Logger logger = LoggerFactory.getLogger(getClass()); - private final String userAgent; - private final Cookies cookies = new Cookies(); - - private final OkHttpClient client = createClient(); - - @SneakyThrows - private OkHttpClient createClient() { - - return new OkHttpClient.Builder() - .sslSocketFactory(NoSecuritySSL.buildSocketFactory(), (X509TrustManager) NoSecuritySSL.trustAllCerts[0]) - .hostnameVerifier(NoSecuritySSL.buildHostnameVerifyer()) - .cookieJar(cookies.getJar()) - .followRedirects(false) - .followSslRedirects(false) - .connectTimeout(8, TimeUnit.SECONDS) - .build(); - } - - @Inject - public HttpRedirectResolver(@Named("user-agent") String userAgent) { - this.userAgent = userAgent; - } - - @SneakyThrows - public Observable probe(EdgeUrl url) { - return probe(url, 0); - } - - private Observable probe(EdgeUrl url, int depth) { - if (depth > 10) { - return Observable.error(new IllegalStateException("Too many redirects")); - } - if (!url.proto.toLowerCase().startsWith("http")) { - return Observable.empty(); - } - var head = new Request.Builder().get().addHeader("User-agent", userAgent) - .url(url.toString()) - .addHeader("Accept-Encoding", "gzip") - .build(); - - var call = client.newCall(head); - try (var rsp = call.execute()) { - return resolveRedirects(depth, url, rsp); - } catch (IOException e) { - return Observable.error(e); - } - } - - @SneakyThrows - private Observable resolveRedirects(int depth, EdgeUrl url, Response response) { - int code = response.code(); - response.close(); - - if (code < 300) { - return Observable.just(url); - } - if (code < 309) { - String newUrl = response.header("Location"); - return Observable.fromOptional(linkParser.parseLink(url, newUrl)) - .flatMap(u -> probe(u, depth + 1)); - } - if (code >= 400) { - return Observable.just(url); 
- } - return Observable.error(new IllegalStateException("HttpStatusCode " + code)); - } - - - private boolean failOnBadStatus(Response response) { - if (response.code() >= 400) { - response.close(); - throw new NetworkException("Bad status " + response.code()); - } - return true; - } - - public static class BadContentType extends RuntimeException { - public BadContentType(String type) { - super(type); - } - } -} diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/LinkFilterSelector.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/LinkFilterSelector.java new file mode 100644 index 00000000..aa0b58d2 --- /dev/null +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/LinkFilterSelector.java @@ -0,0 +1,61 @@ +package nu.marginalia.crawl.retreival; + +import nu.marginalia.crawling.model.CrawledDocument; +import nu.marginalia.model.EdgeUrl; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Element; + +import java.util.function.Predicate; + +public class LinkFilterSelector { + + /* With websites that run e.g. forum software or wiki software, it's + very beneficial to cherry-pick the URLs that we want to crawl to + exclude e.g. user profiles, and other similar noise. 
+ */ + public Predicate selectFilter(CrawledDocument sample) { + + if (sample.httpStatus != 200 || sample.documentBody == null) { /* no usable sample body to sniff */ + return LinkFilterSelector::defaultFilter; + } + + // Sniff the software based on the sample document + + var doc = Jsoup.parse(sample.documentBody.decode()); + var head = doc.getElementsByTag("head").first(); + if (null == head) { + return LinkFilterSelector::defaultFilter; + } + + if (isLemmy(head)) { + return url -> url.path.startsWith("/post/") || url.path.startsWith("/c/"); + } + if (isMediawiki(head)) { + return url -> url.path.startsWith("/wiki/") && !url.path.contains(":"); + } + if (isDiscourse(head)) { + return url -> url.path.startsWith("/t/") || url.path.contains("/latest"); + } + + return LinkFilterSelector::defaultFilter; + } + + public static boolean defaultFilter(EdgeUrl url) { + return true; + } + + private boolean isMediawiki(Element head) { + return head.select("meta[name=generator]").attr("content").toLowerCase().contains("mediawiki"); + } + private boolean isDiscourse(Element head) { + return head.select("meta[name=generator]").attr("content").toLowerCase().contains("discourse"); + } + private boolean isLemmy(Element head) { + for (var scriptTags : head.select("script")) { + if (scriptTags.html().contains("window.lemmyConfig")) { + return true; + } + } + return false; + } +} diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/FastTerminatingSocketFactory.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FastTerminatingSocketFactory.java similarity index 97% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/FastTerminatingSocketFactory.java rename to code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FastTerminatingSocketFactory.java index 8679f09d..add64e29 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/FastTerminatingSocketFactory.java +++ 
b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FastTerminatingSocketFactory.java @@ -1,4 +1,4 @@ -package nu.marginalia.crawl.retreival; +package nu.marginalia.crawl.retreival.fetcher; import javax.net.SocketFactory; import java.io.IOException; diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResult.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResult.java new file mode 100644 index 00000000..40b6d1a8 --- /dev/null +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResult.java @@ -0,0 +1,16 @@ +package nu.marginalia.crawl.retreival.fetcher; + +import lombok.AllArgsConstructor; +import lombok.ToString; +import nu.marginalia.model.EdgeDomain; + +@AllArgsConstructor +@ToString +public class FetchResult { + public final FetchResultState state; + public final EdgeDomain domain; + + public boolean ok() { + return state == FetchResultState.OK; + } +} diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResultState.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResultState.java new file mode 100644 index 00000000..846e2b4f --- /dev/null +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/FetchResultState.java @@ -0,0 +1,7 @@ +package nu.marginalia.crawl.retreival.fetcher; + +public enum FetchResultState { + OK, + REDIRECT, + ERROR +} diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java new file mode 100644 index 00000000..7937736f --- /dev/null +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcher.java @@ -0,0 +1,25 @@ +package 
nu.marginalia.crawl.retreival.fetcher; + +import com.google.inject.ImplementedBy; +import crawlercommons.robots.SimpleRobotRules; +import lombok.SneakyThrows; +import nu.marginalia.crawl.retreival.RateLimitException; +import nu.marginalia.crawling.model.CrawledDocument; +import nu.marginalia.model.EdgeDomain; +import nu.marginalia.model.EdgeUrl; + +import java.util.List; + +@ImplementedBy(HttpFetcherImpl.class) +public interface HttpFetcher { + void setAllowAllContentTypes(boolean allowAllContentTypes); + + List getCookies(); + void clearCookies(); + + FetchResult probeDomain(EdgeUrl url); + + CrawledDocument fetchContent(EdgeUrl url) throws RateLimitException; + + SimpleRobotRules fetchRobotRules(EdgeDomain domain); +} diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/HttpFetcher.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java similarity index 94% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/HttpFetcher.java rename to code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java index cd9856cf..83308118 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/HttpFetcher.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/HttpFetcherImpl.java @@ -1,12 +1,12 @@ -package nu.marginalia.crawl.retreival; +package nu.marginalia.crawl.retreival.fetcher; import com.google.inject.Inject; import com.google.inject.name.Named; import crawlercommons.robots.SimpleRobotRules; import crawlercommons.robots.SimpleRobotRulesParser; -import lombok.AllArgsConstructor; import lombok.SneakyThrows; -import lombok.ToString; +import nu.marginalia.crawl.retreival.Cookies; +import nu.marginalia.crawl.retreival.RateLimitException; import nu.marginalia.crawling.model.CrawledDocument; import 
nu.marginalia.crawling.model.CrawlerDocumentStatus; import nu.marginalia.crawling.model.ContentType; @@ -35,7 +35,7 @@ import java.util.Optional; import java.util.concurrent.TimeUnit; import java.util.zip.GZIPInputStream; -public class HttpFetcher { +public class HttpFetcherImpl implements HttpFetcher { private final Logger logger = LoggerFactory.getLogger(getClass()); private final String userAgent; @@ -46,29 +46,15 @@ public class HttpFetcher { private final ContentTypeLogic contentTypeLogic = new ContentTypeLogic(); + @Override public void setAllowAllContentTypes(boolean allowAllContentTypes) { contentTypeLogic.setAllowAllContentTypes(allowAllContentTypes); } private final OkHttpClient client; - public enum FetchResultState { - OK, - REDIRECT, - ERROR - } - - @AllArgsConstructor @ToString - public static class FetchResult { - public final FetchResultState state; - public final EdgeDomain domain; - - public boolean ok() { - return state == FetchResultState.OK; - } - } - private static final FastTerminatingSocketFactory ftSocketFactory = new FastTerminatingSocketFactory(); + @SneakyThrows private OkHttpClient createClient(Dispatcher dispatcher, ConnectionPool pool) { var builder = new OkHttpClient.Builder(); @@ -90,25 +76,28 @@ public class HttpFetcher { } + @Override public List getCookies() { return cookies.getCookies(); } + @Override public void clearCookies() { cookies.clear(); } @Inject - public HttpFetcher(@Named("user-agent") String userAgent, Dispatcher dispatcher, ConnectionPool connectionPool) { + public HttpFetcherImpl(@Named("user-agent") String userAgent, Dispatcher dispatcher, ConnectionPool connectionPool) { this.client = createClient(dispatcher, connectionPool); this.userAgent = userAgent; } - public HttpFetcher(@Named("user-agent") String userAgent) { + public HttpFetcherImpl(@Named("user-agent") String userAgent) { this.client = createClient(null, new ConnectionPool()); this.userAgent = userAgent; } + @Override @SneakyThrows public FetchResult 
probeDomain(EdgeUrl url) { var head = new Request.Builder().head().addHeader("User-agent", userAgent) @@ -126,6 +115,7 @@ public class HttpFetcher { } return new FetchResult(FetchResultState.OK, requestDomain); } + catch (Exception ex) { if (url.proto.equalsIgnoreCase("http") && "/".equals(url.path)) { return probeDomain(new EdgeUrl("https", url.domain, url.port, url.path, url.param)); @@ -151,6 +141,7 @@ public class HttpFetcher { } + @Override @SneakyThrows public CrawledDocument fetchContent(EdgeUrl url) throws RateLimitException { @@ -312,6 +303,7 @@ public class HttpFetcher { } + @Override public SimpleRobotRules fetchRobotRules(EdgeDomain domain) { return fetchRobotsForProto("https", domain) .or(() -> fetchRobotsForProto("http", domain)) diff --git a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/NoSecuritySSL.java b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/NoSecuritySSL.java similarity index 96% rename from code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/NoSecuritySSL.java rename to code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/NoSecuritySSL.java index 225bea97..a52251bc 100644 --- a/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/NoSecuritySSL.java +++ b/code/processes/crawling-process/src/main/java/nu/marginalia/crawl/retreival/fetcher/NoSecuritySSL.java @@ -1,4 +1,4 @@ -package nu.marginalia.crawl.retreival; +package nu.marginalia.crawl.retreival.fetcher; import lombok.SneakyThrows; diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/HttpFetcherTest.java b/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/HttpFetcherTest.java index c9cc8b0b..f6c2f3a4 100644 --- a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/HttpFetcherTest.java +++ 
b/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/HttpFetcherTest.java @@ -1,9 +1,8 @@ package nu.marginalia.crawling; import lombok.SneakyThrows; -import nu.marginalia.crawl.retreival.HttpFetcher; -import nu.marginalia.crawl.retreival.HttpRedirectResolver; import nu.marginalia.crawl.retreival.RateLimitException; +import nu.marginalia.crawl.retreival.fetcher.HttpFetcherImpl; import nu.marginalia.crawl.retreival.logic.ContentTypeLogic; import nu.marginalia.model.EdgeUrl; import org.junit.jupiter.api.Assertions; @@ -29,44 +28,15 @@ class HttpFetcherTest { @Test void fetchUTF8() throws URISyntaxException, RateLimitException { - var fetcher = new HttpFetcher("nu.marginalia.edge-crawler"); + var fetcher = new HttpFetcherImpl("nu.marginalia.edge-crawler"); var str = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu")); System.out.println(str.contentType); } @Test void fetchText() throws URISyntaxException, RateLimitException { - var fetcher = new HttpFetcher("nu.marginalia.edge-crawler"); + var fetcher = new HttpFetcherImpl("nu.marginalia.edge-crawler"); var str = fetcher.fetchContent(new EdgeUrl("https://www.marginalia.nu/robots.txt")); System.out.println(str); } - - @Test - void resolveRedirect() throws URISyntaxException { - var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); - var str = fetcher.probe(new EdgeUrl("https://www.marginalia.nu/robots.txt")); - System.out.println(str); - } - - @Test - void resolveRedirect2() throws URISyntaxException { - var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); - var str = fetcher.probe(new EdgeUrl("https://www.marginalia.nu/robots.txt")).blockingFirst(); - System.out.println(str); - } - - @Test - void resolveRedirect3() throws URISyntaxException { - var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); - var str = fetcher.probe(new EdgeUrl("https://www.marginalia.nu/robots.txt")); - System.out.println(str); - } - - - @Test - void 
resolveRedirect4() throws URISyntaxException { - var fetcher = new HttpRedirectResolver("nu.marginalia.edge-crawler"); - var str = fetcher.probe(new EdgeUrl("https://www.marginalia.nu/robots.txt")); - System.out.println(str); - } } \ No newline at end of file diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java b/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java new file mode 100644 index 00000000..98b8bf83 --- /dev/null +++ b/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerMockFetcherTest.java @@ -0,0 +1,152 @@ +package nu.marginalia.crawling.retreival; + +import crawlercommons.robots.SimpleRobotRules; +import lombok.SneakyThrows; +import nu.marginalia.bigstring.BigString; +import nu.marginalia.crawl.retreival.CrawlerRetreiver; +import nu.marginalia.crawl.retreival.RateLimitException; +import nu.marginalia.crawl.retreival.fetcher.FetchResult; +import nu.marginalia.crawl.retreival.fetcher.FetchResultState; +import nu.marginalia.crawl.retreival.fetcher.HttpFetcher; +import nu.marginalia.crawling.model.CrawledDocument; +import nu.marginalia.crawling.model.CrawlerDocumentStatus; +import nu.marginalia.crawling.model.SerializableCrawlData; +import nu.marginalia.crawling.model.spec.CrawlingSpecification; +import nu.marginalia.model.EdgeDomain; +import nu.marginalia.model.EdgeUrl; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map;
+
+/**
+ * Runs {@link CrawlerRetreiver} against {@link MockFetcher}, an {@link HttpFetcher}
+ * that answers from an in-memory map of canned documents instead of a real HTTP client.
+ *
+ * <p>The tests only print whatever the retriever emits; they make no assertions
+ * on that output.
+ */
+public class CrawlerMockFetcherTest {
+
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerMockFetcherTest.class);
+
+    /** Canned documents keyed by URL; {@link MockFetcher} serves these and answers 404 for anything else. */
+    final Map<EdgeUrl, CrawledDocument> mockData = new HashMap<>();
+
+    /** Fetcher handed to the retriever under test. */
+    final HttpFetcher fetcherMock = new MockFetcher();
+
+    /** Empties the canned-document map after each test. */
+    @AfterEach
+    public void tearDown() {
+        mockData.clear();
+    }
+
+    /**
+     * Registers a successful (HTTP 200, {@code text/html}) canned response for a URL.
+     *
+     * @param url          URL the mock fetcher should answer for
+     * @param documentData document body to serve
+     */
+    private void registerUrl(EdgeUrl url, String documentData) {
+        mockData.put(url, CrawledDocument.builder()
+                .crawlId("1")
+                .url(url.toString())
+                .contentType("text/html")
+                .httpStatus(200)
+                .crawlerStatus(CrawlerDocumentStatus.OK.name())
+                .documentBody(BigString.encode(documentData))
+                .build());
+    }
+
+    /**
+     * Registers a canned response whose body is read, as UTF-8, from a classpath resource.
+     *
+     * @param url  URL the mock fetcher should answer for
+     * @param path classpath-relative location of the resource
+     * @throws IllegalArgumentException if no resource exists at {@code path}
+     */
+    @SneakyThrows
+    private void registerUrlClasspathData(EdgeUrl url, String path) {
+        try (var resourceStream = getClass().getClassLoader().getResourceAsStream(path)) {
+            if (resourceStream == null) throw new IllegalArgumentException("No such resource: " + path);
+
+            // Same document shape as registerUrl(), so build it there rather than duplicating the builder chain.
+            registerUrl(url, new String(resourceStream.readAllBytes(), StandardCharsets.UTF_8));
+        }
+    }
+
+    /** Crawls {@code startrek.website} with two canned Lemmy pages and prints the result. */
+    @Test
+    public void testLemmy() throws URISyntaxException {
+        List<SerializableCrawlData> out = new ArrayList<>();
+
+        registerUrlClasspathData(new EdgeUrl("https://startrek.website/"), "mock-crawl-data/lemmy/index.html");
+        registerUrlClasspathData(new EdgeUrl("https://startrek.website/c/startrek"), "mock-crawl-data/lemmy/c_startrek.html");
+
+        new CrawlerRetreiver(fetcherMock, new CrawlingSpecification("1", 10, "startrek.website", new ArrayList<>()), out::add)
+                .withNoDelay()
+                .fetch();
+
+        out.forEach(System.out::println);
+    }
+
+    /** Crawls {@code en.wikipedia.org} with one canned MediaWiki page and prints the result. */
+    @Test
+    public void testMediawiki() throws URISyntaxException {
+        List<SerializableCrawlData> out = new ArrayList<>();
+
+        registerUrlClasspathData(new EdgeUrl("https://en.wikipedia.org/"), "mock-crawl-data/mediawiki/index.html");
+
+        new CrawlerRetreiver(fetcherMock, new CrawlingSpecification("1", 10, "en.wikipedia.org", new ArrayList<>()), out::add)
+                .withNoDelay()
+                .fetch();
+
+        out.forEach(System.out::println);
+    }
+
+    /** Crawls {@code community.tt-rss.org} with three canned Discourse pages and prints the result. */
+    @Test
+    public void testDiscourse() throws URISyntaxException {
+        List<SerializableCrawlData> out = new ArrayList<>();
+
+        registerUrlClasspathData(new EdgeUrl("https://community.tt-rss.org/"), "mock-crawl-data/discourse/index.html");
+        registerUrlClasspathData(new EdgeUrl("https://community.tt-rss.org/t/telegram-channel-to-idle-on/3501"), "mock-crawl-data/discourse/telegram.html");
+        registerUrlClasspathData(new EdgeUrl("https://community.tt-rss.org/t/combined-mode-but-grid/4489"), "mock-crawl-data/discourse/grid.html");
+
+        new CrawlerRetreiver(fetcherMock, new CrawlingSpecification("1", 100, "community.tt-rss.org", new ArrayList<>()), out::add)
+                .withNoDelay()
+                .fetch();
+
+        out.forEach(System.out::println);
+    }
+
+    /**
+     * {@link HttpFetcher} that answers from {@link #mockData}.
+     * Kept as an inner (non-static) class because it reads the enclosing test's map.
+     */
+    class MockFetcher implements HttpFetcher {
+
+        /** No-op. */
+        @Override
+        public void setAllowAllContentTypes(boolean allowAllContentTypes) {}
+
+        /**
+         * Always returns an empty list.
+         * NOTE(review): the element type is declared by HttpFetcher, which is not visible here;
+         * restore the matching type argument instead of the raw {@code List}.
+         */
+        @Override
+        public List getCookies() { return List.of();}
+
+        /** No-op. */
+        @Override
+        public void clearCookies() {}
+
+        /** Logs the probe and reports {@link FetchResultState#OK} for the URL's own domain. */
+        @Override
+        public FetchResult probeDomain(EdgeUrl url) {
+            logger.info("Probing {}", url);
+            return new FetchResult(FetchResultState.OK, url.domain);
+        }
+
+        /**
+         * Returns the canned document registered for {@code url}, or a body-less
+         * 404 document with crawler status {@code ERROR} when none is registered.
+         */
+        @Override
+        public CrawledDocument fetchContent(EdgeUrl url) {
+            logger.info("Fetching {}", url);
+
+            // Only non-null documents are ever stored, so a null lookup means "not registered".
+            CrawledDocument canned = mockData.get(url);
+            if (canned != null) {
+                return canned;
+            }
+
+            return CrawledDocument.builder()
+                    .crawlId("1")
+                    .url(url.toString())
+                    .contentType("text/html")
+                    .httpStatus(404)
+                    .crawlerStatus(CrawlerDocumentStatus.ERROR.name())
+                    .build();
+        }
+
+        /** Returns a freshly constructed, rule-less {@link SimpleRobotRules}. */
+        @Override
+        public SimpleRobotRules fetchRobotRules(EdgeDomain domain) {
+            return new SimpleRobotRules();
+        }
+    }
+}
diff --git a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java b/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java index b002b1ab..7c91510a 100644 --- a/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java +++ b/code/processes/crawling-process/src/test/java/nu/marginalia/crawling/retreival/CrawlerRetreiverTest.java @@ -1,7 +1,8 @@ package
nu.marginalia.crawling.retreival; import nu.marginalia.crawl.retreival.CrawlerRetreiver; -import nu.marginalia.crawl.retreival.HttpFetcher; +import nu.marginalia.crawl.retreival.fetcher.HttpFetcher; +import nu.marginalia.crawl.retreival.fetcher.HttpFetcherImpl; import nu.marginalia.crawling.model.CrawledDocument; import nu.marginalia.crawling.model.spec.CrawlingSpecification; import nu.marginalia.crawling.model.SerializableCrawlData; @@ -23,7 +24,7 @@ class CrawlerRetreiverTest { var specs = new CrawlingSpecification("1", 5, "www.marginalia.nu", new ArrayList<>()); - HttpFetcher fetcher = new HttpFetcher("test.marginalia.nu"); + HttpFetcher fetcher = new HttpFetcherImpl("test.marginalia.nu"); List data = new ArrayList<>(); diff --git a/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/grid.html b/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/grid.html new file mode 100644 index 00000000..5ebb6c14 --- /dev/null +++ b/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/grid.html @@ -0,0 +1,860 @@ + + + + + Combined mode but grid - Development - Tiny Tiny RSS: Community + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Tiny Tiny RSS: Community + +
+ +
+ + + + +
+ + + +
+ +
+ +
+ +
+

horrible, huh?

+
@media screen and (min-width: 1400px) {
+		#headlines-frame {
+			display : grid;
+			grid-template-columns: repeat(2, 1fr);
+			grid-gap : 8px;
+
+			.cdm.expanded {
+				.footer {
+					border : 0;
+				}
+
+				border : 1px solid @border-default;
+			}
+		}
+	}
+}
+
+

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

I kind of like it. The concept at least.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

You got me curious.

+

A little rough in its current form if you’re someone toggling collapsed mode. Not sure exactly what caused it, but I wound up with the page getting wider a time or two while I tried it out. I’m sure that it’s actually pretty desirable for image-heavy uses, such as your example of Reddit. Would not have guessed that it could be pulled off with just some lines of CSS, but I don’t know web dev.

+

Neat.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

yeah this would obviously only work for expanded mode :slight_smile:

+

+

a bit more polished looking, i think.

+

e: it’s an option now.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

i made a really primitive plugin that fakes masonry layout for the grid:

+

+

it’s somewhat buggy and lacks any optimization whatsoever but cool nonetheless (when it works). enjoy.

+

https://git.tt-rss.org/fox/ttrss-grid-masonry

+

e: this needs latest master, just in case.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

So I’ve been using this view and I like it but one small issue I can’t figure out how to fix is word wrapping in the frames. Words keep getting split between lines and making things hard to read sometimes. Is that something I can fix with the custom css?

+

edit: I should have mentioned I am running the dynamic docker setup and I restarted the containers a few hours ago.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

i should probably go easier on word-breaking in there, limit to links only or something like that.

+

https://git.tt-rss.org/fox/tt-rss/src/branch/master/themes/light/tt-rss.less#L749

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

Hi @fox, awesome to see that rush of progress!

+

I’d suggest using word-wrap: break-word; instead of word-break: break-all;. This makes words only break if they are really wider than the container. Short words are not affected. So you might then also remove the restriction to links.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

yeah this could work for text, but for links specifically it can make things ugly (uglier?):

+

+

which is why i went with break-all.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

Thanks for the quick reply and your explanation – I get your point. The only problematic edge-case I see with this is links at the end of a line.

+

+

But I think that’s okay.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

Thanks for the quick fix. It’s much better. I’m really liking the grid layout with mark as read on scroll. Thanks.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

I think it’s a nice layout to be set to feeds like Dilbert

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

Good, now add it to android app :grin:

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

Feel a bit guilty since I gave feedback and disappeared.

+

I have an image heavy feed I’m very far behind on, and your first post made me try out expanded instead of collapsed. Sped things up immensely because apparently marking posts as read when you flip through them was CPU heavy on my server.

+

After that, columns just made things even faster for me.

+

I did run into an edge case that most people probably wouldn’t have even noticed, where a headline would be top-to-bottom in one column at 1080p, so I ham-fisted some CSS to set a max-height on titleWrap.

+

Thanks so much for this feature, fox. Didn’t even know I needed it.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

Sorry to resurrect this old thread. I’ve really grown to love grid mode but I noticed the article headlines sometimes break words on to separate lines. It seems to happen with all themes. I don’t have the masonry plugin active either.

+

I’m using the stock docker setup.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+ +

yeah it’s a stock CSS thing, it’s either that or super long words breaking layout. i’m not fond of either behaviors. :frowning:

+

there are several rules like this in the .less files:

+
word-break : break-all;
+
+

ideas welcome, etc.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

Thanks for confirming. I will take the occasional headline issue over breaking the layout. I’ll use this as a chance to learn something and if I come up with a fix I’ll let you know.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+

How do I activate this? I installed the Plugin and set the checkmark, but nothing happens.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ + + + + + +
+ + + + + + + diff --git a/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/index.html b/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/index.html new file mode 100644 index 00000000..73249e9c --- /dev/null +++ b/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/index.html @@ -0,0 +1,1279 @@ + + + + + Tiny Tiny RSS: Community + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Tiny Tiny RSS: Community + +
+ +
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
TopicRepliesViewsActivity
+ + + + + 1 + + 3288 + + May 15, 2020 +
+ + + + + + + + + + + + + + + + + 17 + + 1083 + + June 24, 2023 +
+ + + + + + + + 4 + + 191 + + June 22, 2023 +
+ + + + + + + + 6 + + 297 + + June 18, 2023 +
+ + + + + + + + 9 + + 239 + + June 14, 2023 +
+ + + + + + + + 2 + + 232 + + June 14, 2023 +
+ + + + + 0 + + 190 + + June 13, 2023 +
+ + + + + + + + 1 + + 243 + + March 25, 2023 +
+ + + + + 0 + + 1508 + + June 3, 2022 +
+ + + + + + + + 1 + + 197 + + June 5, 2023 +
+ + + + + + + + + + + + + + 30 + + 10292 + + June 4, 2023 +
+ + + + + + + + + + + 4 + + 838 + + November 21, 2018 +
+ + + + + + + + 3 + + 195 + + June 1, 2023 +
+ + + + + + + + 1 + + 160 + + May 31, 2023 +
+ + + + + + + + 1 + + 203 + + May 28, 2023 +
+ + + + + + + + + + + 6 + + 1429 + + May 26, 2023 +
+ + + + + + + + + + + + + + + + + 13 + + 3447 + + May 26, 2023 +
+ + + + + + + + + + + + + + 15 + + 836 + + May 26, 2023 +
+ + + + + + + + 9 + + 496 + + May 21, 2023 +
+ + + + + + + + + + + 6 + + 302 + + May 12, 2023 +
+ + + + + + + + 1 + + 252 + + May 11, 2023 +
+ + + + + + + + + + + + + + + + + 10 + + 652 + + May 8, 2023 +
+ + + + + + + + 2 + + 271 + + May 7, 2023 +
+ + + + + 0 + + 181 + + May 5, 2023 +
+ + + + + + + + + + + + + + 3 + + 506 + + May 4, 2023 +
+ + + + + + + + 1 + + 158 + + May 1, 2023 +
+ + + + + + + + + + + + + + 8 + + 2226 + + October 20, 2020 +
+ + + + + + + + + + + 2 + + 284 + + April 28, 2023 +
+ + + + + + + + + + + + + + + + + 27 + + 5482 + + April 24, 2023 +
+ + + + + + + + + + + + + + + + + 16 + + 891 + + April 24, 2023 +
+
+ + + + + + + + + +
+ + + + + + + diff --git a/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/telegram.html b/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/telegram.html new file mode 100644 index 00000000..d76e6320 --- /dev/null +++ b/code/processes/crawling-process/src/test/resources/mock-crawl-data/discourse/telegram.html @@ -0,0 +1,230 @@ + + + + + Telegram channel to idle on - Everything else - Tiny Tiny RSS: Community + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Tiny Tiny RSS: Community + +
+ +
+ + + + +
+ + + +
+ +
+ +
+ +
+

Here: Telegram: Contact @TinyTinyRSS

+

Now there’s a place to post on if something on tt-rss.org doesn’t work right.

+
+ +
+ + + +
+ +
+ + +
+ +
+
+ +
+ +
+ +
+ + + +
+ +
+ + +
+ +
+
+ + + + + + +
+ + + + + + + diff --git a/code/processes/crawling-process/src/test/resources/mock-crawl-data/lemmy/c_startrek.html b/code/processes/crawling-process/src/test/resources/mock-crawl-data/lemmy/c_startrek.html new file mode 100644 index 00000000..90f1f0be --- /dev/null +++ b/code/processes/crawling-process/src/test/resources/mock-crawl-data/lemmy/c_startrek.html @@ -0,0 +1,546 @@ + + + + + + + + + + + + + + Star Trek - Star Trek: Website + + + + + + + + + + + + + + + + + + + + + +
Star Trek
!startrek
help-circle
rss

Episode Discussion | Star Trek: Strange New Worlds | 2x02 “Ad Astra Per Aspera”
pin
pin
::: spoiler Logline + Commander Una Chin-Riley faces court-martial along with possible imprisonment and dishonorable dismissal from Starfleet, and her defense is in the hands of a lawyer who’s also a childhood friend with whom she had a terrible falling out. + ::: + + --- + + Written by Dana Horgan + + Directed by Valerie Weiss

![](https://i.ytimg.com/vi/6r9HSrWvNJ0/hqdefault.jpg) + + In the scene between Q and Jean-Luc Picard, Q delivers an amazingly stirring monologue about the importance of one person, saying "Must it always have galactic import? Universal stakes? Celestial upheaval? Isn't one life enough?" This line is an impressive reminder that sometimes, saving a single individual can be just as important as saving a galaxy. + + In this moment, we see a different side of Q - one that is vulnerable and sincere. Or was that his true side all along? The scene impresses the deep friendship between Q and Jean-Luc, as Q reveals his true motivation - to heal Jean-Luc's heart. + + Overall, this scene is such a beautiful and touching moment that adds even more depth to these two already well-developed characters. It reminds me of the power of friendship and the importance of valuing individual lives. + + "Humans. Your griefs, your pains, fix you to moments in the past long gone. You’re like butterflies with your wings pinned”. There is so much food for thought in this piece of wisdom alone. Every sentence in this scene has its place, has meaning beyond the words expressing it. Note also the stunning, subtle yet powerful soundtrack. + + This scene and the one to follow is so well paced and delivered by John de Lancie and Patrick Stewart, it will stick with me for many years to come. I have rewatched it over and over. It alone makes the entire 3rd season worthwhile. + + May we all remember to say this to someome from our heart. Almost nothing means more than that: + + "You matter to me."
fedilink

“Undetermined” language seems to prevent seeing content in other languages.
As a noob this is likely me, but I tried to start a community on another server and I can't see the post (my other account used to make said community) made and set as english. Unless it's just me being a noob, that can't be good. A cursory look seems to confirm that I can't see anything aside from "Undetermined".

Overthinking Number One’s fate in SNW S1E10 (in light of S2E2)
Spoilers for both episodes: + + In the alternate future depicted in "A Quality of Mercy", Una has been in prison for the past seven years. In the main timeline, we now know that Una wins her trial and doesn't serve any prison time. How do we account for the fact that the same events led to two different outcomes? + + This bugged me for hours yesterday. But after some thought and time, I think a solution exists! + + We know, in the Prime timeline, that Pike literally risked his life to visit Neera (the Illyrian attorney). Her skills, and devotion to Illyrian rights, ultimately win the case. Therefore, in the alternate future, we must conclude that *Pike did not take this same action*. + + The only difference between the two version of Pike, with regard to Una's trial anyway, is that the prime Pike had already seen the alternate future. He *knew* that if something drastic didn't happen, Una would lose and be imprisoned. This is why he sought an outspoken attorney from outside Starfleet. + + It's interesting, then, to consider the fact that Una's victory was indirectly caused by Pike seeing the future - which was in turn caused by his first glimpse of the canon future back in Discovery. And if Una was fated to lose the trial without the interference of future knowledge, will this change have ramifications too? Is SNW now branched off of the original canon into its own timeline? + + (Personally, I hope so! My wish is that SNW diverges from the Prime timeline, and finds a way for Pike to escape his fate without causing disaster. And that Spock & Chapel end up together.)
fedilink

“There is nothing wrong with you Lieutenant, no hidden monster inside.” - SNW Spoilers
So that is a vague title, but this is obviously spoiler related but it also felt fitting with what this post is about. I am going to be a corny emotional potato right now and I really needed this off my chest. + + Also before you go reading my post and complain that is now what you want, it is mainly a self reflection post relating to being trans, transphobia, modern society etc. So if its not your cup o tea, gotcha but yeah. + Also unsurprisingly in relation to that, yes I am trans. And I guess I should warn this post will deal with some societal issues, self loathing, emotions yada yada. + + So even last season I felt that I could relate to Una's story, having to hide her identity as Illyrian because society deemed it illegal. We saw the anger and fear from La'an when she felt deceived alongside her own identity issues with being related to Noonien Singh. + Now of course augments aren't a direct 1 - 1 the same as trans people and I am not trying to make that claim either. But I did read parts of it as allegorical for trans experiences, and I think its also partially intentional. I remember being a bit teary seeing La'an and Una's fight and everything else. + + Then we got this absolutely amazing episode dealing with the aftermath. Of course we knew she would be safe but the journey there was what is interesting and this episode blew it out of it all. + + Already seeing the way her identity was used and presented as some form of deception, as if she had lied to others and that it was harmful rings a very clear bell with both past histories of laws with lgbtq people but still do today. While I do not live in the US, seeing the laws presented these days, the moral panic and continuous attack made on trans people I see a clear connection. And even if I am not suffering under them, I still fear both for friends who do and the worry that those same laws might come to my country. 
We can't choose to be born like this, we never had that choice and yet it's argued that it should be used to exclude trans people. It leads to a lot of pain. + + Knowing how laws where discussed, might even have passed of trans children being taken from their parents if doctors reported them under arguments of child abuse if they went in to health care providers made me think of the way Una could not get health care for the fear of being arrested with her family. This is of course most likely relatable aswell to a lot of other minorities. The whole passing argument how people where divided in two cities, and those that passed could live in society as everyone else, if they kept their identity hidden and if you didn't passed you would face persecution, hatred etc. + + Not to mention when they mentioned all the slurs used, I found so many points where I could change the word augment for trans related things and it painfully made sense. + + There was so much here but in the end that is not why I made this post, what actually made me just break down crying and hit so hard in feeling that I was told my own life was okay was this discussion between Neera and La'an. + + >Counselor: There is nothing wrong with you Lieutenant, no hidden monster inside. But I do know how they make us feel. They look down at us for so long that we begin to look down at ourselves. Genetics is not our destiny despite what you may have been taught. The fear of yourself it’s not your own. It was drilled into you. You’re not born a monster. You were just born with a capacity for actions, good or ill. Just like the rest of us… + + While we talk about augments here again, there is a clear correlation with the notion of that self loathing that comes with being part of something you have no control over. That fear of being discriminated against but also the way people talk about you.. + + I have felt like a monster before for being trans. Same with freak, and a lot of other words. 
Both being told by strangers but also familiar people's reactions. Feeling like I was damaging their lives, feeling like I robbed my parents of the child they thought I had. The deceiver, liar. I still feel like that a lot of the time. And then just hearing those words, despite the varying context it helped. I felt a bit more at peace, or relieved to see that sort of affirmation on screen. + + I don't know how many times I have feared my own existence, or worried what others will think or react. To feel lesser, faker but also potentially dangerous. I don't even know how to explain what it made me feel but I felt seen. I felt seen for the first time in forever on screen and it makes me feel less wrong. Despite everything bad in the world there still exists some good, some people that still see how wrong things can be, and I am happy Star Trek remains there, always optimistic about it despite the hardships. + + This turned into a weird rambling post, I am not sure it made a lot of sense but I needed to share/vent somewhere. + ::: spoiler spoiler + ___ + ::: +

That article speculates that elements of Paramount (including CBS Studios, which produces Trek these days) could be sold off to other companies, perhaps even Netflix. This, however, doesn't get into the hairy discussion surrounding rights to the franchise.
fedilink

What’s Your Favorite Star Trek Practical Effect Species
From Wikipedia, "A practical effect is a special effect produced physically, without computer-generated imagery or other post-production techniques." + + Some of my favorite practical effect species are from TOS. There's the Horta from TOS "The Devil in the Dark" s1e25. I was about 9 years old when I first saw the Horta episode. Old enough to know it wasn't real, but young enough to be utterly fascinated by something that looked like an angry chunk of hot lava making its way around on the ground. + + TOS's the Salt Vampire's look scared little kid me. Truth be told, the way it looks still gives me the creeps. + + The Gorn (TOS "Arena" s1e18) didn't scare me, but I thought it was intimidating. Sure, it looks like somebody in a lizard man suit. But, that Gorn had a knife! And, was trying to kill Kirk! Bonus points for the Gorn's sparkly, stylish armor. + + I think tribbles are just adorable.

Watching SNW S2E2, I couldn’t help but contrast it with Discovery … my thoughts.
*Reposting this from the SNW S2E2 thread as it was removed by a mod for being “off topic”.* + + ::: spoiler SNW S2E2 spoilers and a Discovery critical perspective + So I’m not the biggest fan of Discovery. I would say I’ve found it a disappointment and I’m sure I’m not alone in this. I don’t want to convince anyone here of this or even get into the arguments, in part because there’s still a lot I’ve liked about the show and what they tried and the fact that it ushered in more trek! + + What I did want to talk about, just in case anyone finds it interesting or agrees … is that this last episode of SNW (S2 ep 2, *ad astra per aspera*) feels like a perfect demonstration of what Discovery was missing. + + Sure, using a court trial as a vehicle is a bit tropy, but for a reason, it works. The story and premise of the trial, while not particularly deep or even well rooted in character, worked. It made sense, had human and political plot elements to it and was delivered well most importantly … all of which is what, IMO, Discovery often lacked and instead would often just cross the line into being on the nose. + + I don’t want to be negative against Discovery here. It is what it is and has its fans. I just want to express as someone who didn’t vibe with Discovery that this is what was missing for me, and I’m very pleased to have SNW! + + *Added to original post after removed* + + Watching the episode it felt like writers etc had reflected on Discovery and wanted to do the progressive, ethical stuff differently, and maybe they were trying to do it better too. + + IMO, what the writers managed to pull off was successfully weaving personal stories and inter personal dynamics with the ethical issue, which, in combination with the court room drama structure, allowed the issue to be explored and unravelled organically. 
From what I’ve gathered from my own reflections and speaking to others about Discovery, part of the difficulties some of us have had with it is its tendency to resort to speeches/monologues to digest dilemmas. For someone like me, it was tonally off putting, because it took away my ability to feel like I was exploring the issue myself either sympathetically with individual characters or logically/philosophically. + + With this episode, part of the reason it works, IMO is that Una’s trial takes us through the issue, not any one perspective, character or speech, demonstrating each character’s personal connections and biases while also allowing the issues to stay in focus. + + Plus, it was cool to see Neera being a badass lawyer! Maybe I just like legal dramas too much!! + + Thoughts? Am I being too harsh on Discovery? + :::

DS9 S6E11 - Waltz
I've been watching through ds9 for the first time and the show has always been good, even the first season I thought started fairly strong. And season 6 (as I've said in another post) has started really strong, with an actual deviation from the show's status quo that lasts longer than one episode. + + But holy shit, this might be one of my favorite episodes. Not just of ds9, or star trek in general, but maybe of any show I've seen. And as far as I'm aware people barely talk about it. + + The whole episode is essentially Dukat's facade of composure slowly breaking down. He's always been an interesting villain, and at times his insistence that he's doing his best can almost convince you that he might not entirely be the bad guy; that maybe he is, in some way, trying to do good. + + And a lesser show might try to redeem him. But not ds9. Millions died under his command, people were sent to the deaths, and as long as he tries defend his actions in any way there's no room for redemption. + + This episode finally breaks down all of his fake pretenses of helping Bajor, or trying his best to "rule with a softer hand." Finally his deception is broken down and reveals what he has always been. And it's done in the best way possible. + + The scene where he finally breaks down is fantastic. No music, just Marc Alaimo acting his ass off. The panning back and forth between him talking to sisko and talking to the various voices in his head... + + "And that is why you're not an evil man?"
fedilink






Narrated by the plain and simple tailor himself.

PSA: Jeri - Seven of Nine - Ryan is active on the fediverse!
Just in case you didn't know ... she's on mastodon, has probably been here longer than you, and is active (she once actually replied to me!!!!!). + + https://mastodon.world/@JeriLRyan

June 2023 Star Trek ebook deals
For those not already familiar, Simon & Schuster offers a monthly ebook promotion with an array of Treklit across shows and eras. + + It’s a great way to dip your toe into the Litverse at modest cost. + + Recommended price is $ 0.99 in the USA through the major ebook sellers, with similar pricing offers in Canada and some other countries. + + This month’s selection includes the Destiny trilogy from the Relaunch novelverse. It’s David Mack’s excellent alpha to omega story of The Borg, featuring the Enterprise under Picard, the Titan under Riker, and the Aventine under newly promoted Captain Ezri Dax, with a Voyager cameo as well. + + There’s also the Worlds of DS9 series, some favourites from the TOS and TNG eras from Greg Cox, Christopher L Bennett and Michael Jan Friedman as well as tie-in books to the new series. + + This month’s offer is available until July 3rd




As much as I appreciate all that (vintage) Star Trek was trying to do, handling of a lot of women’s issues were problematic or nonexistent.
I'm making my way through DS9 for the first time (almost finished with season two). For reference, I've seen TOG, TNG, all the new movies, Discovery, Lower Decks, and SNW. + DS9 season one was a little slow to pick up, but my husband and I are loving season two. As always, Star Trek addresses a lot of hard hitting issues in very nuanced ways. It's also great continuously seeing fantastic representation in even older episodes (mainly women and people of colour in prominent and varied positions). + + But when we get to more women specific issues, things are seriously lacking. Basically, almost any scene with the Ferengis interacting with women are problematic. I understand that they are a highly misogynistic race, but my discomfort and annoyance is with how others react to them. The Grand Nagus sexually assaulting women, especially Kira, is never actually fully addressed. It's just seen as an annoyance to be put up with. Quark is often seen as aggressively pursuing women, often with unwanted touches and other advances. Even if it's at first the women in question is disinterested, they relent in some way, either completely or they tell him something flattering/of chances in the future. Thankfully, so far, none of the main women characters have given into his advances, but it's common for one off and guest characters to somehow be "charmed" by him, even those from more egalitarian civilizations (eg: Vulcan). + + I know DS9 was made prior to #MeToo, but it's still worth pointing out where Star Trek can and have improved (thankfully newer Trek has less of these issues), and when some of the portrayals in the past are rather inconsistent with the philosophies of the Federation. Of course, the Federation operates with a level of cultural relativism, but it still doesn't make sense to me how the Grand Nagus (and Ferengis in general) were allowed to assault and harass those outside of their culture/species on an interstellar, multi-cutural, Federation run space station. 
Star Trek in the 90s was trying to do a lot of good, but watching it now, there's definitely quite a few aspects that are unsatisfying. +
fedilink

Is the new episode of [@startrek](https://startrek.website/c/startrek) [#StrangeNewWorlds](https://mastodon.cloud/tags/StrangeNewWorlds) already streaming in México? + + Or should I go to the 🏴‍☠️🌊🏖️???
fedilink


Instance Meta - Is this instance getting flooded with spam bot accounts?
So, lemmy seems to be flooded with spam bot accounts at the moment. Look through the table of servers on fedidb (https://fedidb.org/software/lemmy) and notice how there are these huge instances without any active users (MAU). + + Also notice how `startrek.website` has 9000 users for 276 active users this month. + + From memory, when I signed up, there was no email requirement or captcha or anything. + + Admins ... maybe you want to tighten things up?





SPOILER WARNING: Carol Kane interview: how she joined the Trek world, how she feels being a newcomer, and some details about Pelia
**SPOILER WARNING.** Unless I missed some details from episode 1 of SNW season 2, Kane reveals some details of her character that are not in episode 1 yet. + + You have been warned. + + Edit: missed a word
fedilink






My wife and I are watching through TNG, on S3 now, and I kinda like Wesley.
What's the opinion on his character these days? I know he has a reputation of being very disliked.
fedilink


One actor you wish would do Trek
I know it's been done to death all over the internet but not here yet! + + If you could choose any actor or actress to be in Star Trek who would you choose? + + I recently saw that Nic Cage said he was a Trekkie and I'd absolutely love it if he played an insane alien villain of some sort for an episode or two. Get him in Star Trek Legacy as a crazy Ferengi or something.

Re: Images not loading on StarTrek.website - UPDATE, WORKING AGAIN
We are aware images aren’t loading and new images can't be uploaded. It appears to be a bug with the latest Lemmy update as we’re not the only instance with this problem. We have filed a bug report and are looking into possible solutions and hopefully it will get fixed soon. We are also looking into better ways to communicate server issues like this. Thank you for your understanding! We are learning this alongside all of you. + + EDIT: If anyone out there has an idea to the cause please feel free to shoot me a DM! + + EDIT 2: We're back in action folks! Thanks for your patience, you may feel free to set avatars now.


Star Trek
!startrek
    Create a post

    r/startrek: The Next Generation

    +

    Star Trek news and discussion. No slash fic…

    +

    Maybe a little slash fic.

    +
    +

    Rules

    +
      +
    1. +

      Be constructive: explain your thoughts and opinions.

      +
    2. +
    3. +

      Be welcoming: this is an IDIC community.

      +
    4. +
    5. +

      Be truthful: Keep it factual, and don’t speak for others.

      +
    6. +
    7. +

      Be nice: Don’t be a jerk.

      +
    8. +
    9. +

      Spoiler tag stuff that’s less than a week old, and keep spoilers for said stuff out of the post title.

      +
    10. +
    +
    +

    Upcoming Episodes

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    DateEpisodeTitle
    06-22SNW 2x02“Ad Astra Per Aspera”
    06-29SNW 2x03“Tomorrow and Tomorrow and Tomorrow”
    07-06SNW 2x04“Among the Lotus Eaters”
    07-13SNW 2x05“Charades”
    07-20SNW 2x06“Lost In Translation”
    +
    +

    In Production

    +

    Discovery (2024)

    +

    Lower Decks (2023)

    +

    Prodigy (TBA)

    +

    Strange New Worlds

    +

    In Development

    +

    Section 31

    +

    Starfleet Academy

    +
    +

    Wondering where to stream a series? Search here.

    +
    +
    • 40 users online
    • 103 users / day
    • 544 users / week
    • 903 users / month
    • 903 users / 6 months
    • 4.52K subscribers
    • 160 Posts
    • 2.74K Comments
    • Modlog
    + + + diff --git a/code/processes/crawling-process/src/test/resources/mock-crawl-data/lemmy/index.html b/code/processes/crawling-process/src/test/resources/mock-crawl-data/lemmy/index.html new file mode 100644 index 00000000..1a92c0bf --- /dev/null +++ b/code/processes/crawling-process/src/test/resources/mock-crawl-data/lemmy/index.html @@ -0,0 +1,726 @@ + + + + + + + + + + + + + Star Trek: Website - The new home of r/StarTrek, r/DaystromInstitute and *sigh* ...r/Risa + + + + + + + + + + + + + + + + + + + + + +
    help-circle
    rss

    Episode Discussion | Star Trek: Strange New Worlds | 2x02 “Ad Astra Per Aspera”
    pin
    pin
    ::: spoiler Logline + Commander Una Chin-Riley faces court-martial along with possible imprisonment and dishonorable dismissal from Starfleet, and her defense is in the hands of a lawyer who’s also a childhood friend with whom she had a terrible falling out. + ::: + + --- + + Written by Dana Horgan + + Directed by Valerie Weiss

    StarTrek.website - Lemmy info, FAQ, Patreon info, future plans, and more!
    pin
    ![](https://startrek.website/pictrs/image/590456a7-0f95-4e61-968a-c688dd564033.jpeg) + + *nuqneH!* + + Welcome to our new home in the Fed~~eration~~*iverse*. First of all- WOW we did not expect to surpass 300 users on this instance (and over 1K fediverse subscribers) within our first 48 hours and with little promotional effort. We are all excited to see where this long road goes. + + # Coming from Reddit and confused about Lemmy? + + I had some stuff typed out, but honestly, [this thread](https://lemmy.world/post/37906) sums it up better. Check it out! It has infographics. + + If you're still stressed out, remember that **Lemmy is still new**. Yes, it's ugly, but people said Reddit was ugly too (both are correct). As Lemmy grows, and #Rexxit continues, more tools will get made. Decentralization opens up a lot of possibilities we didn't have before. The future is bright. + + # Will other communities be setting up shop besides StarTrek, DaystromInstitute and Risa? + + Yes! Eventually. Right now our focus is staying online, fast, and reliable which means keeping things focused while we find our footing. Daystrom, StarTrek and Risa were chosen to start off with because the three of them cover the "srs bsns ↔ shitposting" spread quite well. + + If you are part of a community interested in being hosted on startrek.website, [send me a DM](https://startrek.website/create_private_message/recipient/617) and we can try to work something out. + + # *Qapla'!* How can I support? + + We've started a Patreon here: [Patreon.com/treksite](https://www.patreon.com/treksite/). There's only one plan and it's just $4. If our growth continues like it has, we're going to need to upgrade our hosting *very* soon. + + # I'm having trouble creating an account, the box just keep spinning + + There's a known bug with Lemmy when sometimes accounts get "half created". 
You can try again with a new username, or if you're really connected to it, contact [@williams_482@startrek.website](https://startrek.website/u/williams_482) and they can manually add it (when they have time). + + --- + + If you have any questions for the team, please don't hesitate to ask in this thread (and yes the discussion thread for the Strange New Worlds season premiere will be up on [/c/StarTrek](https://startrek.website/c/startrek) later today, stay tuned!)🖖

    Why was the Galaxy class saucer separation ability so rarely used?
    The Galaxy class starship was designed with the ability to separate the saucer from the stardrive section, so that the "floating city" part of the ship could be left somewhere safe while the rest of the ship galavants off to do something risky. We see this happen precisely once, in the season one episode *Arsenal of Freedom*. We also see saucer separation deployed for a handful of tactical and or emergency uses (such as against the Borg in *The Best of Both Worlds*, or to escape the breaching warp core in *Generations*). + + So, this seems like a useful ability to have, and the Enterprise is constantly being sent into dangerous situations. Why not use this ability more frequently?

    ![](https://i.ytimg.com/vi/6r9HSrWvNJ0/hqdefault.jpg) + + In the scene between Q and Jean-Luc Picard, Q delivers an amazingly stirring monologue about the importance of one person, saying "Must it always have galactic import? Universal stakes? Celestial upheaval? Isn't one life enough?" This line is an impressive reminder that sometimes, saving a single individual can be just as important as saving a galaxy. + + In this moment, we see a different side of Q - one that is vulnerable and sincere. Or was that his true side all along? The scene impresses the deep friendship between Q and Jean-Luc, as Q reveals his true motivation - to heal Jean-Luc's heart. + + Overall, this scene is such a beautiful and touching moment that adds even more depth to these two already well-developed characters. It reminds me of the power of friendship and the importance of valuing individual lives. + + “Humans. Your griefs, your pains, fix you to moments in the past long gone. You’re like butterflies with your wings pinned”. There is so much food for thought in this piece of wisdom alone. Every sentence in this scene has its place, has meaning beyond the words expressing it. Note also the stunning, subtle yet powerful soundtrack. + + This scene and the one to follow are so well paced and delivered by John de Lancie and Patrick Stewart, it will stick with me for many years to come. I have rewatched it over and over. It alone makes the entire 3rd season worthwhile. + + May we all remember to say this to someone from our heart. Almost nothing means more than that: + + "You matter to me."
    fedilink

    “Undetermined” language seems to prevent seeing content in other languages.
    As a noob this is likely me, but I tried to start a community on another server and I can't see the post (my other account used to make said community) made and set as english. Unless it's just me being a noob, that can't be good. A cursory look seems to confirm that I can't see anything aside from "Undetermined".

    Why did Grilka’s retainer say Worf’s house was dishonored?
    In "Looking for Par'Mach in All the Wrong Places", Grilka's senior retainer (Tumek) takes Worf aside and tells him that he cannot pursue Grilka, as he is from a dishonored house. But hadn't Gowron cleared the House of Mogh? I feel like I'm missing or forgetting something obvious. Otherwise, my best explanation would be that although the House of Mogh was formally cleared, it was still regarded with suspicion by more conservative Klingons.

    Overthinking Number One’s fate in SNW S1E10 (in light of S2E2)
    Spoilers for both episodes: + + In the alternate future depicted in "A Quality of Mercy", Una has been in prison for the past seven years. In the main timeline, we now know that Una wins her trial and doesn't serve any prison time. How do we account for the fact that the same events led to two different outcomes? + + This bugged me for hours yesterday. But after some thought and time, I think a solution exists! + + We know, in the Prime timeline, that Pike literally risked his life to visit Neera (the Illyrian attorney). Her skills, and devotion to Illyrian rights, ultimately win the case. Therefore, in the alternate future, we must conclude that *Pike did not take this same action*. + + The only difference between the two versions of Pike, with regard to Una's trial anyway, is that the prime Pike had already seen the alternate future. He *knew* that if something drastic didn't happen, Una would lose and be imprisoned. This is why he sought an outspoken attorney from outside Starfleet. + + It's interesting, then, to consider the fact that Una's victory was indirectly caused by Pike seeing the future - which was in turn caused by his first glimpse of the canon future back in Discovery. And if Una was fated to lose the trial without the interference of future knowledge, will this change have ramifications too? Is SNW now branched off of the original canon into its own timeline? + + (Personally, I hope so! My wish is that SNW diverges from the Prime timeline, and finds a way for Pike to escape his fate without causing disaster. And that Spock & Chapel end up together.)
    fedilink

    “There is nothing wrong with you Lieutenant, no hidden monster inside.” - SNW Spoilers
    So that is a vague title, but this is obviously spoiler related but it also felt fitting with what this post is about. I am going to be a corny emotional potato right now and I really needed this off my chest. + + Also before you go reading my post and complain that is now what you want, it is mainly a self reflection post relating to being trans, transphobia, modern society etc. So if its not your cup o tea, gotcha but yeah. + Also unsurprisingly in relation to that, yes I am trans. And I guess I should warn this post will deal with some societal issues, self loathing, emotions yada yada. + + So even last season I felt that I could relate to Una's story, having to hide her identity as Illyrian because society deemed it illegal. We saw the anger and fear from La'an when she felt deceived alongside her own identity issues with being related to Noonien Singh. + Now of course augments aren't a direct 1 - 1 the same as trans people and I am not trying to make that claim either. But I did read parts of it as allegorical for trans experiences, and I think its also partially intentional. I remember being a bit teary seeing La'an and Una's fight and everything else. + + Then we got this absolutely amazing episode dealing with the aftermath. Of course we knew she would be safe but the journey there was what is interesting and this episode blew it out of it all. + + Already seeing the way her identity was used and presented as some form of deception, as if she had lied to others and that it was harmful rings a very clear bell with both past histories of laws with lgbtq people but still do today. While I do not live in the US, seeing the laws presented these days, the moral panic and continuous attack made on trans people I see a clear connection. And even if I am not suffering under them, I still fear both for friends who do and the worry that those same laws might come to my country. 
We can't choose to be born like this, we never had that choice and yet it's argued that it should be used to exclude trans people. It leads to a lot of pain. + + Knowing how laws were discussed, might even have passed of trans children being taken from their parents if doctors reported them under arguments of child abuse if they went in to health care providers made me think of the way Una could not get health care for the fear of being arrested with her family. This is of course most likely relatable as well to a lot of other minorities. The whole passing argument how people were divided in two cities, and those that passed could live in society as everyone else, if they kept their identity hidden and if you didn't pass you would face persecution, hatred etc. + + Not to mention when they mentioned all the slurs used, I found so many points where I could change the word augment for trans related things and it painfully made sense. + + There was so much here but in the end that is not why I made this post, what actually made me just break down crying and hit so hard in feeling that I was told my own life was okay was this discussion between Neera and La'an. + + >Counselor: There is nothing wrong with you Lieutenant, no hidden monster inside. But I do know how they make us feel. They look down at us for so long that we begin to look down at ourselves. Genetics is not our destiny despite what you may have been taught. The fear of yourself it’s not your own. It was drilled into you. You’re not born a monster. You were just born with a capacity for actions, good or ill. Just like the rest of us… + + While we talk about augments here again, there is a clear correlation with the notion of that self loathing that comes with being part of something you have no control over. That fear of being discriminated against but also the way people talk about you.. + + I have felt like a monster before for being trans. Same with freak, and a lot of other words. 
Both being told by strangers but also familiar people's reactions. Feeling like I was damaging their lives, feeling like I robbed my parents of the child they thought I had. The deceiver, liar. I still feel like that a lot of the time. And then just hearing those words, despite the varying context it helped. I felt a bit more at peace, or relieved to see that sort of affirmation on screen. + + I don't know how many times I have feared my own existence, or worried what others will think or react. To feel lesser, faker but also potentially dangerous. I don't even know how to explain what it made me feel but I felt seen. I felt seen for the first time in forever on screen and it makes me feel less wrong. Despite everything bad in the world there still exists some good, some people that still see how wrong things can be, and I am happy Star Trek remains there, always optimistic about it despite the hardships. + + This turned into a weird rambling post, I am not sure it made a lot of sense but I needed to share/vent somewhere. + ::: spoiler spoiler + ___ + ::: +

    Why are Starfleet consoles apparently stuffed with rocks?
    We've seen it many, many times: the ship gets into a firefight, takes a few hits, shakes around, and consoles explode (possibly taking an unfortunate ensign with them). Eventually the battle is resolved with our heroes largely intact if somewhat shaken up. If it was a particularly nasty battle, there will be signs of damage: scorches on the walls, deformed equipment, busted lights, and rocks scattered about. + + All of that seems reasonable... except the rocks, which look pretty out of place in a spiffy 24th century starship. So why are they there?

    RSVP to Disco and Prodigy
    fedilink

    That article speculates that elements of Paramount (including CBS Studios, which produces Trek these days) could be sold off to other companies, perhaps even Netflix. This, however, doesn't get into the hairy discussion surrounding rights to the franchise.
    fedilink

    Annotations for Star Trek: Strange New Worlds 2x02: “Ad Astra Per Aspera” (SPOILERS)
    What amazes me most about this episode is that it’s a *Star Trek* legal episode that doesn’t want to make me tear my hair out. Thankfully they kept the trial procedure to its most basic. + + The title means loosely, in Latin, “To the Stars Through Hardship/Difficulties,” or “A Rough Road Leads to the Stars”. It is the motto of the state of Kansas, can be found on NASA’s Apollo I memorial, and also in-universe the motto of the United Earth Starfleet in ENT. + + As a child, Una suffered a serious leg injury, but her father refused to take her to the hospital for fear of the doctors discovering her modifications. We see the open wound glowing, like Una did when manifesting her abilities in SNW: “Ghosts of Illyria”. + + Batel offers a plea deal: plead guilty to knowingly submitting false information to Starfleet by failing to disclose her genetic modifications, and Starfleet will dishonorably discharge her without prison time. The charge could carry a two-year minimum imprisonment term, so Batel and Una’s JAG-appointed counsel advise her to take it. Una recognizes that this is designed to sweep this under the carpet, and questions how she can have effective counsel if he works for Starfleet. This is a live issue in military trials even today, and to discuss it properly would take more space than we have here. + + It is Stardate 2393.8. Pike is on a planet in the Vaultera Nebula to persuade Counselor Neera Ketoul, the civil rights lawyer he and Una discussed in the previous episode, to take up her case. The atmosphere is toxic to humans and Pike requires an oxygen mask to get around. The local inhabitants are Illyrian, genetically adapted to survive. + + Ketoul used to be Una’s friend but something came between them. She notes that Starfleet’s race laws are draconian and Una’s lucky not to be charged with sedition. Ketoul has had 10 cases against the Federation thrown out over the last 2 years despite being strong ones. 
Pike makes reference to the events of “Ghosts of Illyria” and convinces her that taking this case might bring more attention to those she’s lost. + + The case file Pike hands to Ketoul is contained on a translucent orange square, like a cross between the old data cartridges of TOS and the isolinear chips of TNG. + + The last time Neera and Una met was 25 years ago (we find out later that was when she joined Starfleet), which makes it around 2234-2235 (SNW: “Children of the Comet” suggests it was at least 2260 then). It’s been two months since Starfleet found out about Una’s modifications. Until then her record had been spotless. + + Batel refers to the JAG as her boss, although she was also commanding the USS *Cayuga* in SNW: “The Quality of Mercy”, that was helping *Enterprise* upgrade the Neutral Zone outposts. In the present day military, trained JAG officers can technically alternate between legal duties and being line officers in a separate MOS as required, and Batel could be in that position. It is possible she used to be a JAG officer, switched to a starship captaincy and then was reactivated for Una’s case because she was the closest qualified JAG officer. + + The Judge is Admiral Vasak, and Batel is accompanied by a Vulcan Vice-Admiral, Pasalk (the JAG?). Both Admirals are dressed in variations of the blue uniforms last seen in DIS with Admiral rank flashes on their shoulders. + + Because Una has rejected the plea deal, Batel applies to amend the charges against Una to knowingly submitting false information to Starfleet and violating Starfleet Code 614 to 617 by engaging in permanent bioengineering, along with two counts of sedition. She announces she’s seeking a sentence of dishonorable dismissal and 20 years in a Federation penal colony. 
+ + The way Batel phrased it (and taking Ketoul’s earlier remarks about Una being lucky not to be charged with sedition), I surmise Una’s original charges were only the false information and permanent bioengineering charges. As we find out later, they fall under the same regulations, so the false information charge is probably specific to information about bioengineering rather than a general false reporting offence. + + The plea deal was then not a reduction, but merely to plead guilty to one and have the other one either withdrawn or taken into consideration (i.e. not sentenced separately for). Batel’s application is therefore to add the sedition charges not previously put forward and proceed with all charges at trial. + + A global sentence of 20 years seems harsh, and that’s probably because of the sedition charges and also because they’re charging her under military regulations. Over a century later, Richard Bashir would be sentenced to two years in a minimum-security penal colony for genetically modifying his son, which as a civilian he would have been subject to civil laws (DS9: “Doctor Bashir, I Presume?”). + + Robert April sponsored Una’s application to the Academy. She served under him for years (alongside Pike on *Enterprise*) and he promoted her. + + Ketoul is assigned Una’s quarters on the *Enterprise* for the duration. She is escorted there by La’An, who is back in uniform. Ketoul asks for access to the Starfleet Uniform Code of Justice - presumably their equivalent of the US military’s Uniform Code of Military Justice. + + La’An refers to *Starfleet v. Wyck*, which points to the “fruit of the poisonous tree” doctrine - a well-known rule in US law which states that illegally procured evidence, or indeed even evidence indirectly derived from that, is inadmissible in Court. + + Una and Pike met when he gave a speech to her Academy class, talking about a test mission he’d flown. 
Una pointed out a mistake he had made during re-entry, impressing him with her willingness to tell him he was wrong, qualities important in a first officer. + + Ortegas’ miming of Pasalk and Spock’s conversation mentions *kal-toh*, a Vulcan logic game/puzzle first seen in VOY: “Alter Ego” and most recently in PIC: “No Win Scenario”. M’Benga reads the Vulcan body language and says the two hate each other. He would, of course, be familiar with it since he did a medical internship on Vulcan (TOS: “A Private Little War”). Spock says Pasalk was a former colleague of Sarek’s. + + Uhura quotes Regulation 25, Section B, that all personal logs are to remain sealed unless by order of Starfleet Command. Regulation 25 was quoted in LD: “Second Contact” as prohibiting the transfer of weapons to other races without the permission of the Federation Council, so it seems odd on first blush that personal logs should be lumped under that section as well, but maybe it’s just a result of a century of regulatory amendments. + + The tribunal is called to order with a ship’s bell, as first seen in TOS: “Court Martial”. Behind the panel is the JAG Office seal. Javas is presiding, along with Space Command Representative Zus Tlaggul, a Tellarite, and Starfleet Commander Chiv, a Vulcan. Batel and Una are dressed in division colored versions of the Admiral uniforms, which are redesigns of the dress uniforms seen in TOS, complete with medals being displayed on the left chest. + + Batel refers to the Eugenics Wars (TOS: “Space Seed”) as the impetus for the genetic modification ban, with tens of millions dead. April says Starfleet Regulation 17, Article 12 specifically prohibits genetically modified people from serving in Starfleet. + + April words General Order One as, “No starship may interfere with the normal development of any alien life or society.” This is a summary, as the actual order was seen for the first time in PRO: “First Con-tact”, and the wording taken from David A. 
Goodman’s book *Federation: The First 150 Years*. + + In 2246 (one year after *Enterprise* was commissioned), April warned the Perricans, a pre-warp civilization about a meteor shower that could have ended their planet. In 2248, he sent his science officer to Na’rel, an industrial age planet to stave off an extinction-level drought by sharing Federation technology. On the hostile planet Man-us II, landing without his security officer, April chose to reveal the *Enterprise* to the pre-warp Ohawk. Apart from the violations, this suggests that GO1 was in force by 2246 at least. + + April promoted Una faster than any other officer on the ship and recommended her for the Medal of Gallantry after the Marcel disaster of 2248. + + La’An graduated top of her class and has been promoted each year of her tenure in Starfleet. That means she’s been in service for about two to three years (assuming ENS, LT j.g. and LT progression). That also allows us to calculate her age to be - at a minimum - 23 to 24 years old (entry at age 16-17, 4 years at the Academy, 3 more years in service). She has also been considered for the Starfleet Medal of Gallantry. + + La’An met Una when she was rescued after escaping a Gorn breeding planet (SNW: “Strange New Worlds”, when Una was an ENS on the USS *Martin Luther King*). She lies when she says she didn’t know Una was Illyrian (“Ghosts of Illyria”). Una sponsored La’An’s application to Starfleet. + + Spock met Una on his first day aboard *Enterprise* (ST: “Q & A”) and mentions her love for Gilbert & Sullivan (which she swore him to secrecy on, damn you Spock!). + + In Una’s quarters we see a picture of her as a child with her parents alongside a picture of Pike in his DIS blues next to Una in her DIS Season 2 uniform (DIS: “An Obol for Charon”). + + La’An believes that someone got a hold of her personal log in “Ghosts of Illyria” and that was how Una was outed. 
La’An also carries her ancestor’s augmentations (confirming something we’ve long suspected) and fears she could become dangerous. Ketoul assures her genetics is not destiny and given the time - 6 months minimum - needed to subpoena a personal log, it’s unlikely La’An was responsible. + + (Continued in comments)

    What’s Your Favorite Star Trek Practical Effect Species
    From Wikipedia, "A practical effect is a special effect produced physically, without computer-generated imagery or other post-production techniques." + + Some of my favorite practical effect species are from TOS. There's the Horta from TOS "The Devil in the Dark" s1e25. I was about 9 years old when I first saw the Horta episode. Old enough to know it wasn't real, but young enough to be utterly fascinated by something that looked like an angry chunk of hot lava making its way around on the ground. + + TOS's the Salt Vampire's look scared little kid me. Truth be told, the way it looks still gives me the creeps. + + The Gorn (TOS "Arena" s1e18) didn't scare me, but I thought it was intimidating. Sure, it looks like somebody in a lizard man suit. But, that Gorn had a knife! And, was trying to kill Kirk! Bonus points for the Gorn's sparkly, stylish armor. + + I think tribbles are just adorable.

    Watching SNW S2E2, I couldn’t help but contrast it with Discovery … my thoughts.
    *Reposting this from the SNW S2E2 thread as it was removed by a mod for being “off topic”.* + + ::: spoiler SNW S2E2 spoilers and a Discovery critical perspective + So I’m not the biggest fan of Discovery. I would say I’ve found it a disappointment and I’m sure I’m not alone in this. I don’t want to convince anyone here of this or even get into the arguments, in part because there’s still a lot I’ve liked about the show and what they tried and the fact that it ushered in more trek! + + What I did want to talk about, just in case anyone finds it interesting or agrees … is that this last episode of SNW (S2 ep 2, *ad astra per aspera*) feels like a perfect demonstration of what Discovery was missing. + + Sure, using a court trial as a vehicle is a bit tropy, but for a reason, it works. The story and premise of the trial, while not particularly deep or even well rooted in character, worked. It made sense, had human and political plot elements to it and was delivered well most importantly … all of which is what, IMO, Discovery often lacked and instead would often just cross the line into being on the nose. + + I don’t want to be negative against Discovery here. It is what it is and has its fans. I just want to express as someone who didn’t vibe with Discovery that this is what was missing for me, and I’m very pleased to have SNW! + + *Added to original post after removed* + + Watching the episode it felt like writers etc had reflected on Discovery and wanted to do the progressive, ethical stuff differently, and maybe they were trying to do it better too. + + IMO, what the writers managed to pull off was successfully weaving personal stories and inter personal dynamics with the ethical issue, which, in combination with the court room drama structure, allowed the issue to be explored and unravelled organically. 
From what I’ve gathered from my own reflections and speaking to others about Discovery, part of the difficulties some of us have had with it is its tendency to resort to speeches/monologues to digest dilemmas. For someone like me, it was tonally off putting, because it took away my ability to feel like I was exploring the issue myself either sympathetically with individual characters or logically/philosophically. + + With this episode, part of the reason it works, IMO is that Una’s trial takes us through the issue, not any one perspective, character or speech, demonstrating each character’s personal connections and biases while also allowing the issues to stay in focus. + + Plus, it was cool to see Neera being a badass lawyer! Maybe I just like legal dramas too much!! + + Thoughts? Am I being too harsh on Discovery? + :::

    DS9 S6E11 - Waltz
    I've been watching through ds9 for the first time and the show has always been good, even the first season I thought started fairly strong. And season 6 (as I've said in another post) has started really strong, with an actual deviation from the show's status quo that lasts longer than one episode. + + But holy shit, this might be one of my favorite episodes. Not just of ds9, or star trek in general, but maybe of any show I've seen. And as far as I'm aware people barely talk about it. + + The whole episode is essentially Dukat's facade of composure slowly breaking down. He's always been an interesting villain, and at times his insistence that he's doing his best can almost convince you that he might not entirely be the bad guy; that maybe he is, in some way, trying to do good. + + And a lesser show might try to redeem him. But not ds9. Millions died under his command, people were sent to their deaths, and as long as he tries to defend his actions in any way there's no room for redemption. + + This episode finally breaks down all of his fake pretenses of helping Bajor, or trying his best to "rule with a softer hand." Finally his deception is broken down and reveals what he has always been. And it's done in the best way possible. + + The scene where he finally breaks down is fantastic. No music, just Marc Alaimo acting his ass off. The panning back and forth between him talking to Sisko and talking to the various voices in his head... + + "And that is why you're not an evil man?"
    fedilink

    The Klingon Augment Virus is the real reason for the ban on genetic engineering (includes spoilers from SNW 2.2)
    It's never made much sense that the entire multi-species Federation would be subject to a strict ban on genetic engineering due to events on Earth that happened centuries before the Federation was even founded. The way they doubled down on that rationale in Una's trial only highlighted the absurdity -- especially when Admiral April claimed he would exclude Una to prevent genocide. + + On the one hand, the writers may be trying to create a straw man out of a weird part of Star Trek lore so they can have a civil rights issue in Starfleet. And that's fine. From an in-universe perspective, though, I think we can discern another reason for the ban on genetic engineering -- the Klingon Augment Virus. + + There was a ban on genetic engineering on United Earth, which is understandable given that it was much closer to the time of the Eugenics Wars. Why would that remain unchanged when more time passed, more species joined, and more humans lived in places without living reminders of the war? [NOTE: I have updated the paragraph up to this point to reflect @Value Subtracted's correction in comments.] The answer is presumably that they needed to reassure the Klingons that something like the Augment Virus would never happen again. Hence they instituted a blanket ban around that time -- perhaps in 2155, the year after the Klingon Augment Virus crisis and also, according to Michael Burnham, the year the Geneva Protocols on Biological Weapons were updated. + + That bought the Federation over a century of peace, but after war broke out due to a paranoid faction of Klingons who thought humans would dilute Klingon purity and after peace was only secured through the most improbable means, they doubled down on the ban. Una's revelation provided a perfect opportunity to signal to the Klingons that they were serious about the ban -- hence why they would add the charges of sedition, perhaps. 
Ultimately, an infinitely long speech and the prospect of losing one of their best captains combined to make them find a loophole -- but not to invalidate the ban or call it into question. This Klingon context is why April, who we know is caught up in war planning of various kinds, is so passionate that the ban exists "to prevent genocide" -- he's not thinking of people like Una, he's thinking of the near-genocide they suffered at the hands of the Klingons. + + This theory still doesn't paint the Federation in a positive light, since they have effectively invented a false propaganda story to defend a policy that has led to demonstrable harm. But it makes a little more sense, at least to me. What do _you_ think? + +


    If other Star Trek series had their own “Space: The Final Frontier” narration, what could they be?
    The traditional monologue, as used in TOS, TAS, TNG, Strange New Worlds, as well as the endings of Enterprise and several movies, can be taken as a sort of overall mission statement for the Enterprise, possibly even one that takes place in-universe. + + If the other series-- Deep Space Nine, Voyager, Discovery, Picard, Lower Decks, and Prodigy-- had similar mission statements, how might you phrase them?






    Alt text: The NX-01 Enterprise's motion picture library showing movies of various genres. Circled is 'Bride of Chaotica, The' which is a fictional movie from the Captain Proton series introduced in Voyager.
    fedilink


    Narrated by the plain and simple tailor himself.

    PSA: Jeri - Seven of Nine - Ryan is active on the fediverse!
    Just in case you didn't know ... she's on mastodon, has probably been here longer than you, and is active (she once actually replied to me!!!!!). + + https://mastodon.world/@JeriLRyan

    The first nine episodes of Discovery are a model for what streaming era Star Trek should have looked like
    To say Discovery has been "controversial" would be something of an understatement. From the very beginning the show sparked off considerable debate about it's quality, and the bevy of showrunner changes and resulting shifts in tone and plot choices just adds an extra layer of confusion. Many of the same groups and same people continue to have very similar arguments over what is clearly a completely different show in 2023 than it was in 2017. Personally I've become frustrated to the point of disinterest about where this show has gone, which makes it all the more exciting to go back and (re)discover something I thought I knew but had begun to really wonder about: + + The very beginnings of Discovery are fucking excellent television. + + Here's why. + + ## Early Discovery was actually planned out + + To start with, the pacing and plotting of both the individual episodes and the overall arc of the season are excellent. In the moment, they are delightfully seamless: pacing is brisk but not rushed, traversing from one important thing to the next, with emotional moments given an appropriate amount of time to be registered and felt without feeling drawn out. Each episode has a clear beginning, middle, and end, with individual stakes that matter beyond simply advancing the season plot. Of course they consistently advance the overall season plot too (with the exception of *Magic to Make the Sanest Man Go Mad*, which is "merely" a wonderfully executed standalone sci fi story that significantly develops three of our main characters). They do so not by dropping largely inconsequential teases and misdirection in alleged pursuit of a goal fated for resolution only in the finale, but via bite sized, meaningful changes to the circumstances our heroes find themselves in. + + This demonstrates something which is clearly absent from the subsequent seasons, and even tossed away before the end of this one: detailed long term planning. 
Not only are we spared the bizarre shifts in background information (is the Red Angel suit hyper advanced future tech, or something a research team banged out 20 years ago? Is the 32nd century Federation tiny, isolated, and largely ignored, or are they active galactic participants with genuine political clout?), but it's also critical for allowing the episodes to flow neatly together as a coherent story. There's been plenty of debate about if Star Trek should even be trying to tell these long-arc, binge-friendly seasonal stories, but clearly CBS wanted that. So why not do it right? + + ## Early Discovery (mostly) makes sense + + Every Star Trek show has had its share of silly stuff. Obviously TOS was absolutely loaded with zany things that seem more in keeping with its cardboard and hot glue aesthetics than the more serious tone subsequent shows attempted to set, but even the best of TNG era Trek had some whoppers mixed in. Where it has succeeded is by keeping most of the wacky missteps in relatively unimportant places, encapsulated by single episodes and devoid of larger consequence. + + Then there's the tech which every Starfleet ship is totally reliant on, most of which has only a fleeting connection to real world physics. The Mycelial Network blends right in: it's a pretty wild idea and most certainly is not real. Just like warp drive. And just like warp drive, it is at least [based on something real](https://www.forbes.com/sites/linhanhcat/2019/03/19/star-trek-discovery-spore-drive/?sh=4aaa6f8d3741). Ehh, close enough. 
+ + I have little desire to relitigate in depth the plausibility of S2/S3 Burnham being intimately connected to so many wildly disparate galaxy changing things, or how reasonable it is to have an emotionally distraught child trigger a galactic cataclysm that nobody could solve for over a century, but I'll certainly contend that early Discovery's [WTF rate](http://3.bp.blogspot.com/-ilMjE1Gh3Yg/VpUAmd-6TWI/AAAAAAAAAbg/-FJ08zxN42s/s320/WFTPM.png) is more in line with TNG era Trek than its more recent seasons have been. A low bar? Sure. But a relevant one. + + ## Early Discovery did a good job developing characters + + By the end of those nine episodes, we've had a reasonably detailed introduction to six main characters, and all of them have at least a little extra dimensionality to them, enough that they feel real and as presented, [I do care what happens to them](https://web.archive.org/web/20191008190554/https://old.reddit.com/r/DaystromInstitute/comments/bftmfq/i_dont_care_what_happens_to_these_people_the/): + + **Burnham** is our focusing lens for the story and certainly gets the most screen time, but she's also far from the most important person on the ship. We know she's a proficient officer, but also that she fucked up royally with massive repercussions in the opening acts of the show. That dichotomy lines up well with her odd mix of behaviors: conflicted about how much she deserves the second chance she was thrust into, yet supremely confident in her own abilities. Highly empathetic towards the Tardigrade, yet unhesitant and unapologetic in manipulating Saru into being a walking danger meter. There's clearly major unresolved trauma there, and I'd like to see this person develop more naturally from here. She should have her redemption, but she'll need to earn it: not through one grand gesture of genocide refusal, but by demonstrating over time that she is dealing with her demons, and really has learned from the disaster at the binaries. 
+ + Speaking of the most important people on the ship, **Stamets** is chief among them. He has neither the desire nor the mentality to be a warrior, and yet he serves an irreplaceable and absolutely critical role in what has clearly become a ship of war. He's a jerk when we first meet him, but his military necessitated chance to get close and personal with his research shows us a softer side, and likely changed him in ways that we're just starting to see develop. **Culber** is still mostly one-note, but as a couple they play very well off each other. + + **Saru** has a decidedly alien mentality for a military officer, but is clearly good at what he does. He is both thoughtful and candid about his past and present conflicts with Burnham, and his stint as acting captain in *Choose Your Pain* showed considerable growth. I want to see more of this guy learning to command (and I will get some, if less than I'd like). + + **Tilly** is an absolute delight. She has her share of minor and harmless tics, babbling when she's nervous and occasionally blurting things out when excited, and she's vulnerable to getting flustered... but can still pull herself together and do what must be done. She shows an impressive level of emotional intelligence in her interactions with Burnham and Stamets, and she also has the awareness and confidence to identify what she wants in life, and fight for it. That's an incredibly endearing combination, and makes her the emotional heart of the show. Give me more, much more, of Burnham mentoring Tilly up to an eventual captaincy. Maybe Tilly could only reasonably work her way to full Lieutenant or Lieutenant Commander over the course of a seven season show, but that would be plenty: I'm not here to see four pips, I'm here to see believable growth in an already sympathetic character. + + **Lorca** and **Tyler** I'll be touching on later. + + (Continued in the [comments](https://startrek.website/comment/149302)...)

    June 2023 Star Trek ebook deals
    For those not already familiar, Simon & Schuster offers a monthly ebook promotion with an array of Treklit across shows and eras. + + It’s a great way to dip your toe into the Litverse at modest cost. + + Recommended price is $ 0.99 in the USA through the major ebook sellers, with similar pricing offers in Canada and some other countries. + + This month’s selection includes the Destiny trilogy from the Relaunch novelverse. It’s David Mack’s excellent alpha to omega story of The Borg, featuring the Enterprise under Picard, the Titan under Riker, and the Aventine under newly promoted Captain Ezri Dax, with a Voyager cameo as well. + + There’s also the Worlds of DS9 series, some favourites from the TOS and TNG eras from Greg Cox, Christopher L Bennett and Michael Jan Friedman as well as tie-in books to the new series. + + This month’s offer is available until July 3rd




    As much as I appreciate all that (vintage) Star Trek was trying to do, handling of a lot of women’s issues were problematic or nonexistent.
    I'm making my way through DS9 for the first time (almost finished with season two). For reference, I've seen TOS, TNG, all the new movies, Discovery, Lower Decks, and SNW. + DS9 season one was a little slow to pick up, but my husband and I are loving season two. As always, Star Trek addresses a lot of hard hitting issues in very nuanced ways. It's also great continuously seeing fantastic representation in even older episodes (mainly women and people of colour in prominent and varied positions). + + But when we get to more women specific issues, things are seriously lacking. Basically, almost any scene with the Ferengis interacting with women is problematic. I understand that they are a highly misogynistic race, but my discomfort and annoyance is with how others react to them. The Grand Nagus sexually assaulting women, especially Kira, is never actually fully addressed. It's just seen as an annoyance to be put up with. Quark is often seen as aggressively pursuing women, often with unwanted touches and other advances. Even if at first the women in question are disinterested, they relent in some way, either completely or they tell him something flattering/of chances in the future. Thankfully, so far, none of the main women characters have given in to his advances, but it's common for one off and guest characters to somehow be "charmed" by him, even those from more egalitarian civilizations (eg: Vulcan). + + I know DS9 was made prior to #MeToo, but it's still worth pointing out where Star Trek can and has improved (thankfully newer Trek has fewer of these issues), and when some of the portrayals in the past are rather inconsistent with the philosophies of the Federation. Of course, the Federation operates with a level of cultural relativism, but it still doesn't make sense to me how the Grand Nagus (and Ferengis in general) were allowed to assault and harass those outside of their culture/species on an interstellar, multi-cultural, Federation run space station. 
Star Trek in the 90s was trying to do a lot of good, but watching it now, there's definitely quite a few aspects that are unsatisfying. +
    fedilink

    Is the new episode of [@startrek](https://startrek.website/c/startrek) [#StrangeNewWorlds](https://mastodon.cloud/tags/StrangeNewWorlds) already streaming in México? + + Or should I go to the 🏴‍☠️🌊🏖️???
    fedilink


    Instance Meta - Is this instance getting flooded with spam bot accounts?
    So, lemmy seems to be flooded with spam bot accounts at the moment. Look through the table of servers on fedidb (https://fedidb.org/software/lemmy) and notice how there are these huge instances without any active users (MAU). + + Also notice how `startrek.website` has 9000 users for 276 active users this month. + + From memory, when I signed up, there was no email requirement or captcha or anything. + + Admins ... maybe you want to tighten things up?




    + + + diff --git a/code/processes/crawling-process/src/test/resources/mock-crawl-data/mediawiki/index.html b/code/processes/crawling-process/src/test/resources/mock-crawl-data/mediawiki/index.html new file mode 100644 index 00000000..d7eda561 --- /dev/null +++ b/code/processes/crawling-process/src/test/resources/mock-crawl-data/mediawiki/index.html @@ -0,0 +1,854 @@ + + + + + Wikipedia, the free encyclopedia + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Jump to content +
    +
    +
    + + + + +
    +
    + + + + + +
    +
    +
    +
    +
    +
    +
    + +
    +
    +
    +
    +
    +
    +
    +
    +

    Main Page

    + + +
    +
    +
    +
    +
    +
    + +
    +
    + + + +
    +
    +
    +
    + +
    +
    +
    + +
    From Wikipedia, the free encyclopedia
    +
    +
    + + +
    +
    +
    +

    Welcome to Wikipedia

    ,
    + +
    6,673,736 articles in English
    +
    +
    +
    +
    +

    From today's featured article

    +
    +
    + Goodwin Fire burning in the Bradshaw Mountains
    Goodwin Fire burning in the Bradshaw Mountains
    +
    +

    The Goodwin Fire was a wildfire that burned 28,516 acres (115.40 km2) in the U.S. state of Arizona from June 24 to July 10, 2017. The fire destroyed 17 homes and damaged another 19 structures, but no one was killed. The fire was first detected on June 24 by a two-man fire patrol that spotted smoke in the Bradshaw Mountains near Prescott, Arizona. Fed by high winds and undisturbed growths of chaparral, and with fire crews impaired by difficult terrain, the fire grew from 150 acres (61 ha) on June 24 to 25,000 acres (10,000 ha) on June 29. Several communities in Yavapai County were evacuated, and Arizona State Route 69 was closed. Firefighting aircraft were grounded twice by civilian drones operating illegally in the burn area, but firefighters made rapid progress containing the fire's spread after June 28, and it was fully contained on July 10. Investigators did not determine any particular cause for the fire. (Full article...) +

    +
    + Recently featured:
    +
    +

    Did you know ...

    +
    +
    +
    + William Penn Memorial Fire Tower
    William Penn Memorial Fire Tower
    +
    + + +
    +
    +
    +

    In the news

    +
    +
    + Yevgeny Prigozhin in 2010
    Yevgeny Prigozhin
    +
    + + +
    +

    On this day

    +
    +

    June 24 +

    +
    +
    + Jiang Zemin
    Jiang Zemin
    +
    + +
    +
    + More anniversaries:
    +
    +
    +
    +
    +

    Today's featured picture

    +
    + + + +
    Södermanland Runic Inscription 113 + +

    Södermanland Runic Inscription 113 is a 10th-century runic inscription engraved on a 0.9 m by 0.5 m (35 in by 20 in) granite runestone in Södermanland, Sweden. The runestone was found in 1856 on a hill believed to have once had many other graves and monuments, and is thought to have originally stood atop a burial mound before falling over and being buried. The inscription is written in Old Norse using the Younger Futhark alphabet, and reads (transliterated into the Latin script): Þaiʀ situ stin, suniʀ Þurkitils auk Fulku, hiar faþur auk muþur iftiʀ. Kiarþu trikila, meaning: 'They placed the stone here, the sons of Þorketill and Folka, in memory of their father and mother. Made valiantly.' This photograph of the inscription was taken by Otto von Friesen, who owned the runestone in the early 20th century. +

    +

    Photograph credit: Otto von Friesen, restored by Adam Cuerden

    + + +
    +
    +
    +

    Other areas of Wikipedia

    +
    +
    • Community portal – The central hub for editors, with resources, links, tasks, and announcements.
    • +
    • Village pump – Forum for discussions about Wikipedia itself, including policies and technical issues.
    • +
    • Site news – Sources of news about Wikipedia and the broader Wikimedia movement.
    • +
    • Teahouse – Ask basic questions about using or editing Wikipedia.
    • +
    • Help desk – Ask questions about using or editing Wikipedia.
    • +
    • Reference desk – Ask research questions about encyclopedic topics.
    • +
    • Content portals – A unique way to navigate the encyclopedia.
    +
    +

    Wikipedia's sister projects

    +
    +

    Wikipedia is written by volunteer editors and hosted by the Wikimedia Foundation, a non-profit organization that also hosts a range of other volunteer projects: +

    +
    + +
    +

    Wikipedia languages

    +
    +
    +
    + + + + +
    +
    + + + +
    +
    + +
    + +
    +
    +
    + + + +
    + + + + \ No newline at end of file diff --git a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java index 04118c0f..fb8c536d 100644 --- a/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java +++ b/code/tools/experiment-runner/src/main/java/nu/marginalia/tools/experiments/DebugConverterExperiment.java @@ -1,10 +1,17 @@ package nu.marginalia.tools.experiments; import com.google.inject.Inject; +import nu.marginalia.converting.model.GeneratorType; import nu.marginalia.converting.model.ProcessedDocument; import nu.marginalia.converting.processor.DomainProcessor; +import nu.marginalia.converting.processor.logic.DocumentGeneratorExtractor; import nu.marginalia.crawling.model.CrawledDomain; import nu.marginalia.tools.Experiment; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; + +import java.util.HashSet; +import java.util.Set; public class DebugConverterExperiment extends Experiment { @@ -17,15 +24,55 @@ public class DebugConverterExperiment extends Experiment { } + Set seenGenerators = new HashSet<>(); + @Override public boolean process(CrawledDomain domain) { - var ret = domainProcessor.process(domain); - ret.documents.stream() - .filter(ProcessedDocument::isProcessedFully) - .peek(d -> System.out.println(d.url)) - .map(d -> d.details.metadata) - .forEach(System.out::println); + if (domain.doc == null) return true; + + var dge = new DocumentGeneratorExtractor(); + + for (var doc : domain.doc) { + if (doc.documentBody == null) continue; + + var parsed = Jsoup.parse(doc.documentBody.decode()); + parsed.getElementsByTag("head").comments() + .stream().filter(c -> { + String data = c.getData(); + if (data.contains("" + generators.type()); + if (generators.type() == GeneratorType.UNKNOWN) { + 
System.out.println(parsed.select("meta[name=generator]") + .attr("content")); + System.out.println(doc.url); + } + } + } + + } + +// +// var ret = domainProcessor.process(domain); +// +// +// ret.documents.stream() +// .filter(ProcessedDocument::isProcessedFully) +// .peek(d -> System.out.println(d.url)) +// .map(d -> d.details.metadata) +// .forEach(System.out::println); return true; }