Better title extraction for plain text plugin.

This commit is contained in:
Viktor Lofgren 2023-03-07 21:53:44 +01:00
parent 6fb0f77eea
commit d4010c76cf
2 changed files with 36 additions and 1 deletion

View File

@ -28,7 +28,7 @@ public class PlainTextLogic {
List<String> candidates = new ArrayList<>(firstFewLines);
// Remove mailing list header type stuff
candidates.removeIf(line -> line.contains(":"));
candidates.removeIf(line -> line.length() < 32 && line.contains(":"));
for (int line = 1; line < candidates.size(); line++) {
String maybeUnderline = candidates.get(line);
@ -46,6 +46,10 @@ public class PlainTextLogic {
}
}
if (candidates.size() >= 2 && candidates.get(1).isBlank() && candidates.get(0).trim().length() > 16) {
return candidates.get(0).trim();
}
return url.path.substring(url.path.lastIndexOf('/'));
}

View File

@ -246,8 +246,37 @@ class PlainTextLogicTest {
constitutes an illegal circumvention of copyright management.
""";
// Test fixture: the opening lines of a plain-text document whose first line
// is underlined with a row of = characters and is followed by a section
// heading underlined with a row of - characters. It is fed to both
// getDescription() and getTitle() in the tests of this class.
private final String PXE = """
PXE: Installing Slackware over the network
==========================================
Introduction
------------
When the time comes to install Slackware on your computer, you have a\s
limited number of options regarding the location of your Slackware\s
packages. Either you install them from the (un)official Slackware CDROM or\s
DVD, or you copy them to a pre-existing hard disk partition before starting\s
the installation procedure, or you fetch the packages from a network server
(using either NFS, HTTP or FTP protocol).
""";
// Test fixture: the opening lines of a plain-text release announcement whose
// first line has no underline row beneath it. It is fed to getTitle()
// together with an ANNOUNCE.TXT URL in the tests of this class.
private final String slackware = """
Announcing Slackware Linux 7.1!
The first major release for 2000, Slackware Linux 7.1 builds on the
success of Slackware 7.0. In addition to program updates and distribution
enhancements, you'll find the Konfucius (1.90) and the Kleopatra (1.91)
developmental releases of the K Desktop Environment, XFree86 4.0,
OpenMotif 2.1.30, and TrollTech's Qt 2.1.1 library available as system
""";
@Test
void getDescription() {
System.out.println(ptl.getDescription(LineUtils.firstNLines(PXE, 25)));
System.out.println(ptl.getDescription(LineUtils.firstNLines(uml, 25)));
System.out.println(ptl.getDescription(LineUtils.firstNLines(cmucl, 25)));
System.out.println(ptl.getDescription(LineUtils.firstNLines(xprint, 25)));
@ -257,6 +286,8 @@ class PlainTextLogicTest {
@Test
void getTitle() throws URISyntaxException {
System.out.println(ptl.getTitle(new EdgeUrl("http://mirror.cs.princeton.edu/pub/mirrors/slackware/slackware-7.1/ANNOUNCE.TXT"), LineUtils.firstNLines(slackware, 25)));
System.out.println(ptl.getTitle(new EdgeUrl("https://slackjeff.com.br/slackware/slackware-14.2/usb-and-pxe-installers/README_PXE.TXT"), LineUtils.firstNLines(PXE, 25)));
System.out.println(ptl.getTitle(new EdgeUrl("http://user-mode-linux.sourceforge.net/old/UserModeLinux-HOWTO.txt"), LineUtils.firstNLines(uml, 25)));
System.out.println(ptl.getTitle(new EdgeUrl("https://www.cons.org/cmucl/news/release-20a.txt"), LineUtils.firstNLines(cmucl, 25)));
System.out.println(ptl.getTitle(new EdgeUrl("https://www.x.org/docs/XPRINT/Xprint_old_FAQ.txt"), LineUtils.firstNLines(xprint, 25)));