mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Better title extraction for plain text plugin.
This commit is contained in:
parent
6fb0f77eea
commit
d4010c76cf
@ -28,7 +28,7 @@ public class PlainTextLogic {
|
|||||||
List<String> candidates = new ArrayList<>(firstFewLines);
|
List<String> candidates = new ArrayList<>(firstFewLines);
|
||||||
|
|
||||||
// Remove mailing list header type stuff
|
// Remove mailing list header type stuff
|
||||||
candidates.removeIf(line -> line.contains(":"));
|
candidates.removeIf(line -> line.length() < 32 && line.contains(":"));
|
||||||
|
|
||||||
for (int line = 1; line < candidates.size(); line++) {
|
for (int line = 1; line < candidates.size(); line++) {
|
||||||
String maybeUnderline = candidates.get(line);
|
String maybeUnderline = candidates.get(line);
|
||||||
@ -46,6 +46,10 @@ public class PlainTextLogic {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (candidates.size() >= 2 && candidates.get(1).isBlank() && candidates.get(0).trim().length() > 16) {
|
||||||
|
return candidates.get(0).trim();
|
||||||
|
}
|
||||||
|
|
||||||
return url.path.substring(url.path.lastIndexOf('/'));
|
return url.path.substring(url.path.lastIndexOf('/'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,8 +246,37 @@ class PlainTextLogicTest {
|
|||||||
constitutes an illegal circumvention of copyright management.
|
constitutes an illegal circumvention of copyright management.
|
||||||
|
|
||||||
""";
|
""";
|
||||||
|
|
||||||
|
private final String PXE = """
|
||||||
|
|
||||||
|
PXE: Installing Slackware over the network
|
||||||
|
==========================================
|
||||||
|
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
------------
|
||||||
|
|
||||||
|
When the time comes to install Slackware on your computer, you have a\s
|
||||||
|
limited number of options regarding the location of your Slackware\s
|
||||||
|
packages. Either you install them from the (un)official Slackware CDROM or\s
|
||||||
|
DVD, or you copy them to a pre-existing hard disk partition before starting\s
|
||||||
|
the installation procedure, or you fetch the packages from a network server
|
||||||
|
(using either NFS, HTTP or FTP protocol).
|
||||||
|
|
||||||
|
""";
|
||||||
|
|
||||||
|
private final String slackware = """
|
||||||
|
Announcing Slackware Linux 7.1!
|
||||||
|
|
||||||
|
The first major release for 2000, Slackware Linux 7.1 builds on the
|
||||||
|
success of Slackware 7.0. In addition to program updates and distribution
|
||||||
|
enhancements, you'll find the Konfucius (1.90) and the Kleopatra (1.91)
|
||||||
|
developmental releases of the K Desktop Environment, XFree86 4.0,
|
||||||
|
OpenMotif 2.1.30, and TrollTech's Qt 2.1.1 library available as system
|
||||||
|
""";
|
||||||
@Test
|
@Test
|
||||||
void getDescription() {
|
void getDescription() {
|
||||||
|
System.out.println(ptl.getDescription(LineUtils.firstNLines(PXE, 25)));
|
||||||
System.out.println(ptl.getDescription(LineUtils.firstNLines(uml, 25)));
|
System.out.println(ptl.getDescription(LineUtils.firstNLines(uml, 25)));
|
||||||
System.out.println(ptl.getDescription(LineUtils.firstNLines(cmucl, 25)));
|
System.out.println(ptl.getDescription(LineUtils.firstNLines(cmucl, 25)));
|
||||||
System.out.println(ptl.getDescription(LineUtils.firstNLines(xprint, 25)));
|
System.out.println(ptl.getDescription(LineUtils.firstNLines(xprint, 25)));
|
||||||
@ -257,6 +286,8 @@ class PlainTextLogicTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
void getTitle() throws URISyntaxException {
|
void getTitle() throws URISyntaxException {
|
||||||
|
System.out.println(ptl.getTitle(new EdgeUrl("http://mirror.cs.princeton.edu/pub/mirrors/slackware/slackware-7.1/ANNOUNCE.TXT"), LineUtils.firstNLines(slackware, 25)));
|
||||||
|
System.out.println(ptl.getTitle(new EdgeUrl("https://slackjeff.com.br/slackware/slackware-14.2/usb-and-pxe-installers/README_PXE.TXT"), LineUtils.firstNLines(PXE, 25)));
|
||||||
System.out.println(ptl.getTitle(new EdgeUrl("http://user-mode-linux.sourceforge.net/old/UserModeLinux-HOWTO.txt"), LineUtils.firstNLines(uml, 25)));
|
System.out.println(ptl.getTitle(new EdgeUrl("http://user-mode-linux.sourceforge.net/old/UserModeLinux-HOWTO.txt"), LineUtils.firstNLines(uml, 25)));
|
||||||
System.out.println(ptl.getTitle(new EdgeUrl("https://www.cons.org/cmucl/news/release-20a.txt"), LineUtils.firstNLines(cmucl, 25)));
|
System.out.println(ptl.getTitle(new EdgeUrl("https://www.cons.org/cmucl/news/release-20a.txt"), LineUtils.firstNLines(cmucl, 25)));
|
||||||
System.out.println(ptl.getTitle(new EdgeUrl("https://www.x.org/docs/XPRINT/Xprint_old_FAQ.txt"), LineUtils.firstNLines(xprint, 25)));
|
System.out.println(ptl.getTitle(new EdgeUrl("https://www.x.org/docs/XPRINT/Xprint_old_FAQ.txt"), LineUtils.firstNLines(xprint, 25)));
|
||||||
|
Loading…
Reference in New Issue
Block a user