package nu.marginalia.crawling; import nu.marginalia.link_parser.LinkParser; import nu.marginalia.model.EdgeUrl; import org.jsoup.Jsoup; import org.junit.jupiter.api.Test; import java.net.URISyntaxException; import static org.junit.jupiter.api.Assertions.*; class LinkParserTest { private String parseLink(String href, String relBase) throws URISyntaxException { var url = new EdgeUrl("http://www.marginalia.nu/" + relBase); var parser = new LinkParser(); var stuff = Jsoup.parseBodyFragment("test"); var lnk = parser.parseLink( url, stuff.getElementsByTag("a").get(0)); if (lnk.isEmpty()) { return null; } return lnk.get().toString(); } @Test void testRenormalization() throws URISyntaxException { assertEquals("http://www.marginalia.nu/test", parseLink("http://www.marginalia.nu/../test", "/")); } @Test void testRenormalization2() { assertTrue("http:".matches("^[a-zA-Z]+:")); assertFalse("/foo".matches("^[a-zA-Z]+:")); } @Test void testAnchor() throws URISyntaxException { assertNull(parseLink("#test", "/")); } @Test void testRelative() throws URISyntaxException { assertEquals("http://search.marginalia.nu/", parseLink("//search.marginalia.nu", "/")); assertEquals("http://www.marginalia.nu/test", parseLink("../test", "/")); assertEquals("http://www.marginalia.nu/test", parseLink("test", "/")); assertEquals("http://www.marginalia.nu/foo/test", parseLink("test", "/foo/index.html")); assertEquals("http://www.marginalia.nu/test", parseLink("../test", "/foo/index.html")); assertEquals("http://www.marginalia.nu/test", parseLink("/test", "/foo/index.html")); } private EdgeUrl getBaseUrl(String href, EdgeUrl documentUrl) { LinkParser lp = new LinkParser(); return lp.getBaseLink(Jsoup.parse(""), documentUrl); } @Test public void getBaseUrlTest() throws URISyntaxException { assertEquals(new EdgeUrl("https://www.marginalia.nu/base"), getBaseUrl("/base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar"))); assertEquals(new EdgeUrl("https://memex.marginalia.nu/base"), getBaseUrl("https://memex.marginalia.nu/base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar"))); assertEquals(new EdgeUrl("https://www.marginalia.nu/test/base"), getBaseUrl("base", new EdgeUrl("https://www.marginalia.nu/test/foo.bar"))); } @Test public void testParseBadBaseLink() throws URISyntaxException { LinkParser lp = new LinkParser(); var url = new EdgeUrl("https://memex.marginalia.nu/"); assertEquals(url, lp.getBaseLink(Jsoup.parse(""), url)); assertEquals(url, lp.getBaseLink(Jsoup.parse(""), url)); assertEquals(url, lp.getBaseLink(Jsoup.parse(""), url)); } }