mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-25 05:38:59 +00:00

Some refactoring is still needed, but a dummy actor is in place, as is a process that crawls URLs from the livecapture service's RSS endpoints; that makes it all the way to being indexable.
33 lines
919 B
Java
33 lines
919 B
Java
package nu.marginalia.livecrawler;
|
|
|
|
import nu.marginalia.model.EdgeUrl;
|
|
import org.junit.jupiter.api.Assertions;
|
|
import org.junit.jupiter.api.Test;
|
|
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
|
|
public class LiveCrawlDataSetTest {
|
|
|
|
@Test
|
|
public void testGetDataSet() throws Exception {
|
|
Path tempFile = Files.createTempFile("test", ".db");
|
|
try {
|
|
LiveCrawlDataSet dataSet = new LiveCrawlDataSet(tempFile.toString());
|
|
|
|
Assertions.assertFalse(dataSet.hasUrl("https://www.example.com/"));
|
|
dataSet.saveDocument(
|
|
1,
|
|
new EdgeUrl("https://www.example.com/"),
|
|
"test",
|
|
"test",
|
|
"test"
|
|
);
|
|
Assertions.assertTrue(dataSet.hasUrl("https://www.example.com/"));
|
|
}
|
|
finally {
|
|
Files.delete(tempFile);
|
|
}
|
|
}
|
|
|
|
} |