2023-07-17 11:57:32 +00:00
|
|
|
package nu.marginalia.mqapi.crawling;
|
2023-07-14 15:08:10 +00:00
|
|
|
|
2023-07-17 11:57:32 +00:00
|
|
|
import lombok.AllArgsConstructor;
|
2023-10-14 10:07:40 +00:00
|
|
|
import nu.marginalia.storage.model.FileStorageId;
|
|
|
|
|
|
|
|
import java.util.List;
|
2023-07-14 15:08:10 +00:00
|
|
|
|
|
|
|
/** A request to start a crawl */
|
2023-07-17 11:57:32 +00:00
|
|
|
@AllArgsConstructor
|
2023-07-14 15:08:10 +00:00
|
|
|
public class CrawlRequest {
|
2023-10-19 11:22:52 +00:00
|
|
|
/** (optional) Crawl spec(s) for sourcing domains to crawl. If not set,
|
|
|
|
* the EC_DOMAIN table will be consulted and domains with the corresponding
|
|
|
|
* node affinity will be used.
|
|
|
|
*/
|
2023-10-14 10:07:40 +00:00
|
|
|
public List<FileStorageId> specStorage;
|
2023-10-19 11:22:52 +00:00
|
|
|
|
2024-07-05 13:31:47 +00:00
|
|
|
/** (optional) Name of a single domain to be re-crawled */
|
|
|
|
public String targetDomainName;
|
|
|
|
|
2023-10-19 11:22:52 +00:00
|
|
|
/** File storage where the crawl data will be written. If it contains existing crawl data,
|
|
|
|
* this crawl data will be referenced for e-tags and last-mofified checks.
|
|
|
|
*/
|
2023-07-20 19:05:16 +00:00
|
|
|
public FileStorageId crawlStorage;
|
2024-07-05 13:31:47 +00:00
|
|
|
|
|
|
|
public static CrawlRequest forSpec(FileStorageId specStorage, FileStorageId crawlStorage) {
|
|
|
|
return new CrawlRequest(List.of(specStorage), null, crawlStorage);
|
|
|
|
}
|
|
|
|
|
|
|
|
public static CrawlRequest forSingleDomain(String targetDomainName, FileStorageId crawlStorage) {
|
|
|
|
return new CrawlRequest(null, targetDomainName, crawlStorage);
|
|
|
|
}
|
|
|
|
|
|
|
|
public static CrawlRequest forRecrawl(FileStorageId crawlStorage) {
|
|
|
|
return new CrawlRequest(null, null, crawlStorage);
|
|
|
|
}
|
|
|
|
|
2023-07-14 15:08:10 +00:00
|
|
|
}
|