MarginaliaSearch/code/process-mqapi/java/nu/marginalia/mqapi/crawling/CrawlRequest.java

38 lines
1.3 KiB
Java
Raw Normal View History

2023-07-17 11:57:32 +00:00
package nu.marginalia.mqapi.crawling;
2023-07-14 15:08:10 +00:00
2023-07-17 11:57:32 +00:00
import lombok.AllArgsConstructor;
import nu.marginalia.storage.model.FileStorageId;
import java.util.List;
2023-07-14 15:08:10 +00:00
/** A request to start a crawl */
2023-07-17 11:57:32 +00:00
@AllArgsConstructor
2023-07-14 15:08:10 +00:00
public class CrawlRequest {
/** (optional) Crawl spec(s) for sourcing domains to crawl. If not set,
* the EC_DOMAIN table will be consulted and domains with the corresponding
* node affinity will be used.
*/
public List<FileStorageId> specStorage;
/** (optional) Name of a single domain to be re-crawled */
public String targetDomainName;
/** File storage where the crawl data will be written. If it contains existing crawl data,
* this crawl data will be referenced for e-tags and last-mofified checks.
*/
2023-07-20 19:05:16 +00:00
public FileStorageId crawlStorage;
public static CrawlRequest forSpec(FileStorageId specStorage, FileStorageId crawlStorage) {
return new CrawlRequest(List.of(specStorage), null, crawlStorage);
}
public static CrawlRequest forSingleDomain(String targetDomainName, FileStorageId crawlStorage) {
return new CrawlRequest(null, targetDomainName, crawlStorage);
}
public static CrawlRequest forRecrawl(FileStorageId crawlStorage) {
return new CrawlRequest(null, null, crawlStorage);
}
2023-07-14 15:08:10 +00:00
}