Add synthetic meta flag for root path documents

If the document's URL path is "/", a "special:root" meta flag is now added with the "Synthetic" bit set. This makes it possible to search for only the root document of a website. Neat stuff ahead :D
This commit is contained in:
Viktor Lofgren 2024-12-11 16:10:44 +01:00
parent 5002870d1f
commit a97c05107e

View File

@ -15,6 +15,7 @@ import nu.marginalia.model.crawl.HtmlFeature;
import nu.marginalia.model.crawl.UrlIndexingState;
import nu.marginalia.model.crawldata.CrawledDocument;
import nu.marginalia.model.crawldata.CrawlerDocumentStatus;
import nu.marginalia.model.idx.WordFlags;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -118,6 +119,10 @@ public class DocumentProcessor {
ret.details = detailsWithWords.details();
ret.words = detailsWithWords.words();
if (url.path.equals("/")) {
ret.words.addMeta("special:root", WordFlags.Synthetic.asBit());
}
documentDecorator.apply(ret);
if (Boolean.TRUE.equals(crawledDocument.hasCookies)