MarginaliaSearch/code/common/config/java/nu/marginalia/WmsaHome.java

115 lines
4.1 KiB
Java
Raw Normal View History

2023-03-04 12:19:01 +00:00
package nu.marginalia;
2022-05-19 15:45:26 +00:00
2023-03-04 12:19:01 +00:00
2022-05-19 15:45:26 +00:00
import java.nio.file.Files;
import java.nio.file.Path;
2024-02-22 16:31:25 +00:00
import java.nio.file.Paths;
import java.util.Objects;
2024-02-22 16:31:25 +00:00
import java.util.Optional;
import java.util.stream.Stream;
2022-05-19 15:45:26 +00:00
public class WmsaHome {
public static UserAgent getUserAgent() {
return new UserAgent(
System.getProperty("crawler.userAgentString", "Mozilla/5.0 (compatible; Marginalia-like bot; +https://git.marginalia.nu/))"),
System.getProperty("crawler.userAgentIdentifier", "search.marginalia.nu")
);
2022-06-01 12:46:51 +00:00
}
public static Path getUploadDir() {
return Path.of(
System.getProperty("executor.uploadDir", "/uploads")
);
}
public static Path getHomePath() {
String[] possibleLocations = new String[] {
System.getenv("WMSA_HOME"),
System.getProperty("system.homePath"),
"/var/lib/wmsa",
"/wmsa"
};
2024-02-22 16:31:25 +00:00
Optional<String> retStr = Stream.of(possibleLocations)
.filter(Objects::nonNull)
.map(Path::of)
.filter(Files::isDirectory)
.map(Path::toString)
2024-02-22 16:31:25 +00:00
.findFirst();
if (retStr.isEmpty()) {
// Check parent directories for a fingerprint of the project's installation boilerplate
var prodRoot = Stream.iterate(Paths.get("").toAbsolutePath(), f -> f != null && Files.exists(f), Path::getParent)
.filter(p -> Files.exists(p.resolve("conf/properties/system.properties")))
.filter(p -> Files.exists(p.resolve("model/tfreq-new-algo3.bin")))
.findAny();
if (prodRoot.isPresent()) {
return prodRoot.get();
}
// Check if we are running in a test environment by looking for fingerprints
// matching the base of the source tree for the project, then looking up the
// run directory which contains a template for the installation we can use as
// though it's the project root for testing purposes
2024-02-22 16:31:25 +00:00
var testRoot = Stream.iterate(Paths.get("").toAbsolutePath(), f -> f != null && Files.exists(f), Path::getParent)
.filter(p -> Files.exists(p.resolve("run/env")))
.filter(p -> Files.exists(p.resolve("run/setup.sh")))
.map(p -> p.resolve("run"))
.findAny();
return testRoot.orElseThrow(() -> new IllegalStateException("""
Could not find $WMSA_HOME, either set environment
variable, the 'system.homePath' java property,
or ensure either /wmsa or /var/lib/wmsa exists
"""));
2024-02-22 16:31:25 +00:00
}
2024-02-22 16:31:25 +00:00
var ret = Path.of(retStr.get());
if (!Files.isDirectory(ret.resolve("model"))) {
throw new IllegalStateException("You need to run 'run/setup.sh' to download models to run/ before this will work!");
2022-05-19 15:45:26 +00:00
}
2022-05-19 15:45:26 +00:00
return ret;
}
public static Path getAdsDefinition() {
return getHomePath().resolve("data").resolve("adblock.txt");
}
public static Path getIPLocationDatabse() {
return getHomePath().resolve("data").resolve("IP2LOCATION-LITE-DB1.CSV");
}
public static Path getAsnMappingDatabase() {
return getHomePath().resolve("data").resolve("asn-data-raw-table");
}
public static Path getAsnInfoDatabase() {
return getHomePath().resolve("data").resolve("asn-used-autnums");
}
public static LanguageModels getLanguageModels() {
final Path home = getHomePath();
return new LanguageModels(
home.resolve("model/tfreq-new-algo3.bin"),
home.resolve("model/opennlp-sentence.bin"),
home.resolve("model/English.RDR"),
home.resolve("model/English.DICT"),
home.resolve("model/opennlp-tok.bin"),
home.resolve("model/lid.176.ftz"),
home.resolve("model/segments.bin")
);
}
public static Path getAtagsPath() {
return getHomePath().resolve("data/atags.parquet");
}
2022-05-19 15:45:26 +00:00
}