2023-03-04 12:19:01 +00:00
|
|
|
package nu.marginalia;
|
2022-05-19 15:45:26 +00:00
|
|
|
|
2023-03-04 12:19:01 +00:00
|
|
|
|
2022-05-19 15:45:26 +00:00
|
|
|
import java.nio.file.Files;
|
|
|
|
import java.nio.file.Path;
|
2024-02-22 16:31:25 +00:00
|
|
|
import java.nio.file.Paths;
|
2024-02-22 13:01:23 +00:00
|
|
|
import java.util.Objects;
|
2024-02-22 16:31:25 +00:00
|
|
|
import java.util.Optional;
|
2023-03-05 13:12:13 +00:00
|
|
|
import java.util.stream.Stream;
|
2022-05-19 15:45:26 +00:00
|
|
|
|
|
|
|
public class WmsaHome {
|
2024-01-13 16:12:18 +00:00
|
|
|
public static UserAgent getUserAgent() {
|
|
|
|
return new UserAgent(
|
|
|
|
System.getProperty("crawler.userAgentString", "Mozilla/5.0 (compatible; Marginalia-like bot; +https://git.marginalia.nu/))"),
|
|
|
|
System.getProperty("crawler.userAgentIdentifier", "search.marginalia.nu")
|
|
|
|
);
|
2022-06-01 12:46:51 +00:00
|
|
|
}
|
|
|
|
|
2023-03-05 12:47:40 +00:00
|
|
|
|
2024-01-12 11:33:05 +00:00
|
|
|
public static Path getUploadDir() {
|
2024-01-13 16:12:18 +00:00
|
|
|
return Path.of(
|
|
|
|
System.getProperty("executor.uploadDir", "/uploads")
|
|
|
|
);
|
2024-01-12 11:33:05 +00:00
|
|
|
}
|
|
|
|
|
2022-05-25 16:02:19 +00:00
|
|
|
public static Path getHomePath() {
|
2024-02-22 13:01:23 +00:00
|
|
|
String[] possibleLocations = new String[] {
|
|
|
|
System.getenv("WMSA_HOME"),
|
|
|
|
System.getProperty("system.homePath"),
|
|
|
|
"/var/lib/wmsa",
|
|
|
|
"/wmsa"
|
|
|
|
};
|
|
|
|
|
2024-02-22 16:31:25 +00:00
|
|
|
Optional<String> retStr = Stream.of(possibleLocations)
|
2024-02-22 13:01:23 +00:00
|
|
|
.filter(Objects::nonNull)
|
|
|
|
.map(Path::of)
|
|
|
|
.filter(Files::isDirectory)
|
|
|
|
.map(Path::toString)
|
2024-02-22 16:31:25 +00:00
|
|
|
.findFirst();
|
|
|
|
|
|
|
|
if (retStr.isEmpty()) {
|
2024-04-16 10:37:18 +00:00
|
|
|
// Check parent directories for a fingerprint of the project's installation boilerplate
|
|
|
|
var prodRoot = Stream.iterate(Paths.get("").toAbsolutePath(), f -> f != null && Files.exists(f), Path::getParent)
|
|
|
|
.filter(p -> Files.exists(p.resolve("conf/properties/system.properties")))
|
|
|
|
.filter(p -> Files.exists(p.resolve("model/tfreq-new-algo3.bin")))
|
|
|
|
.findAny();
|
|
|
|
if (prodRoot.isPresent()) {
|
|
|
|
return prodRoot.get();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if we are running in a test environment by looking for fingerprints
|
|
|
|
// matching the base of the source tree for the project, then looking up the
|
|
|
|
// run directory which contains a template for the installation we can use as
|
|
|
|
// though it's the project root for testing purposes
|
2024-02-22 16:31:25 +00:00
|
|
|
|
|
|
|
var testRoot = Stream.iterate(Paths.get("").toAbsolutePath(), f -> f != null && Files.exists(f), Path::getParent)
|
|
|
|
.filter(p -> Files.exists(p.resolve("run/env")))
|
|
|
|
.filter(p -> Files.exists(p.resolve("run/setup.sh")))
|
|
|
|
.map(p -> p.resolve("run"))
|
|
|
|
.findAny();
|
|
|
|
|
2024-03-03 09:49:23 +00:00
|
|
|
return testRoot.orElseThrow(() -> new IllegalStateException("""
|
2024-02-22 13:01:23 +00:00
|
|
|
Could not find $WMSA_HOME, either set environment
|
2024-04-16 10:37:18 +00:00
|
|
|
variable, the 'system.homePath' java property,
|
|
|
|
or ensure either /wmsa or /var/lib/wmsa exists
|
2024-02-22 13:01:23 +00:00
|
|
|
"""));
|
2024-02-22 16:31:25 +00:00
|
|
|
}
|
2022-06-03 11:32:05 +00:00
|
|
|
|
2024-02-22 16:31:25 +00:00
|
|
|
var ret = Path.of(retStr.get());
|
2023-03-05 13:12:13 +00:00
|
|
|
|
|
|
|
if (!Files.isDirectory(ret.resolve("model"))) {
|
2024-03-03 09:49:23 +00:00
|
|
|
throw new IllegalStateException("You need to run 'run/setup.sh' to download models to run/ before this will work!");
|
2022-05-19 15:45:26 +00:00
|
|
|
}
|
2023-03-05 13:12:13 +00:00
|
|
|
|
2022-05-19 15:45:26 +00:00
|
|
|
return ret;
|
|
|
|
}
|
2022-05-25 16:02:19 +00:00
|
|
|
|
2024-11-06 14:28:20 +00:00
|
|
|
public static Path getDataPath() {
|
|
|
|
return getHomePath().resolve("data");
|
|
|
|
}
|
|
|
|
|
2022-08-12 11:50:18 +00:00
|
|
|
public static Path getAdsDefinition() {
|
|
|
|
return getHomePath().resolve("data").resolve("adblock.txt");
|
|
|
|
}
|
|
|
|
|
2022-05-27 12:27:44 +00:00
|
|
|
public static Path getIPLocationDatabse() {
|
|
|
|
return getHomePath().resolve("data").resolve("IP2LOCATION-LITE-DB1.CSV");
|
2023-12-16 20:55:04 +00:00
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
public static Path getAsnMappingDatabase() {
|
|
|
|
return getHomePath().resolve("data").resolve("asn-data-raw-table");
|
|
|
|
}
|
|
|
|
|
|
|
|
public static Path getAsnInfoDatabase() {
|
|
|
|
return getHomePath().resolve("data").resolve("asn-used-autnums");
|
2022-05-27 12:27:44 +00:00
|
|
|
}
|
2022-05-27 22:16:31 +00:00
|
|
|
|
2022-06-03 11:32:05 +00:00
|
|
|
public static LanguageModels getLanguageModels() {
|
|
|
|
final Path home = getHomePath();
|
|
|
|
|
|
|
|
return new LanguageModels(
|
|
|
|
home.resolve("model/tfreq-new-algo3.bin"),
|
|
|
|
home.resolve("model/opennlp-sentence.bin"),
|
|
|
|
home.resolve("model/English.RDR"),
|
|
|
|
home.resolve("model/English.DICT"),
|
2024-03-19 09:33:29 +00:00
|
|
|
home.resolve("model/lid.176.ftz"),
|
|
|
|
home.resolve("model/segments.bin")
|
|
|
|
);
|
2022-06-03 11:32:05 +00:00
|
|
|
}
|
2022-08-12 11:50:18 +00:00
|
|
|
|
2023-11-04 13:24:17 +00:00
|
|
|
public static Path getAtagsPath() {
|
|
|
|
return getHomePath().resolve("data/atags.parquet");
|
|
|
|
}
|
2023-12-16 20:55:04 +00:00
|
|
|
|
|
|
|
|
2022-05-19 15:45:26 +00:00
|
|
|
}
|