mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 21:29:00 +00:00

Look, this will make the git history look funny, but trimming unnecessary depth from the source tree is a very necessary sanity-preserving measure when dealing with a super-modularized codebase like this one. While it makes the project configuration a bit less conventional, it will save you several clicks every time you jump between modules. Which you'll do a lot, because it's *modul*ar. The src/main/java convention makes a lot of sense for a non-modular project though. This ain't that.
61 lines
2.1 KiB
Java
61 lines
2.1 KiB
Java
package nu.marginalia.tools;
|
|
|
|
import com.google.inject.Guice;
|
|
import com.google.inject.Injector;
|
|
import nu.marginalia.converting.ConverterModule;
|
|
import nu.marginalia.crawling.io.CrawledDomainReader;
|
|
import nu.marginalia.process.log.WorkLog;
|
|
import nu.marginalia.service.module.DatabaseModule;
|
|
import nu.marginalia.tools.experiments.*;
|
|
|
|
import java.io.IOException;
|
|
import java.nio.file.Path;
|
|
import java.util.*;
|
|
|
|
public class ExperimentRunnerMain {
|
|
|
|
private static Map<String, Class<? extends Experiment>> experiments = Map.of(
|
|
"test", TestExperiment.class,
|
|
"adblock", AdblockExperiment.class,
|
|
"topic", TopicExperiment.class,
|
|
"atags", AtagsExperiment.class,
|
|
"sentence-statistics", SentenceStatisticsExperiment.class,
|
|
"site-statistics", SiteStatisticsExperiment.class,
|
|
"export-atags", ExportExternalLinksExperiment.class,
|
|
"debug-converter", DebugConverterExperiment.class
|
|
);
|
|
|
|
public static void main(String... args) throws IOException {
|
|
if (args.length < 2) {
|
|
System.err.println("Expected arguments: plan.yaml experiment-name [experiment-args]");
|
|
return;
|
|
}
|
|
|
|
if (!experiments.containsKey(args[1])) {
|
|
System.err.println("Valid experiment names: " + experiments.keySet());
|
|
return;
|
|
}
|
|
|
|
Injector injector = Guice.createInjector(
|
|
new DatabaseModule(false),
|
|
new ConverterModule()
|
|
);
|
|
|
|
Experiment experiment = injector.getInstance(experiments.get(args[1]));
|
|
|
|
experiment.args(Arrays.copyOfRange(args, 2, args.length));
|
|
|
|
Path basePath = Path.of(args[0]);
|
|
for (var item : WorkLog.iterable(basePath.resolve("crawler.log"))) {
|
|
Path crawlDataPath = basePath.resolve(item.relPath());
|
|
try (var stream = CrawledDomainReader.createDataStream(CrawledDomainReader.CompatibilityLevel.FAST, crawlDataPath)) {
|
|
experiment.process(stream);
|
|
}
|
|
catch (Exception ex) {
|
|
ex.printStackTrace();
|
|
}
|
|
}
|
|
experiment.onFinish();
|
|
}
|
|
}
|