MarginaliaSearch/code/processes/loading-process/java/nu/marginalia/loading/LoaderInputData.java
Viktor Lofgren 1d34224416 (refac) Remove src/main from all source code paths.
Look, this will make the git history look funny, but trimming unnecessary depth from the source tree is a very necessary sanity-preserving measure when dealing with a super-modularized codebase like this one.

While it makes the project configuration a bit less conventional, it will save you several clicks every time you jump between modules.  Which you'll do a lot, because it's *modul*ar.  The src/main/java convention makes a lot of sense for a non-modular project though.  This ain't that.
2024-02-23 16:13:40 +01:00

66 lines
2.3 KiB
Java

package nu.marginalia.loading;
import nu.marginalia.io.processed.ProcessedDataFileNames;
import nu.marginalia.worklog.BatchingWorkLogInspector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;
/**
 * Describes the processed-data input of a loader run: a set of source
 * directories, each paired with the batch number that is passed on to
 * {@link ProcessedDataFileNames} when listing that directory's files.
 */
public class LoaderInputData {
    private static final Logger logger = LoggerFactory.getLogger(LoaderInputData.class);

    /** Source directories, in the order they were supplied. Never modified after construction. */
    private final List<Path> sourceDirectories;

    /** Batch number handed to the file listing methods, keyed by source directory. */
    private final Map<Path, Integer> lastGoodBatch = new HashMap<>();

    /**
     * Creates input data for the given source directories. For each directory,
     * the batch number is obtained from {@link BatchingWorkLogInspector#getValidBatches}
     * applied to the {@code processor.log} file inside that directory.
     *
     * @param sourceDirectories directories to read from; the list is copied, so
     *                          later changes made by the caller have no effect
     * @throws IOException if inspecting a {@code processor.log} fails
     */
    public LoaderInputData(List<Path> sourceDirectories) throws IOException {
        // Snapshot the list: a directory added by the caller afterwards would have
        // no entry in lastGoodBatch and would blow up the list*Files() methods.
        this.sourceDirectories = List.copyOf(sourceDirectories);

        for (Path source : this.sourceDirectories) {
            int validBatches = BatchingWorkLogInspector.getValidBatches(source.resolve("processor.log"));
            lastGoodBatch.put(source, validBatches);

            if (validBatches == 0) {
                // This is useful diagnostic information, so we log it as a warning
                logger.warn("No valid batches found in {}", source);
            }
        }
    }

    /**
     * Creates input data for a single source directory with an explicitly
     * provided batch number, without consulting {@code processor.log}.
     * Primarily intended for testing, but fully functional.
     *
     * @param singleSource the only source directory
     * @param lastBatch    the batch number to associate with {@code singleSource}
     * @throws IOException never thrown by this constructor itself; the clause is
     *                     retained so that existing callers keep compiling
     */
    public LoaderInputData(Path singleSource, int lastBatch) throws IOException {
        sourceDirectories = List.of(singleSource);
        lastGoodBatch.put(singleSource, lastBatch);
    }

    /**
     * Lists the domain files of every source directory.
     *
     * @return a new list holding the results of
     *         {@code ProcessedDataFileNames.listDomainFiles} for each source, in source order
     */
    public Collection<Path> listDomainFiles() {
        return listFromAllSources(ProcessedDataFileNames::listDomainFiles);
    }

    /**
     * Lists the domain link files of every source directory.
     *
     * @return a new list holding the results of
     *         {@code ProcessedDataFileNames.listDomainLinkFiles} for each source, in source order
     */
    public Collection<Path> listDomainLinkFiles() {
        return listFromAllSources(ProcessedDataFileNames::listDomainLinkFiles);
    }

    /**
     * Lists the document files of every source directory.
     *
     * @return a new list holding the results of
     *         {@code ProcessedDataFileNames.listDocumentFiles} for each source, in source order
     */
    public Collection<Path> listDocumentFiles() {
        return listFromAllSources(ProcessedDataFileNames::listDocumentFiles);
    }

    /** Applies {@code lister} to each source directory and its batch number, concatenating the results. */
    private List<Path> listFromAllSources(BatchFileLister lister) {
        List<Path> pathsAll = new ArrayList<>();
        for (Path source : sourceDirectories) {
            pathsAll.addAll(lister.list(source, lastGoodBatch.get(source)));
        }
        return pathsAll;
    }

    /** A file listing operation over one source directory, bounded by a batch number. */
    @FunctionalInterface
    private interface BatchFileLister {
        Collection<? extends Path> list(Path source, int lastBatch);
    }
}