mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-22 20:48:59 +00:00
(actor) Improve resilience for the migration actor
This commit is contained in:
parent
39cd1c18f8
commit
6ece6a6cfb
@ -60,30 +60,35 @@ public class MigrateCrawlDataActor extends RecordActorPrototype {
|
|||||||
|
|
||||||
for (Map.Entry<WorkLogEntry, Path> item : WorkLog.iterableMap(crawlerLog, new CrawlDataLocator(root))) {
|
for (Map.Entry<WorkLogEntry, Path> item : WorkLog.iterableMap(crawlerLog, new CrawlDataLocator(root))) {
|
||||||
|
|
||||||
var entry = item.getKey();
|
final WorkLogEntry entry = item.getKey();
|
||||||
var path = item.getValue();
|
final Path inputPath = item.getValue();
|
||||||
|
|
||||||
heartbeat.progress("Migrating" + path.toFile().getName(), entryIdx++, totalEntries);
|
Path outputPath = inputPath;
|
||||||
|
heartbeat.progress("Migrating" + inputPath.getFileName(), entryIdx++, totalEntries);
|
||||||
|
|
||||||
if (path.toFile().getName().endsWith(".parquet") && Files.exists(path)) {
|
if (inputPath.toString().endsWith(".parquet")) {
|
||||||
try {
|
String domain = entry.id();
|
||||||
String domain = entry.id();
|
String id = Integer.toHexString(domain.hashCode());
|
||||||
String id = Integer.toHexString(domain.hashCode());
|
|
||||||
|
|
||||||
Path outputFile = CrawlerOutputFile.createSlopPath(root, id, domain);
|
outputPath = CrawlerOutputFile.createSlopPath(root, id, domain);
|
||||||
|
|
||||||
SlopCrawlDataRecord.convertFromParquet(path, outputFile);
|
if (Files.exists(inputPath)) {
|
||||||
|
try {
|
||||||
workLog.setJobToFinished(entry.id(), outputFile.toString(), entry.cnt());
|
SlopCrawlDataRecord.convertFromParquet(inputPath, outputPath);
|
||||||
|
} catch (Exception ex) {
|
||||||
|
outputPath = inputPath; // don't update the work log on error
|
||||||
|
logger.error("Failed to convert " + inputPath, ex);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (Exception ex) {
|
else if (!Files.exists(inputPath) && !Files.exists(outputPath)) {
|
||||||
logger.error("Failed to convert " + path, ex);
|
// if the input file is missing, and the output file is missing, we just write the log
|
||||||
|
// record identical to the old one
|
||||||
|
outputPath = inputPath;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
workLog.setJobToFinished(entry.id(), path.toString(), entry.cnt());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Write a log entry for the (possibly) converted file
|
||||||
|
workLog.setJobToFinished(entry.id(), outputPath.toString(), entry.cnt());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user