mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 13:19:02 +00:00
(heartbeat) Task heartbeats
This commit is contained in:
parent
1d0cea1d55
commit
624b78ec3a
@ -0,0 +1,10 @@
|
|||||||
|
-- Liveness and progress information for tasks running inside a service.
-- A row is keyed by the full task name; the owning task instance upserts it
-- on start and refreshes HEARTBEAT_TIME, STATUS, PROGRESS and STAGE_NAME while it runs.
--
-- String literals use single quotes so the statement also parses when the
-- server runs with sql_mode=ANSI_QUOTES (where double quotes denote identifiers).
CREATE TABLE IF NOT EXISTS TASK_HEARTBEAT (
    TASK_NAME        VARCHAR(255) PRIMARY KEY COMMENT 'Full name of the task, including node id if applicable, e.g. reconvert:0',
    TASK_BASE        VARCHAR(255) NOT NULL COMMENT 'Base name of the task, e.g. reconvert',
    INSTANCE         VARCHAR(255) NOT NULL COMMENT 'UUID of the task instance',
    SERVICE_INSTANCE VARCHAR(255) NOT NULL COMMENT 'UUID of the parent service',
    STATUS           ENUM ('STARTING', 'RUNNING', 'STOPPED') NOT NULL DEFAULT 'STARTING' COMMENT 'Status of the task',
    PROGRESS         INT NOT NULL DEFAULT 0 COMMENT 'Progress of the task',
    STAGE_NAME       VARCHAR(255) DEFAULT '' COMMENT 'Name of the stage the task is currently in',
    HEARTBEAT_TIME   TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) COMMENT 'Task was last seen at this point'
);
|
@ -10,7 +10,8 @@ import org.slf4j.LoggerFactory;
|
|||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
/** This service sends a heartbeat to the database every 5 seconds.
|
/** This service sends a heartbeat to the database every 5 seconds,
|
||||||
|
* updating the control service with the liveness information for the service.
|
||||||
*/
|
*/
|
||||||
@Singleton
|
@Singleton
|
||||||
public class ServiceHeartbeat {
|
public class ServiceHeartbeat {
|
||||||
@ -18,6 +19,7 @@ public class ServiceHeartbeat {
|
|||||||
private final String serviceName;
|
private final String serviceName;
|
||||||
private final String serviceBase;
|
private final String serviceBase;
|
||||||
private final String instanceUUID;
|
private final String instanceUUID;
|
||||||
|
private final ServiceConfiguration configuration;
|
||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
|
||||||
@ -32,6 +34,7 @@ public class ServiceHeartbeat {
|
|||||||
{
|
{
|
||||||
this.serviceName = configuration.serviceName() + ":" + configuration.node();
|
this.serviceName = configuration.serviceName() + ":" + configuration.node();
|
||||||
this.serviceBase = configuration.serviceName();
|
this.serviceBase = configuration.serviceName();
|
||||||
|
this.configuration = configuration;
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
|
|
||||||
this.instanceUUID = configuration.instanceUuid().toString();
|
this.instanceUUID = configuration.instanceUuid().toString();
|
||||||
@ -41,6 +44,11 @@ public class ServiceHeartbeat {
|
|||||||
Runtime.getRuntime().addShutdownHook(new Thread(this::shutDown));
|
Runtime.getRuntime().addShutdownHook(new Thread(this::shutDown));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public <T extends Enum<T>> ServiceTaskHeartbeat<T> createServiceProcessHeartbeat(Class<T> steps, String processName) {
|
||||||
|
return new ServiceTaskHeartbeat<>(steps, configuration, processName, dataSource);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void start() {
|
public void start() {
|
||||||
if (!running) {
|
if (!running) {
|
||||||
runnerThread.start();
|
runnerThread.start();
|
||||||
@ -142,4 +150,5 @@ public class ServiceHeartbeat {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,184 @@
|
|||||||
|
package nu.marginalia.service.control;
|
||||||
|
|
||||||
|
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
/** This object sends a heartbeat to the database every few seconds,
|
||||||
|
* updating with the progress of a task within a service. Progress is tracked by providing
|
||||||
|
* enumerations corresponding to the steps in the task. It's important they're arranged in the same
|
||||||
|
* order as the steps in the task in order to get an accurate progress tracking.
|
||||||
|
*/
|
||||||
|
public class ServiceTaskHeartbeat<T extends Enum<T>> implements AutoCloseable {
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(ServiceTaskHeartbeat.class);
|
||||||
|
private final String taskName;
|
||||||
|
private final String taskBase;
|
||||||
|
private final String instanceUUID;
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
|
||||||
|
private final Thread runnerThread;
|
||||||
|
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
|
||||||
|
private final String serviceInstanceUUID;
|
||||||
|
private final int stepCount;
|
||||||
|
|
||||||
|
private volatile boolean running = false;
|
||||||
|
private volatile int stepNum = 0;
|
||||||
|
private volatile String step = "-";
|
||||||
|
|
||||||
|
ServiceTaskHeartbeat(Class<T> stepClass,
|
||||||
|
ServiceConfiguration configuration,
|
||||||
|
String taskName,
|
||||||
|
HikariDataSource dataSource)
|
||||||
|
{
|
||||||
|
this.taskName = configuration.serviceName() + "." + taskName + ":" + configuration.node();
|
||||||
|
this.taskBase = configuration.serviceName() + "." + taskName;
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
|
||||||
|
this.instanceUUID = UUID.randomUUID().toString();
|
||||||
|
this.serviceInstanceUUID = configuration.instanceUuid().toString();
|
||||||
|
|
||||||
|
this.stepCount = stepClass.getEnumConstants().length;
|
||||||
|
|
||||||
|
runnerThread = new Thread(this::run);
|
||||||
|
runnerThread.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Update the progress of the task. This is a fast function that doesn't block;
|
||||||
|
* the actual update is done in a separate thread.
|
||||||
|
*
|
||||||
|
* @param step The current step in the task.
|
||||||
|
*/
|
||||||
|
public void progress(T step) {
|
||||||
|
this.step = step.name();
|
||||||
|
|
||||||
|
// off by one since we calculate the progress based on the number of steps,
|
||||||
|
// and Enum.ordinal() is zero-based (so the 5th step in a 5 step task is 4, not 5; resulting in the
|
||||||
|
// final progress being 80% and not 100%)
|
||||||
|
|
||||||
|
this.stepNum = 1 + step.ordinal();
|
||||||
|
|
||||||
|
logger.info("ServiceTask {} progress: {}", taskBase, step.name());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void shutDown() {
|
||||||
|
if (!running)
|
||||||
|
return;
|
||||||
|
|
||||||
|
running = false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
runnerThread.join();
|
||||||
|
heartbeatStop();
|
||||||
|
}
|
||||||
|
catch (InterruptedException|SQLException ex) {
|
||||||
|
logger.warn("ServiceHeartbeat shutdown failed", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void run() {
|
||||||
|
if (!running)
|
||||||
|
running = true;
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
try {
|
||||||
|
heartbeatInit();
|
||||||
|
|
||||||
|
while (running) {
|
||||||
|
try {
|
||||||
|
heartbeatUpdate();
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.warn("ServiceHeartbeat failed to update", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
TimeUnit.SECONDS.sleep(heartbeatInterval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (InterruptedException|SQLException ex) {
|
||||||
|
logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
|
||||||
|
System.exit(255);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void heartbeatInit() throws SQLException {
|
||||||
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
try (var stmt = connection.prepareStatement(
|
||||||
|
"""
|
||||||
|
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
|
||||||
|
VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
|
||||||
|
ON DUPLICATE KEY UPDATE
|
||||||
|
INSTANCE = ?,
|
||||||
|
SERVICE_INSTANCE = ?,
|
||||||
|
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
|
||||||
|
STATUS = 'STARTING'
|
||||||
|
"""
|
||||||
|
))
|
||||||
|
{
|
||||||
|
stmt.setString(1, taskName);
|
||||||
|
stmt.setString(2, taskBase);
|
||||||
|
stmt.setString(3, instanceUUID);
|
||||||
|
stmt.setString(4, serviceInstanceUUID);
|
||||||
|
stmt.setString(5, instanceUUID);
|
||||||
|
stmt.setString(6, serviceInstanceUUID);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void heartbeatUpdate() throws SQLException {
|
||||||
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
try (var stmt = connection.prepareStatement(
|
||||||
|
"""
|
||||||
|
UPDATE TASK_HEARTBEAT
|
||||||
|
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
|
||||||
|
STATUS = 'RUNNING',
|
||||||
|
PROGRESS = ?,
|
||||||
|
STAGE_NAME = ?
|
||||||
|
WHERE INSTANCE = ?
|
||||||
|
""")
|
||||||
|
)
|
||||||
|
{
|
||||||
|
stmt.setInt(1, (int) Math.round(100 * stepNum / (double) stepCount));
|
||||||
|
stmt.setString(2, step);
|
||||||
|
stmt.setString(3, instanceUUID);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void heartbeatStop() throws SQLException {
|
||||||
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
try (var stmt = connection.prepareStatement(
|
||||||
|
"""
|
||||||
|
UPDATE TASK_HEARTBEAT
|
||||||
|
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
|
||||||
|
STATUS='STOPPED',
|
||||||
|
PROGRESS = ?,
|
||||||
|
STAGE_NAME = ?
|
||||||
|
WHERE INSTANCE = ?
|
||||||
|
""")
|
||||||
|
)
|
||||||
|
{
|
||||||
|
stmt.setInt(1, (int) Math.round(100 * stepNum / (double) stepCount));
|
||||||
|
stmt.setString( 2, step);
|
||||||
|
stmt.setString( 3, instanceUUID);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
shutDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -18,6 +18,7 @@ dependencies {
|
|||||||
implementation project(':code:features-index:index-journal')
|
implementation project(':code:features-index:index-journal')
|
||||||
implementation project(':code:features-index:lexicon')
|
implementation project(':code:features-index:lexicon')
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:service')
|
||||||
|
|
||||||
implementation project(':third-party:uppend')
|
implementation project(':third-party:uppend')
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ import nu.marginalia.array.LongArray;
|
|||||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
|
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
|
||||||
import nu.marginalia.model.idx.DocumentMetadata;
|
import nu.marginalia.model.idx.DocumentMetadata;
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
import nu.marginalia.ranking.DomainRankings;
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
import org.roaringbitmap.IntConsumer;
|
import org.roaringbitmap.IntConsumer;
|
||||||
import org.roaringbitmap.RoaringBitmap;
|
import org.roaringbitmap.RoaringBitmap;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
@ -19,6 +20,7 @@ import java.nio.file.Path;
|
|||||||
|
|
||||||
public class ForwardIndexConverter {
|
public class ForwardIndexConverter {
|
||||||
|
|
||||||
|
private final ServiceHeartbeat heartbeat;
|
||||||
private final File inputFile;
|
private final File inputFile;
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
@ -28,18 +30,27 @@ public class ForwardIndexConverter {
|
|||||||
private final DomainRankings domainRankings;
|
private final DomainRankings domainRankings;
|
||||||
|
|
||||||
|
|
||||||
public ForwardIndexConverter(
|
public ForwardIndexConverter(ServiceHeartbeat heartbeat,
|
||||||
File inputFile,
|
File inputFile,
|
||||||
Path outputFileDocsId,
|
Path outputFileDocsId,
|
||||||
Path outputFileDocsData,
|
Path outputFileDocsData,
|
||||||
DomainRankings domainRankings
|
DomainRankings domainRankings
|
||||||
) {
|
) {
|
||||||
|
this.heartbeat = heartbeat;
|
||||||
this.inputFile = inputFile;
|
this.inputFile = inputFile;
|
||||||
this.outputFileDocsId = outputFileDocsId;
|
this.outputFileDocsId = outputFileDocsId;
|
||||||
this.outputFileDocsData = outputFileDocsData;
|
this.outputFileDocsData = outputFileDocsData;
|
||||||
this.domainRankings = domainRankings;
|
this.domainRankings = domainRankings;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Steps of the forward index conversion, used for progress reporting.
 * The declaration order must match the order in which convert() reports them,
 * since the reported progress is derived from the ordinal of the current step.
 */
public enum TaskSteps {
    /** Reported before the document ids are fetched from the journal */
    GET_DOC_IDS,

    /** Reported before the doc id to index mapping is built */
    GATHER_OFFSETS,

    /** Reported before the document data file is created and populated */
    SUPPLEMENTAL_INDEXES,

    /** Reported before the output files are forced */
    FORCE,

    /** Reported once the conversion is complete */
    FINISHED
}
|
||||||
|
|
||||||
public void convert() throws IOException {
|
public void convert() throws IOException {
|
||||||
deleteOldFiles();
|
deleteOldFiles();
|
||||||
|
|
||||||
@ -53,18 +64,21 @@ public class ForwardIndexConverter {
|
|||||||
|
|
||||||
logger.info("Domain Rankings size = {}", domainRankings.size());
|
logger.info("Domain Rankings size = {}", domainRankings.size());
|
||||||
|
|
||||||
try {
|
try (var progress = heartbeat.createServiceProcessHeartbeat(TaskSteps.class, "forwardIndexConverter")) {
|
||||||
|
progress.progress(TaskSteps.GET_DOC_IDS);
|
||||||
|
|
||||||
LongArray docsFileId = getDocIds(outputFileDocsId, journalReader);
|
LongArray docsFileId = getDocIds(outputFileDocsId, journalReader);
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.GATHER_OFFSETS);
|
||||||
|
|
||||||
// doc ids -> sorted list of ids
|
// doc ids -> sorted list of ids
|
||||||
|
|
||||||
logger.info("Gathering Offsets");
|
|
||||||
Long2IntOpenHashMap docIdToIdx = new Long2IntOpenHashMap((int) docsFileId.size());
|
Long2IntOpenHashMap docIdToIdx = new Long2IntOpenHashMap((int) docsFileId.size());
|
||||||
docsFileId.forEach(0, docsFileId.size(), (pos, val) -> docIdToIdx.put(val, (int) pos));
|
docsFileId.forEach(0, docsFileId.size(), (pos, val) -> docIdToIdx.put(val, (int) pos));
|
||||||
|
|
||||||
// docIdToIdx -> file offset for id
|
progress.progress(TaskSteps.SUPPLEMENTAL_INDEXES);
|
||||||
|
|
||||||
logger.info("Creating Supplementary Indexes");
|
// docIdToIdx -> file offset for id
|
||||||
|
|
||||||
LongArray docFileData = LongArray.mmapForWriting(outputFileDocsData, ForwardIndexParameters.ENTRY_SIZE * docsFileId.size());
|
LongArray docFileData = LongArray.mmapForWriting(outputFileDocsData, ForwardIndexParameters.ENTRY_SIZE * docsFileId.size());
|
||||||
|
|
||||||
@ -78,11 +92,15 @@ public class ForwardIndexConverter {
|
|||||||
docFileData.set(entryOffset + ForwardIndexParameters.DOMAIN_OFFSET, entry.domainId());
|
docFileData.set(entryOffset + ForwardIndexParameters.DOMAIN_OFFSET, entry.domainId());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.FORCE);
|
||||||
|
|
||||||
docFileData.force();
|
docFileData.force();
|
||||||
docsFileId.force();
|
docsFileId.force();
|
||||||
|
|
||||||
docFileData.advice(NativeIO.Advice.DontNeed);
|
docFileData.advice(NativeIO.Advice.DontNeed);
|
||||||
docsFileId.advice(NativeIO.Advice.DontNeed);
|
docsFileId.advice(NativeIO.Advice.DontNeed);
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.FINISHED);
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
logger.error("Failed to convert", ex);
|
logger.error("Failed to convert", ex);
|
||||||
throw ex;
|
throw ex;
|
||||||
|
@ -20,6 +20,7 @@ dependencies {
|
|||||||
implementation project(':code:features-index:index-journal')
|
implementation project(':code:features-index:index-journal')
|
||||||
implementation project(':code:features-index:lexicon')
|
implementation project(':code:features-index:lexicon')
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
|
implementation project(':code:common:service')
|
||||||
|
|
||||||
implementation libs.lombok
|
implementation libs.lombok
|
||||||
annotationProcessor libs.lombok
|
annotationProcessor libs.lombok
|
||||||
|
@ -21,11 +21,14 @@ import java.nio.file.Files;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
|
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
|
|
||||||
import static nu.marginalia.index.full.ReverseIndexFullParameters.bTreeContext;
|
import static nu.marginalia.index.full.ReverseIndexFullParameters.bTreeContext;
|
||||||
|
|
||||||
public class ReverseIndexFullConverter {
|
public class ReverseIndexFullConverter {
|
||||||
private static final int RWF_BIN_SIZE = 10_000_000;
|
private static final int RWF_BIN_SIZE = 10_000_000;
|
||||||
|
|
||||||
|
private final ServiceHeartbeat heartbeat;
|
||||||
private final Path tmpFileDir;
|
private final Path tmpFileDir;
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
@ -36,11 +39,13 @@ public class ReverseIndexFullConverter {
|
|||||||
private final Path outputFileDocs;
|
private final Path outputFileDocs;
|
||||||
private final SortingContext sortingContext;
|
private final SortingContext sortingContext;
|
||||||
|
|
||||||
public ReverseIndexFullConverter(Path tmpFileDir,
|
public ReverseIndexFullConverter(ServiceHeartbeat heartbeat,
|
||||||
|
Path tmpFileDir,
|
||||||
IndexJournalReader journalReader,
|
IndexJournalReader journalReader,
|
||||||
DomainRankings domainRankings,
|
DomainRankings domainRankings,
|
||||||
Path outputFileWords,
|
Path outputFileWords,
|
||||||
Path outputFileDocs) {
|
Path outputFileDocs) {
|
||||||
|
this.heartbeat = heartbeat;
|
||||||
this.tmpFileDir = tmpFileDir;
|
this.tmpFileDir = tmpFileDir;
|
||||||
this.journalReader = journalReader;
|
this.journalReader = journalReader;
|
||||||
this.domainRankings = domainRankings;
|
this.domainRankings = domainRankings;
|
||||||
@ -49,6 +54,18 @@ public class ReverseIndexFullConverter {
|
|||||||
this.sortingContext = new SortingContext(tmpFileDir, 64_000);
|
this.sortingContext = new SortingContext(tmpFileDir, 64_000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Steps of the full reverse index conversion, used for progress reporting.
 * The declaration order must match the order in which convert() reports them,
 * since the reported progress is derived from the ordinal of the current step.
 */
public enum TaskSteps {
    /** Reported before the journal statistics are read */
    ACCUMULATE_STATISTICS,

    /** Reported before the per-word counts are accumulated */
    INCREMENT_OFFSETS,

    /** Reported before the counts are transformed into offsets */
    COUNT_OFFSETS,

    /** Reported before the intermediate documents file is constructed */
    CREATE_INTERMEDIATE_DOCS,

    /** Reported before the intermediate documents file is sorted */
    SORT_INTERMEDIATE_DOCS,

    /** Reported before the size of the final index is estimated */
    SIZING,

    /** Reported before the final documents file is constructed */
    FINALIZING_DOCS,

    /** Reported before the intermediate file is deleted and the outputs are forced */
    FORCE,

    /** Reported once the conversion is complete */
    FINISHED,
}
|
||||||
|
|
||||||
public void convert() throws IOException {
|
public void convert() throws IOException {
|
||||||
deleteOldFiles();
|
deleteOldFiles();
|
||||||
|
|
||||||
@ -57,28 +74,32 @@ public class ReverseIndexFullConverter {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
final IndexJournalStatistics statistics = journalReader.getStatistics();
|
|
||||||
|
|
||||||
final Path intermediateUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat");
|
final Path intermediateUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat");
|
||||||
|
|
||||||
|
try (var progress = heartbeat.createServiceProcessHeartbeat(TaskSteps.class, "reverseIndexFullConverter")) {
|
||||||
|
progress.progress(TaskSteps.ACCUMULATE_STATISTICS);
|
||||||
|
|
||||||
try {
|
final IndexJournalStatistics statistics = journalReader.getStatistics();
|
||||||
final long wordsFileSize = statistics.highestWord() + 1;
|
final long wordsFileSize = statistics.highestWord() + 1;
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.INCREMENT_OFFSETS);
|
||||||
|
|
||||||
logger.debug("Words file size: {}", wordsFileSize);
|
logger.debug("Words file size: {}", wordsFileSize);
|
||||||
// Create a count of how many documents has contains each word
|
// Create a count of how many documents has contains each word
|
||||||
final LongArray wordsOffsets = LongArray.allocate(wordsFileSize);
|
final LongArray wordsOffsets = LongArray.allocate(wordsFileSize);
|
||||||
|
|
||||||
logger.info("Gathering Offsets");
|
|
||||||
journalReader.forEachWordId(wordsOffsets::increment);
|
journalReader.forEachWordId(wordsOffsets::increment);
|
||||||
|
progress.progress(TaskSteps.COUNT_OFFSETS);
|
||||||
|
|
||||||
wordsOffsets.transformEach(0, wordsFileSize, new CountToOffsetTransformer(ReverseIndexFullParameters.ENTRY_SIZE));
|
wordsOffsets.transformEach(0, wordsFileSize, new CountToOffsetTransformer(ReverseIndexFullParameters.ENTRY_SIZE));
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.CREATE_INTERMEDIATE_DOCS);
|
||||||
|
|
||||||
// Construct an intermediate representation of the reverse documents index
|
// Construct an intermediate representation of the reverse documents index
|
||||||
try (FileChannel intermediateDocChannel =
|
try (FileChannel intermediateDocChannel =
|
||||||
(FileChannel) Files.newByteChannel(intermediateUrlsFile,
|
(FileChannel) Files.newByteChannel(intermediateUrlsFile,
|
||||||
StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
|
StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
|
||||||
{
|
{
|
||||||
logger.info("Creating Intermediate Docs File");
|
|
||||||
|
|
||||||
// Construct intermediate index
|
// Construct intermediate index
|
||||||
try (RandomWriteFunnel intermediateDocumentWriteFunnel = new RandomWriteFunnel(tmpFileDir, RWF_BIN_SIZE);
|
try (RandomWriteFunnel intermediateDocumentWriteFunnel = new RandomWriteFunnel(tmpFileDir, RWF_BIN_SIZE);
|
||||||
@ -89,8 +110,7 @@ public class ReverseIndexFullConverter {
|
|||||||
intermediateDocumentWriteFunnel.write(intermediateDocChannel);
|
intermediateDocumentWriteFunnel.write(intermediateDocChannel);
|
||||||
}
|
}
|
||||||
intermediateDocChannel.force(false);
|
intermediateDocChannel.force(false);
|
||||||
|
progress.progress(TaskSteps.SORT_INTERMEDIATE_DOCS);
|
||||||
logger.info("Sorting Intermediate Docs File");
|
|
||||||
|
|
||||||
// Sort each segment of the intermediate file
|
// Sort each segment of the intermediate file
|
||||||
{
|
{
|
||||||
@ -102,28 +122,29 @@ public class ReverseIndexFullConverter {
|
|||||||
intermediateDocs.force();
|
intermediateDocs.force();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.SIZING);
|
||||||
logger.info("Sizing");
|
|
||||||
|
|
||||||
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(
|
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(
|
||||||
ReverseIndexFullParameters.bTreeContext,
|
ReverseIndexFullParameters.bTreeContext,
|
||||||
ReverseIndexFullParameters.ENTRY_SIZE);
|
ReverseIndexFullParameters.ENTRY_SIZE);
|
||||||
|
|
||||||
wordsOffsets.fold(0, 0, wordsOffsets.size(), sizeEstimator);
|
wordsOffsets.fold(0, 0, wordsOffsets.size(), sizeEstimator);
|
||||||
|
progress.progress(TaskSteps.FINALIZING_DOCS);
|
||||||
logger.info("Finalizing Docs File");
|
|
||||||
|
|
||||||
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
|
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
|
||||||
// Construct the proper reverse index
|
// Construct the proper reverse index
|
||||||
wordsOffsets.transformEachIO(0, wordsOffsets.size(), new ReverseIndexBTreeTransformer(finalDocs, ReverseIndexFullParameters.ENTRY_SIZE, bTreeContext, intermediateDocChannel));
|
wordsOffsets.transformEachIO(0, wordsOffsets.size(), new ReverseIndexBTreeTransformer(finalDocs, ReverseIndexFullParameters.ENTRY_SIZE, bTreeContext, intermediateDocChannel));
|
||||||
wordsOffsets.write(outputFileWords);
|
wordsOffsets.write(outputFileWords);
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.FORCE);
|
||||||
|
|
||||||
// Attempt to clean up before forcing (important disk space preservation)
|
// Attempt to clean up before forcing (important disk space preservation)
|
||||||
Files.deleteIfExists(intermediateUrlsFile);
|
Files.deleteIfExists(intermediateUrlsFile);
|
||||||
|
|
||||||
wordsOffsets.force();
|
wordsOffsets.force();
|
||||||
finalDocs.force();
|
finalDocs.force();
|
||||||
logger.info("Done");
|
|
||||||
|
progress.progress(TaskSteps.FINISHED);
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
|
@ -12,6 +12,7 @@ import nu.marginalia.index.journal.model.IndexJournalStatistics;
|
|||||||
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
import nu.marginalia.index.journal.reader.IndexJournalReader;
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
import nu.marginalia.ranking.DomainRankings;
|
||||||
import nu.marginalia.rwf.RandomWriteFunnel;
|
import nu.marginalia.rwf.RandomWriteFunnel;
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -21,9 +22,12 @@ import java.nio.file.Files;
|
|||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
|
|
||||||
|
import static nu.marginalia.index.priority.ReverseIndexPriorityParameters.bTreeContext;
|
||||||
|
|
||||||
public class ReverseIndexPriorityConverter {
|
public class ReverseIndexPriorityConverter {
|
||||||
private static final int RWF_BIN_SIZE = 10_000_000;
|
private static final int RWF_BIN_SIZE = 10_000_000;
|
||||||
|
|
||||||
|
private final ServiceHeartbeat heartbeat;
|
||||||
private final Path tmpFileDir;
|
private final Path tmpFileDir;
|
||||||
|
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
@ -34,11 +38,13 @@ public class ReverseIndexPriorityConverter {
|
|||||||
private final Path outputFileDocs;
|
private final Path outputFileDocs;
|
||||||
private final SortingContext sortingContext;
|
private final SortingContext sortingContext;
|
||||||
|
|
||||||
public ReverseIndexPriorityConverter(Path tmpFileDir,
|
public ReverseIndexPriorityConverter(ServiceHeartbeat heartbeat,
|
||||||
|
Path tmpFileDir,
|
||||||
IndexJournalReader journalReader,
|
IndexJournalReader journalReader,
|
||||||
DomainRankings domainRankings,
|
DomainRankings domainRankings,
|
||||||
Path outputFileWords,
|
Path outputFileWords,
|
||||||
Path outputFileDocs) {
|
Path outputFileDocs) {
|
||||||
|
this.heartbeat = heartbeat;
|
||||||
this.tmpFileDir = tmpFileDir;
|
this.tmpFileDir = tmpFileDir;
|
||||||
this.journalReader = journalReader;
|
this.journalReader = journalReader;
|
||||||
this.domainRankings = domainRankings;
|
this.domainRankings = domainRankings;
|
||||||
@ -47,6 +53,18 @@ public class ReverseIndexPriorityConverter {
|
|||||||
this.sortingContext = new SortingContext(tmpFileDir, 64_000);
|
this.sortingContext = new SortingContext(tmpFileDir, 64_000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Steps of the priority reverse index conversion, used for progress reporting.
 * The declaration order must match the order in which convert() reports them,
 * since the reported progress is derived from the ordinal of the current step.
 */
public enum TaskSteps {
    /** Reported before the journal statistics are read */
    ACCUMULATE_STATISTICS,

    /** Reported before the per-word counts are accumulated */
    INCREMENT_OFFSETS,

    /** Reported before the counts are transformed into offsets */
    COUNT_OFFSETS,

    /** Reported before the intermediate documents file is constructed */
    CREATE_INTERMEDIATE_DOCS,

    /** Reported before the intermediate documents file is sorted */
    SORT_INTERMEDIATE_DOCS,

    /** Reported before the size of the final index is estimated */
    SIZING,

    /** Reported before the final documents file is constructed */
    FINALIZING_DOCS,

    /** Reported before the intermediate file is deleted and the outputs are forced */
    FORCE,

    /** Reported once the conversion is complete */
    FINISHED,
}
|
||||||
|
|
||||||
public void convert() throws IOException {
|
public void convert() throws IOException {
|
||||||
deleteOldFiles();
|
deleteOldFiles();
|
||||||
|
|
||||||
@ -55,28 +73,32 @@ public class ReverseIndexPriorityConverter {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
final IndexJournalStatistics statistics = journalReader.getStatistics();
|
|
||||||
|
|
||||||
final Path intermediateUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat");
|
final Path intermediateUrlsFile = Files.createTempFile(tmpFileDir, "urls-sorted", ".dat");
|
||||||
|
|
||||||
|
try (var progress = heartbeat.createServiceProcessHeartbeat(TaskSteps.class, "reverseIndexPriorityConverter")) {
|
||||||
|
progress.progress(TaskSteps.ACCUMULATE_STATISTICS);
|
||||||
|
|
||||||
try {
|
final IndexJournalStatistics statistics = journalReader.getStatistics();
|
||||||
final long wordsFileSize = statistics.highestWord() + 1;
|
final long wordsFileSize = statistics.highestWord() + 1;
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.INCREMENT_OFFSETS);
|
||||||
|
|
||||||
logger.debug("Words file size: {}", wordsFileSize);
|
logger.debug("Words file size: {}", wordsFileSize);
|
||||||
// Create a count of how many documents has contains each word
|
// Create a count of how many documents has contains each word
|
||||||
final LongArray wordsOffsets = LongArray.allocate(wordsFileSize);
|
final LongArray wordsOffsets = LongArray.allocate(wordsFileSize);
|
||||||
|
|
||||||
logger.info("Gathering Offsets");
|
|
||||||
journalReader.forEachWordId(wordsOffsets::increment);
|
journalReader.forEachWordId(wordsOffsets::increment);
|
||||||
|
progress.progress(TaskSteps.COUNT_OFFSETS);
|
||||||
|
|
||||||
wordsOffsets.transformEach(0, wordsFileSize, new CountToOffsetTransformer(ReverseIndexPriorityParameters.ENTRY_SIZE));
|
wordsOffsets.transformEach(0, wordsFileSize, new CountToOffsetTransformer(ReverseIndexPriorityParameters.ENTRY_SIZE));
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.CREATE_INTERMEDIATE_DOCS);
|
||||||
|
|
||||||
// Construct an intermediate representation of the reverse documents index
|
// Construct an intermediate representation of the reverse documents index
|
||||||
try (FileChannel intermediateDocChannel =
|
try (FileChannel intermediateDocChannel =
|
||||||
(FileChannel) Files.newByteChannel(intermediateUrlsFile,
|
(FileChannel) Files.newByteChannel(intermediateUrlsFile,
|
||||||
StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
|
StandardOpenOption.CREATE, StandardOpenOption.READ, StandardOpenOption.WRITE))
|
||||||
{
|
{
|
||||||
logger.info("Creating Intermediate Docs File");
|
|
||||||
|
|
||||||
// Construct intermediate index
|
// Construct intermediate index
|
||||||
try (RandomWriteFunnel intermediateDocumentWriteFunnel = new RandomWriteFunnel(tmpFileDir, RWF_BIN_SIZE);
|
try (RandomWriteFunnel intermediateDocumentWriteFunnel = new RandomWriteFunnel(tmpFileDir, RWF_BIN_SIZE);
|
||||||
@ -87,8 +109,7 @@ public class ReverseIndexPriorityConverter {
|
|||||||
intermediateDocumentWriteFunnel.write(intermediateDocChannel);
|
intermediateDocumentWriteFunnel.write(intermediateDocChannel);
|
||||||
}
|
}
|
||||||
intermediateDocChannel.force(false);
|
intermediateDocChannel.force(false);
|
||||||
|
progress.progress(TaskSteps.SORT_INTERMEDIATE_DOCS);
|
||||||
logger.info("Sorting Intermediate Docs File");
|
|
||||||
|
|
||||||
// Sort each segment of the intermediate file
|
// Sort each segment of the intermediate file
|
||||||
{
|
{
|
||||||
@ -100,32 +121,29 @@ public class ReverseIndexPriorityConverter {
|
|||||||
intermediateDocs.force();
|
intermediateDocs.force();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.SIZING);
|
||||||
|
|
||||||
logger.info("Sizing");
|
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(
|
||||||
|
bTreeContext,
|
||||||
IndexSizeEstimator indexSizeEstimator = new IndexSizeEstimator(
|
|
||||||
ReverseIndexPriorityParameters.bTreeContext,
|
|
||||||
ReverseIndexPriorityParameters.ENTRY_SIZE);
|
ReverseIndexPriorityParameters.ENTRY_SIZE);
|
||||||
|
|
||||||
wordsOffsets.fold(0, 0, wordsOffsets.size(), indexSizeEstimator);
|
wordsOffsets.fold(0, 0, wordsOffsets.size(), sizeEstimator);
|
||||||
|
progress.progress(TaskSteps.FINALIZING_DOCS);
|
||||||
|
|
||||||
logger.info("Finalizing Docs File");
|
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
|
||||||
|
|
||||||
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, indexSizeEstimator.size);
|
|
||||||
// Construct the proper reverse index
|
// Construct the proper reverse index
|
||||||
wordsOffsets.transformEachIO(0, wordsOffsets.size(),
|
wordsOffsets.transformEachIO(0, wordsOffsets.size(), new ReverseIndexBTreeTransformer(finalDocs, ReverseIndexPriorityParameters.ENTRY_SIZE, bTreeContext, intermediateDocChannel));
|
||||||
new ReverseIndexBTreeTransformer(finalDocs,
|
|
||||||
ReverseIndexPriorityParameters.ENTRY_SIZE,
|
|
||||||
ReverseIndexPriorityParameters.bTreeContext,
|
|
||||||
intermediateDocChannel));
|
|
||||||
wordsOffsets.write(outputFileWords);
|
wordsOffsets.write(outputFileWords);
|
||||||
|
|
||||||
|
progress.progress(TaskSteps.FORCE);
|
||||||
|
|
||||||
// Attempt to clean up before forcing (important disk space preservation)
|
// Attempt to clean up before forcing (important disk space preservation)
|
||||||
Files.deleteIfExists(intermediateUrlsFile);
|
Files.deleteIfExists(intermediateUrlsFile);
|
||||||
|
|
||||||
wordsOffsets.force();
|
wordsOffsets.force();
|
||||||
finalDocs.force();
|
finalDocs.force();
|
||||||
logger.info("Done");
|
|
||||||
|
progress.progress(TaskSteps.FINISHED);
|
||||||
}
|
}
|
||||||
|
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
|
@ -13,9 +13,11 @@ import nu.marginalia.ranking.DomainRankings;
|
|||||||
import nu.marginalia.lexicon.KeywordLexicon;
|
import nu.marginalia.lexicon.KeywordLexicon;
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
||||||
import nu.marginalia.model.idx.DocumentMetadata;
|
import nu.marginalia.model.idx.DocumentMetadata;
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
import nu.marginalia.test.TestUtil;
|
import nu.marginalia.test.TestUtil;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.mockito.Mockito;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -84,7 +86,9 @@ class ReverseIndexFullConverterTest {
|
|||||||
var docsFile = dataDir.resolve("docs.dat");
|
var docsFile = dataDir.resolve("docs.dat");
|
||||||
var journalReader = new IndexJournalReaderSingleCompressedFile(indexFile);
|
var journalReader = new IndexJournalReaderSingleCompressedFile(indexFile);
|
||||||
|
|
||||||
new ReverseIndexFullConverter(tmpDir, journalReader, new DomainRankings(), wordsFile, docsFile)
|
new ReverseIndexFullConverter(
|
||||||
|
Mockito.mock(ServiceHeartbeat.class),
|
||||||
|
tmpDir, journalReader, new DomainRankings(), wordsFile, docsFile)
|
||||||
.convert();
|
.convert();
|
||||||
|
|
||||||
var reverseIndexReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
var reverseIndexReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
||||||
|
@ -14,10 +14,12 @@ import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
|||||||
import nu.marginalia.ranking.DomainRankings;
|
import nu.marginalia.ranking.DomainRankings;
|
||||||
import nu.marginalia.lexicon.KeywordLexicon;
|
import nu.marginalia.lexicon.KeywordLexicon;
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
import nu.marginalia.test.TestUtil;
|
import nu.marginalia.test.TestUtil;
|
||||||
import org.junit.jupiter.api.AfterEach;
|
import org.junit.jupiter.api.AfterEach;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.mockito.Mockito;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -117,7 +119,7 @@ class ReverseIndexFullConverterTest2 {
|
|||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
Path tmpDir = Path.of("/tmp");
|
||||||
|
|
||||||
new ReverseIndexFullConverter(tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile), new DomainRankings(), wordsFile, docsFile).convert();
|
new ReverseIndexFullConverter(Mockito.mock(ServiceHeartbeat.class), tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile), new DomainRankings(), wordsFile, docsFile).convert();
|
||||||
|
|
||||||
var reverseReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
var reverseReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
||||||
|
|
||||||
@ -142,7 +144,7 @@ class ReverseIndexFullConverterTest2 {
|
|||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
Path tmpDir = Path.of("/tmp");
|
||||||
|
|
||||||
new ReverseIndexFullConverter(tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile, null, ReverseIndexPriorityParameters::filterPriorityRecord), new DomainRankings(), wordsFile, docsFile).convert();
|
new ReverseIndexFullConverter(Mockito.mock(ServiceHeartbeat.class), tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile, null, ReverseIndexPriorityParameters::filterPriorityRecord), new DomainRankings(), wordsFile, docsFile).convert();
|
||||||
|
|
||||||
var reverseReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
var reverseReader = new ReverseIndexFullReader(wordsFile, docsFile);
|
||||||
|
|
||||||
|
@ -14,10 +14,12 @@ import nu.marginalia.lexicon.KeywordLexicon;
|
|||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
import nu.marginalia.lexicon.journal.KeywordLexiconJournal;
|
||||||
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
import nu.marginalia.lexicon.journal.KeywordLexiconJournalMode;
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
import nu.marginalia.ranking.DomainRankings;
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
import nu.marginalia.test.TestUtil;
|
import nu.marginalia.test.TestUtil;
|
||||||
import org.junit.jupiter.api.AfterEach;
|
import org.junit.jupiter.api.AfterEach;
|
||||||
import org.junit.jupiter.api.BeforeEach;
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.mockito.Mockito;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -117,7 +119,7 @@ class ReverseIndexPriorityConverterTest2 {
|
|||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
Path tmpDir = Path.of("/tmp");
|
||||||
|
|
||||||
new ReverseIndexPriorityConverter(tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile), new DomainRankings(), wordsFile, docsFile).convert();
|
new ReverseIndexPriorityConverter(Mockito.mock(ServiceHeartbeat.class), tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile), new DomainRankings(), wordsFile, docsFile).convert();
|
||||||
|
|
||||||
var reverseReader = new ReverseIndexPriorityReader(wordsFile, docsFile);
|
var reverseReader = new ReverseIndexPriorityReader(wordsFile, docsFile);
|
||||||
|
|
||||||
@ -142,7 +144,7 @@ class ReverseIndexPriorityConverterTest2 {
|
|||||||
|
|
||||||
Path tmpDir = Path.of("/tmp");
|
Path tmpDir = Path.of("/tmp");
|
||||||
|
|
||||||
new ReverseIndexPriorityConverter(tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile, null, ReverseIndexPriorityParameters::filterPriorityRecord), new DomainRankings(), wordsFile, docsFile).convert();
|
new ReverseIndexPriorityConverter(Mockito.mock(ServiceHeartbeat.class), tmpDir, new IndexJournalReaderSingleCompressedFile(indexFile, null, ReverseIndexPriorityParameters::filterPriorityRecord), new DomainRankings(), wordsFile, docsFile).convert();
|
||||||
|
|
||||||
var reverseReader = new ReverseIndexPriorityReader(wordsFile, docsFile);
|
var reverseReader = new ReverseIndexPriorityReader(wordsFile, docsFile);
|
||||||
|
|
||||||
|
@ -266,11 +266,11 @@ public class ControlService extends Service {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private Object processesModel(Request request, Response response) {
|
private Object processesModel(Request request, Response response) {
|
||||||
var heartbeatsAll = heartbeatService.getProcessHeartbeats();
|
var processes = heartbeatService.getProcessHeartbeats();
|
||||||
var byIsJob = heartbeatsAll.stream().collect(Collectors.partitioningBy(ProcessHeartbeat::isServiceJob));
|
var jobs = heartbeatService.getTaskHeartbeats();
|
||||||
|
|
||||||
return Map.of("processes", byIsJob.get(false),
|
return Map.of("processes", processes,
|
||||||
"jobs", byIsJob.get(true),
|
"jobs", jobs,
|
||||||
"actors", controlActorService.getActorStates(),
|
"actors", controlActorService.getActorStates(),
|
||||||
"messages", messageQueueViewService.getLastEntries(20));
|
"messages", messageQueueViewService.getLastEntries(20));
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@ package nu.marginalia.control.actor.monitor;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import nu.marginalia.control.model.ProcessHeartbeat;
|
import nu.marginalia.control.model.ProcessHeartbeat;
|
||||||
|
import nu.marginalia.control.model.ServiceHeartbeat;
|
||||||
import nu.marginalia.control.svc.HeartbeatService;
|
import nu.marginalia.control.svc.HeartbeatService;
|
||||||
import nu.marginalia.control.svc.ProcessService;
|
import nu.marginalia.control.svc.ProcessService;
|
||||||
import nu.marginalia.mqsm.StateFactory;
|
import nu.marginalia.mqsm.StateFactory;
|
||||||
@ -11,6 +12,7 @@ import nu.marginalia.mqsm.graph.GraphState;
|
|||||||
import nu.marginalia.mqsm.graph.ResumeBehavior;
|
import nu.marginalia.mqsm.graph.ResumeBehavior;
|
||||||
|
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class ProcessLivenessMonitorActor extends AbstractStateGraph {
|
public class ProcessLivenessMonitorActor extends AbstractStateGraph {
|
||||||
@ -46,12 +48,33 @@ public class ProcessLivenessMonitorActor extends AbstractStateGraph {
|
|||||||
public void monitor() throws Exception {
|
public void monitor() throws Exception {
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
var processHeartbeats = heartbeatService.getProcessHeartbeats();
|
for (var heartbeat : heartbeatService.getProcessHeartbeats()) {
|
||||||
|
if (!heartbeat.isRunning()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var processId = heartbeat.getProcessId();
|
||||||
|
if (null == processId)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (processService.isRunning(processId) && heartbeat.lastSeenMillis() < 10000) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
heartbeatService.flagProcessAsStopped(heartbeat);
|
||||||
|
}
|
||||||
|
|
||||||
|
var livingServices = heartbeatService.getServiceHeartbeats().stream()
|
||||||
|
.filter(ServiceHeartbeat::alive)
|
||||||
|
.map(ServiceHeartbeat::uuidFull)
|
||||||
|
.collect(Collectors.toSet());
|
||||||
|
|
||||||
|
for (var heartbeat : heartbeatService.getTaskHeartbeats()) {
|
||||||
|
if (!livingServices.contains(heartbeat.serviceUuuidFull())) {
|
||||||
|
heartbeatService.removeTaskHeartbeat(heartbeat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
processHeartbeats.stream()
|
|
||||||
.filter(ProcessHeartbeat::isRunning)
|
|
||||||
.filter(p -> !processService.isRunning(p.getProcessId()))
|
|
||||||
.forEach(heartbeatService::flagProcessAsStopped);
|
|
||||||
|
|
||||||
TimeUnit.SECONDS.sleep(60);
|
TimeUnit.SECONDS.sleep(60);
|
||||||
}
|
}
|
||||||
|
@ -44,7 +44,17 @@ public record ProcessHeartbeat(
|
|||||||
case "loader" -> ProcessService.ProcessId.LOADER;
|
case "loader" -> ProcessService.ProcessId.LOADER;
|
||||||
case "website-adjacencies-calculator" -> ProcessService.ProcessId.ADJACENCIES_CALCULATOR;
|
case "website-adjacencies-calculator" -> ProcessService.ProcessId.ADJACENCIES_CALCULATOR;
|
||||||
case "crawl-job-extractor" -> ProcessService.ProcessId.CRAWL_JOB_EXTRACTOR;
|
case "crawl-job-extractor" -> ProcessService.ProcessId.CRAWL_JOB_EXTRACTOR;
|
||||||
default -> throw new RuntimeException("Unknown process base: " + processBase);
|
default -> null;
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String displayName() {
|
||||||
|
var pid = getProcessId();
|
||||||
|
if (pid != null) {
|
||||||
|
return pid.name();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return processBase;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,29 @@
|
|||||||
|
package nu.marginalia.control.model;
|
||||||
|
|
||||||
|
|
||||||
|
public record TaskHeartbeat(
|
||||||
|
String taskName,
|
||||||
|
String taskBase,
|
||||||
|
String serviceUuuidFull,
|
||||||
|
double lastSeenMillis,
|
||||||
|
Integer progress,
|
||||||
|
String stage,
|
||||||
|
String status
|
||||||
|
) {
|
||||||
|
public boolean isStopped() {
|
||||||
|
return "STOPPED".equals(status);
|
||||||
|
}
|
||||||
|
public boolean isRunning() {
|
||||||
|
return "RUNNING".equals(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String progressStyle() {
|
||||||
|
if ("RUNNING".equals(status) && progress != null) {
|
||||||
|
return """
|
||||||
|
background: linear-gradient(90deg, #ccc 0%%, #ccc %d%%, #fff %d%%)
|
||||||
|
""".formatted(progress, progress, progress);
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -5,6 +5,7 @@ import com.google.inject.Singleton;
|
|||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.control.model.ProcessHeartbeat;
|
import nu.marginalia.control.model.ProcessHeartbeat;
|
||||||
import nu.marginalia.control.model.ServiceHeartbeat;
|
import nu.marginalia.control.model.ServiceHeartbeat;
|
||||||
|
import nu.marginalia.control.model.TaskHeartbeat;
|
||||||
import nu.marginalia.service.control.ServiceEventLog;
|
import nu.marginalia.service.control.ServiceEventLog;
|
||||||
|
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
@ -51,6 +52,49 @@ public class HeartbeatService {
|
|||||||
return heartbeats;
|
return heartbeats;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<TaskHeartbeat> getTaskHeartbeats() {
|
||||||
|
List<TaskHeartbeat> heartbeats = new ArrayList<>();
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
SELECT TASK_NAME, TASK_BASE, SERVICE_INSTANCE, STATUS, STAGE_NAME, PROGRESS, TIMESTAMPDIFF(MICROSECOND, TASK_HEARTBEAT.HEARTBEAT_TIME, CURRENT_TIMESTAMP(6)) AS TSDIFF
|
||||||
|
FROM TASK_HEARTBEAT
|
||||||
|
INNER JOIN SERVICE_HEARTBEAT ON SERVICE_HEARTBEAT.`INSTANCE` = SERVICE_INSTANCE
|
||||||
|
""")) {
|
||||||
|
var rs = stmt.executeQuery();
|
||||||
|
while (rs.next()) {
|
||||||
|
int progress = rs.getInt("PROGRESS");
|
||||||
|
heartbeats.add(new TaskHeartbeat(
|
||||||
|
rs.getString("TASK_NAME"),
|
||||||
|
rs.getString("TASK_BASE"),
|
||||||
|
rs.getString("SERVICE_INSTANCE"),
|
||||||
|
rs.getLong("TSDIFF") / 1000.,
|
||||||
|
progress < 0 ? null : progress,
|
||||||
|
rs.getString("STAGE_NAME"),
|
||||||
|
rs.getString("STATUS")
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
return heartbeats;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void removeTaskHeartbeat(TaskHeartbeat heartbeat) {
|
||||||
|
try (var conn = dataSource.getConnection();
|
||||||
|
var stmt = conn.prepareStatement("""
|
||||||
|
DELETE FROM TASK_HEARTBEAT
|
||||||
|
WHERE SERVICE_INSTANCE = ?
|
||||||
|
""")) {
|
||||||
|
|
||||||
|
stmt.setString(1, heartbeat.serviceUuuidFull());
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public List<ProcessHeartbeat> getProcessHeartbeats() {
|
public List<ProcessHeartbeat> getProcessHeartbeats() {
|
||||||
List<ProcessHeartbeat> heartbeats = new ArrayList<>();
|
List<ProcessHeartbeat> heartbeats = new ArrayList<>();
|
||||||
|
|
||||||
@ -99,5 +143,4 @@ public class HeartbeatService {
|
|||||||
throw new RuntimeException(ex);
|
throw new RuntimeException(ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -32,8 +32,7 @@ public class ProcessService {
|
|||||||
CONVERTER("converter-process/bin/converter-process"),
|
CONVERTER("converter-process/bin/converter-process"),
|
||||||
LOADER("loader-process/bin/loader-process"),
|
LOADER("loader-process/bin/loader-process"),
|
||||||
ADJACENCIES_CALCULATOR("website-adjacencies-calculator/bin/website-adjacencies-calculator"),
|
ADJACENCIES_CALCULATOR("website-adjacencies-calculator/bin/website-adjacencies-calculator"),
|
||||||
CRAWL_JOB_EXTRACTOR("crawl-job-extractor-process/bin/crawl-job-extractor-process"),
|
CRAWL_JOB_EXTRACTOR("crawl-job-extractor-process/bin/crawl-job-extractor-process")
|
||||||
|
|
||||||
;
|
;
|
||||||
|
|
||||||
public final String path;
|
public final String path;
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
<script src="/refresh.js"></script>
|
<script src="/refresh.js"></script>
|
||||||
<script>
|
<script>
|
||||||
window.setInterval(() => {
|
window.setInterval(() => {
|
||||||
refresh(["processes", "actors", "queue"]);
|
refresh(["processes", "jobs", "actors", "queue"]);
|
||||||
}, 2000);
|
}, 2000);
|
||||||
</script>
|
</script>
|
||||||
</html>
|
</html>
|
@ -1,5 +1,6 @@
|
|||||||
|
|
||||||
<h1>Processes</h1>
|
<h1>Processes</h1>
|
||||||
|
|
||||||
<table id="processes">
|
<table id="processes">
|
||||||
<tr>
|
<tr>
|
||||||
<th>Process ID</th>
|
<th>Process ID</th>
|
||||||
@ -10,7 +11,7 @@
|
|||||||
</tr>
|
</tr>
|
||||||
{{#each processes}}
|
{{#each processes}}
|
||||||
<tr class="{{#if isMissing}}missing{{/if}}">
|
<tr class="{{#if isMissing}}missing{{/if}}">
|
||||||
<td>{{processId}}</td>
|
<td>{{displayName}}</td>
|
||||||
<td title="{{uuidFull}}">
|
<td title="{{uuidFull}}">
|
||||||
<span style="background-color: {{uuidColor}}" class="uuidPip"> </span><span style="background-color: {{uuidColor2}}" class="uuidPip"> </span>
|
<span style="background-color: {{uuidColor}}" class="uuidPip"> </span><span style="background-color: {{uuidColor2}}" class="uuidPip"> </span>
|
||||||
{{uuid}}
|
{{uuid}}
|
||||||
@ -21,3 +22,21 @@
|
|||||||
</tr>
|
</tr>
|
||||||
{{/each}}
|
{{/each}}
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
|
<h1>Jobs</h1>
|
||||||
|
<table id="jobs">
|
||||||
|
<tr>
|
||||||
|
<th>Process ID</th>
|
||||||
|
<th>Status</th>
|
||||||
|
<th>Progress</th>
|
||||||
|
<th>Last Seen (ms)</th>
|
||||||
|
</tr>
|
||||||
|
{{#each jobs}}
|
||||||
|
<tr class="{{#if isMissing}}missing{{/if}}">
|
||||||
|
<td>{{taskBase}}</td>
|
||||||
|
<td>{{status}}</td>
|
||||||
|
<td style="{{progressStyle}}">{{#if progress}}{{progress}}%{{/if}} {{stage}}</td>
|
||||||
|
<td>{{#unless isStopped}}{{lastSeenMillis}}{{/unless}}</td>
|
||||||
|
</tr>
|
||||||
|
{{/each}}
|
||||||
|
</table>
|
@ -15,9 +15,9 @@ import nu.marginalia.index.full.ReverseIndexFullConverter;
|
|||||||
import nu.marginalia.index.priority.ReverseIndexPriorityReader;
|
import nu.marginalia.index.priority.ReverseIndexPriorityReader;
|
||||||
import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
|
import nu.marginalia.index.priority.ReverseIndexPriorityParameters;
|
||||||
import nu.marginalia.index.full.ReverseIndexFullReader;
|
import nu.marginalia.index.full.ReverseIndexFullReader;
|
||||||
import nu.marginalia.lexicon.KeywordLexicon;
|
|
||||||
import nu.marginalia.ranking.DomainRankings;
|
import nu.marginalia.ranking.DomainRankings;
|
||||||
import nu.marginalia.index.index.SearchIndexReader;
|
import nu.marginalia.index.index.SearchIndexReader;
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -33,6 +33,7 @@ import java.util.stream.Stream;
|
|||||||
@Singleton
|
@Singleton
|
||||||
public class IndexServicesFactory {
|
public class IndexServicesFactory {
|
||||||
private final Path tmpFileDir;
|
private final Path tmpFileDir;
|
||||||
|
private final ServiceHeartbeat heartbeat;
|
||||||
private final Path liveStorage;
|
private final Path liveStorage;
|
||||||
private final Path stagingStorage;
|
private final Path stagingStorage;
|
||||||
|
|
||||||
@ -55,8 +56,10 @@ public class IndexServicesFactory {
|
|||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public IndexServicesFactory(
|
public IndexServicesFactory(
|
||||||
|
ServiceHeartbeat heartbeat,
|
||||||
FileStorageService fileStorageService
|
FileStorageService fileStorageService
|
||||||
) throws IOException, SQLException {
|
) throws IOException, SQLException {
|
||||||
|
this.heartbeat = heartbeat;
|
||||||
|
|
||||||
liveStorage = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE).asPath();
|
liveStorage = fileStorageService.getStorageByType(FileStorageType.INDEX_LIVE).asPath();
|
||||||
stagingStorage = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING).asPath();
|
stagingStorage = fileStorageService.getStorageByType(FileStorageType.INDEX_STAGING).asPath();
|
||||||
@ -100,17 +103,34 @@ public class IndexServicesFactory {
|
|||||||
).noneMatch(Files::exists);
|
).noneMatch(Files::exists);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum ConvertSteps {
|
||||||
|
FORWARD_INDEX,
|
||||||
|
FULL_REVERSE_INDEX,
|
||||||
|
PRIORITY_REVERSE_INDEX,
|
||||||
|
FINISHED
|
||||||
|
}
|
||||||
public void convertIndex(DomainRankings domainRankings) throws IOException {
|
public void convertIndex(DomainRankings domainRankings) throws IOException {
|
||||||
convertForwardIndex(domainRankings);
|
try (var hb = heartbeat.createServiceProcessHeartbeat(ConvertSteps.class, "index-conversion")) {
|
||||||
convertFullReverseIndex(domainRankings);
|
hb.progress(ConvertSteps.FORWARD_INDEX);
|
||||||
convertPriorityReverseIndex(domainRankings);
|
convertForwardIndex(domainRankings);
|
||||||
|
|
||||||
|
hb.progress(ConvertSteps.FULL_REVERSE_INDEX);
|
||||||
|
convertFullReverseIndex(domainRankings);
|
||||||
|
|
||||||
|
hb.progress(ConvertSteps.PRIORITY_REVERSE_INDEX);
|
||||||
|
convertPriorityReverseIndex(domainRankings);
|
||||||
|
|
||||||
|
hb.progress(ConvertSteps.FINISHED);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void convertFullReverseIndex(DomainRankings domainRankings) throws IOException {
|
private void convertFullReverseIndex(DomainRankings domainRankings) throws IOException {
|
||||||
logger.info("Converting full reverse index {}", writerIndexFile);
|
logger.info("Converting full reverse index {}", writerIndexFile);
|
||||||
|
|
||||||
var journalReader = new IndexJournalReaderSingleCompressedFile(writerIndexFile);
|
var journalReader = new IndexJournalReaderSingleCompressedFile(writerIndexFile);
|
||||||
var converter = new ReverseIndexFullConverter(tmpFileDir,
|
var converter = new ReverseIndexFullConverter(
|
||||||
|
heartbeat,
|
||||||
|
tmpFileDir,
|
||||||
journalReader,
|
journalReader,
|
||||||
domainRankings,
|
domainRankings,
|
||||||
revIndexWords.get(NEXT_PART).toPath(),
|
revIndexWords.get(NEXT_PART).toPath(),
|
||||||
@ -128,7 +148,8 @@ public class IndexServicesFactory {
|
|||||||
var journalReader = new IndexJournalReaderSingleCompressedFile(writerIndexFile, null,
|
var journalReader = new IndexJournalReaderSingleCompressedFile(writerIndexFile, null,
|
||||||
ReverseIndexPriorityParameters::filterPriorityRecord);
|
ReverseIndexPriorityParameters::filterPriorityRecord);
|
||||||
|
|
||||||
var converter = new ReverseIndexPriorityConverter(tmpFileDir,
|
var converter = new ReverseIndexPriorityConverter(heartbeat,
|
||||||
|
tmpFileDir,
|
||||||
journalReader,
|
journalReader,
|
||||||
domainRankings,
|
domainRankings,
|
||||||
revPrioIndexWords.get(NEXT_PART).toPath(),
|
revPrioIndexWords.get(NEXT_PART).toPath(),
|
||||||
@ -144,7 +165,8 @@ public class IndexServicesFactory {
|
|||||||
|
|
||||||
logger.info("Converting forward index data {}", writerIndexFile);
|
logger.info("Converting forward index data {}", writerIndexFile);
|
||||||
|
|
||||||
new ForwardIndexConverter(writerIndexFile.toFile(),
|
new ForwardIndexConverter(heartbeat,
|
||||||
|
writerIndexFile.toFile(),
|
||||||
fwdIndexDocId.get(NEXT_PART).toPath(),
|
fwdIndexDocId.get(NEXT_PART).toPath(),
|
||||||
fwdIndexDocData.get(NEXT_PART).toPath(),
|
fwdIndexDocData.get(NEXT_PART).toPath(),
|
||||||
domainRankings)
|
domainRankings)
|
||||||
|
@ -21,6 +21,7 @@ import nu.marginalia.index.config.RankingSettings;
|
|||||||
import nu.marginalia.ranking.DomainRankings;
|
import nu.marginalia.ranking.DomainRankings;
|
||||||
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
import nu.marginalia.index.client.model.query.SearchSetIdentifier;
|
||||||
import nu.marginalia.index.db.DbUpdateRanks;
|
import nu.marginalia.index.db.DbUpdateRanks;
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
@ -30,6 +31,7 @@ import java.io.IOException;
|
|||||||
public class IndexSearchSetsService {
|
public class IndexSearchSetsService {
|
||||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||||
private final DomainTypes domainTypes;
|
private final DomainTypes domainTypes;
|
||||||
|
private final ServiceHeartbeat heartbeat;
|
||||||
private final DbUpdateRanks dbUpdateRanks;
|
private final DbUpdateRanks dbUpdateRanks;
|
||||||
private final RankingDomainFetcher similarityDomains;
|
private final RankingDomainFetcher similarityDomains;
|
||||||
private final RankingSettings rankingSettings;
|
private final RankingSettings rankingSettings;
|
||||||
@ -47,12 +49,14 @@ public class IndexSearchSetsService {
|
|||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public IndexSearchSetsService(DomainTypes domainTypes,
|
public IndexSearchSetsService(DomainTypes domainTypes,
|
||||||
|
ServiceHeartbeat heartbeat,
|
||||||
RankingDomainFetcher rankingDomains,
|
RankingDomainFetcher rankingDomains,
|
||||||
RankingDomainFetcherForSimilarityData similarityDomains,
|
RankingDomainFetcherForSimilarityData similarityDomains,
|
||||||
RankingSettings rankingSettings,
|
RankingSettings rankingSettings,
|
||||||
IndexServicesFactory servicesFactory,
|
IndexServicesFactory servicesFactory,
|
||||||
DbUpdateRanks dbUpdateRanks) throws IOException {
|
DbUpdateRanks dbUpdateRanks) throws IOException {
|
||||||
this.domainTypes = domainTypes;
|
this.domainTypes = domainTypes;
|
||||||
|
this.heartbeat = heartbeat;
|
||||||
|
|
||||||
this.dbUpdateRanks = dbUpdateRanks;
|
this.dbUpdateRanks = dbUpdateRanks;
|
||||||
|
|
||||||
@ -90,12 +94,34 @@ public class IndexSearchSetsService {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
enum RepartitionSteps {
|
||||||
|
UPDATE_ACADEMIA,
|
||||||
|
UPDATE_RETRO,
|
||||||
|
UPDATE_SMALL_WEB,
|
||||||
|
UPDATE_BLOGS,
|
||||||
|
UPDATE_RANKINGS,
|
||||||
|
FINISHED
|
||||||
|
}
|
||||||
public void recalculateAll() {
|
public void recalculateAll() {
|
||||||
updateAcademiaDomainsSet();
|
try (var processHeartbeat = heartbeat.createServiceProcessHeartbeat(RepartitionSteps.class, "repartitionAll")) {
|
||||||
updateRetroDomainsSet();
|
|
||||||
updateSmallWebDomainsSet();
|
processHeartbeat.progress(RepartitionSteps.UPDATE_ACADEMIA);
|
||||||
updateBlogsSet();
|
updateAcademiaDomainsSet();
|
||||||
updateDomainRankings();
|
|
||||||
|
processHeartbeat.progress(RepartitionSteps.UPDATE_RETRO);
|
||||||
|
updateRetroDomainsSet();
|
||||||
|
|
||||||
|
processHeartbeat.progress(RepartitionSteps.UPDATE_SMALL_WEB);
|
||||||
|
updateSmallWebDomainsSet();
|
||||||
|
|
||||||
|
processHeartbeat.progress(RepartitionSteps.UPDATE_BLOGS);
|
||||||
|
updateBlogsSet();
|
||||||
|
|
||||||
|
processHeartbeat.progress(RepartitionSteps.UPDATE_RANKINGS);
|
||||||
|
updateDomainRankings();
|
||||||
|
|
||||||
|
processHeartbeat.progress(RepartitionSteps.FINISHED);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void updateDomainRankings() {
|
private void updateDomainRankings() {
|
||||||
|
@ -63,6 +63,7 @@ public class IndexQueryServiceIntegrationTestModule extends AbstractModule {
|
|||||||
when(fileStorageServiceMock.getStorageByType(FileStorageType.INDEX_STAGING)).thenReturn(new FileStorage(null, null, null, slowDir.toString(), null));
|
when(fileStorageServiceMock.getStorageByType(FileStorageType.INDEX_STAGING)).thenReturn(new FileStorage(null, null, null, slowDir.toString(), null));
|
||||||
|
|
||||||
var servicesFactory = new IndexServicesFactory(
|
var servicesFactory = new IndexServicesFactory(
|
||||||
|
Mockito.mock(ServiceHeartbeat.class),
|
||||||
fileStorageServiceMock
|
fileStorageServiceMock
|
||||||
);
|
);
|
||||||
bind(IndexServicesFactory.class).toInstance(servicesFactory);
|
bind(IndexServicesFactory.class).toInstance(servicesFactory);
|
||||||
|
Loading…
Reference in New Issue
Block a user