mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
Merge pull request #122 from MarginaliaSearch/fetch-rss-feeds
Automatic RSS feed polling
This commit is contained in:
commit
a31a3b53c4
@ -11,4 +11,14 @@ public class FakeServiceHeartbeat implements ServiceHeartbeat {
|
|||||||
public void close() {}
|
public void close() {}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ServiceAdHocTaskHeartbeat createServiceAdHocTaskHeartbeat(String taskName) {
|
||||||
|
return new ServiceAdHocTaskHeartbeat() {
|
||||||
|
@Override
|
||||||
|
public void progress(String step, int stepProgress, int stepCount) {}
|
||||||
|
@Override
|
||||||
|
public void close() {}
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,7 @@
|
|||||||
|
package nu.marginalia.service.control;
|
||||||
|
|
||||||
|
public interface ServiceAdHocTaskHeartbeat extends AutoCloseable {
|
||||||
|
void progress(String step, int progress, int total);
|
||||||
|
|
||||||
|
void close();
|
||||||
|
}
|
@ -0,0 +1,190 @@
|
|||||||
|
package nu.marginalia.service.control;
|
||||||
|
|
||||||
|
|
||||||
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
/** This object sends a heartbeat to the database every few seconds,
|
||||||
|
* updating with the progress of a task within a service. Progress is tracked by providing
|
||||||
|
* enumerations corresponding to the steps in the task. It's important they're arranged in the same
|
||||||
|
* order as the steps in the task in order to get an accurate progress tracking.
|
||||||
|
*/
|
||||||
|
public class ServiceAdHocTaskHeartbeatImpl implements AutoCloseable, ServiceAdHocTaskHeartbeat {
|
||||||
|
private final Logger logger = LoggerFactory.getLogger(ServiceAdHocTaskHeartbeatImpl.class);
|
||||||
|
private final String taskName;
|
||||||
|
private final String taskBase;
|
||||||
|
private final int node;
|
||||||
|
private final String instanceUUID;
|
||||||
|
private final HikariDataSource dataSource;
|
||||||
|
|
||||||
|
|
||||||
|
private final Thread runnerThread;
|
||||||
|
private final int heartbeatInterval = Integer.getInteger("mcp.heartbeat.interval", 1);
|
||||||
|
private final String serviceInstanceUUID;
|
||||||
|
private int progress;
|
||||||
|
|
||||||
|
private volatile boolean running = false;
|
||||||
|
private volatile String step = "-";
|
||||||
|
|
||||||
|
ServiceAdHocTaskHeartbeatImpl(ServiceConfiguration configuration,
|
||||||
|
String taskName,
|
||||||
|
HikariDataSource dataSource)
|
||||||
|
{
|
||||||
|
this.taskName = configuration.serviceName() + "." + taskName + ":" + configuration.node();
|
||||||
|
this.taskBase = configuration.serviceName() + "." + taskName;
|
||||||
|
this.node = configuration.node();
|
||||||
|
this.dataSource = dataSource;
|
||||||
|
|
||||||
|
this.instanceUUID = UUID.randomUUID().toString();
|
||||||
|
this.serviceInstanceUUID = configuration.instanceUuid().toString();
|
||||||
|
|
||||||
|
heartbeatInit();
|
||||||
|
|
||||||
|
runnerThread = new Thread(this::run);
|
||||||
|
runnerThread.start();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Update the progress of the task. This is a fast function that doesn't block;
|
||||||
|
* the actual update is done in a separate thread.
|
||||||
|
*
|
||||||
|
* @param step The current step in the task.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void progress(String step, int stepProgress, int stepCount) {
|
||||||
|
this.step = step;
|
||||||
|
|
||||||
|
|
||||||
|
// off by one since we calculate the progress based on the number of steps,
|
||||||
|
// and Enum.ordinal() is zero-based (so the 5th step in a 5 step task is 4, not 5; resulting in the
|
||||||
|
// final progress being 80% and not 100%)
|
||||||
|
|
||||||
|
this.progress = (int) Math.round(100. * stepProgress / (double) stepCount);
|
||||||
|
|
||||||
|
logger.info("ServiceTask {} progress: {}%", taskBase, progress);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void shutDown() {
|
||||||
|
if (!running)
|
||||||
|
return;
|
||||||
|
|
||||||
|
running = false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
runnerThread.join();
|
||||||
|
heartbeatStop();
|
||||||
|
}
|
||||||
|
catch (InterruptedException|SQLException ex) {
|
||||||
|
logger.warn("ServiceHeartbeat shutdown failed", ex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void run() {
|
||||||
|
if (!running)
|
||||||
|
running = true;
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
|
||||||
|
try {
|
||||||
|
while (running) {
|
||||||
|
try {
|
||||||
|
heartbeatUpdate();
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.warn("ServiceHeartbeat failed to update", ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
TimeUnit.SECONDS.sleep(heartbeatInterval);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (InterruptedException ex) {
|
||||||
|
logger.error("ServiceHeartbeat caught irrecoverable exception, killing service", ex);
|
||||||
|
System.exit(255);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void heartbeatInit() {
|
||||||
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
try (var stmt = connection.prepareStatement(
|
||||||
|
"""
|
||||||
|
INSERT INTO TASK_HEARTBEAT (TASK_NAME, TASK_BASE, NODE, INSTANCE, SERVICE_INSTANCE, HEARTBEAT_TIME, STATUS)
|
||||||
|
VALUES (?, ?, ?, ?, ?, CURRENT_TIMESTAMP(6), 'STARTING')
|
||||||
|
ON DUPLICATE KEY UPDATE
|
||||||
|
INSTANCE = ?,
|
||||||
|
SERVICE_INSTANCE = ?,
|
||||||
|
HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
|
||||||
|
STATUS = 'STARTING'
|
||||||
|
"""
|
||||||
|
))
|
||||||
|
{
|
||||||
|
stmt.setString(1, taskName);
|
||||||
|
stmt.setString(2, taskBase);
|
||||||
|
stmt.setInt(3, node);
|
||||||
|
stmt.setString(4, instanceUUID);
|
||||||
|
stmt.setString(5, serviceInstanceUUID);
|
||||||
|
stmt.setString(6, instanceUUID);
|
||||||
|
stmt.setString(7, serviceInstanceUUID);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (SQLException ex) {
|
||||||
|
logger.error("ServiceHeartbeat failed to initialize", ex);
|
||||||
|
throw new RuntimeException(ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void heartbeatUpdate() throws SQLException {
|
||||||
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
try (var stmt = connection.prepareStatement(
|
||||||
|
"""
|
||||||
|
UPDATE TASK_HEARTBEAT
|
||||||
|
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
|
||||||
|
STATUS = 'RUNNING',
|
||||||
|
PROGRESS = ?,
|
||||||
|
STAGE_NAME = ?
|
||||||
|
WHERE INSTANCE = ?
|
||||||
|
""")
|
||||||
|
)
|
||||||
|
{
|
||||||
|
stmt.setInt(1, progress);
|
||||||
|
stmt.setString(2, step);
|
||||||
|
stmt.setString(3, instanceUUID);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void heartbeatStop() throws SQLException {
|
||||||
|
try (var connection = dataSource.getConnection()) {
|
||||||
|
try (var stmt = connection.prepareStatement(
|
||||||
|
"""
|
||||||
|
UPDATE TASK_HEARTBEAT
|
||||||
|
SET HEARTBEAT_TIME = CURRENT_TIMESTAMP(6),
|
||||||
|
STATUS='STOPPED',
|
||||||
|
PROGRESS = ?,
|
||||||
|
STAGE_NAME = ?
|
||||||
|
WHERE INSTANCE = ?
|
||||||
|
""")
|
||||||
|
)
|
||||||
|
{
|
||||||
|
stmt.setInt(1, progress);
|
||||||
|
stmt.setString( 2, step);
|
||||||
|
stmt.setString( 3, instanceUUID);
|
||||||
|
stmt.executeUpdate();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
shutDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -5,4 +5,5 @@ import com.google.inject.ImplementedBy;
|
|||||||
@ImplementedBy(ServiceHeartbeatImpl.class)
|
@ImplementedBy(ServiceHeartbeatImpl.class)
|
||||||
public interface ServiceHeartbeat {
|
public interface ServiceHeartbeat {
|
||||||
<T extends Enum<T>> ServiceTaskHeartbeat<T> createServiceTaskHeartbeat(Class<T> steps, String processName);
|
<T extends Enum<T>> ServiceTaskHeartbeat<T> createServiceTaskHeartbeat(Class<T> steps, String processName);
|
||||||
|
ServiceAdHocTaskHeartbeat createServiceAdHocTaskHeartbeat(String taskName);
|
||||||
}
|
}
|
||||||
|
@ -54,6 +54,11 @@ public class ServiceHeartbeatImpl implements ServiceHeartbeat {
|
|||||||
return new ServiceTaskHeartbeatImpl<>(steps, configuration, processName, eventLog, dataSource);
|
return new ServiceTaskHeartbeatImpl<>(steps, configuration, processName, eventLog, dataSource);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ServiceAdHocTaskHeartbeat createServiceAdHocTaskHeartbeat(String taskName) {
|
||||||
|
return new ServiceAdHocTaskHeartbeatImpl(configuration, taskName, dataSource);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
public void start() {
|
public void start() {
|
||||||
if (!running) {
|
if (!running) {
|
||||||
|
@ -9,23 +9,25 @@ import nu.marginalia.executor.storage.FileStorageFile;
|
|||||||
import nu.marginalia.executor.upload.UploadDirContents;
|
import nu.marginalia.executor.upload.UploadDirContents;
|
||||||
import nu.marginalia.executor.upload.UploadDirItem;
|
import nu.marginalia.executor.upload.UploadDirItem;
|
||||||
import nu.marginalia.functions.execution.api.*;
|
import nu.marginalia.functions.execution.api.*;
|
||||||
|
import nu.marginalia.service.ServiceId;
|
||||||
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
||||||
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
|
import nu.marginalia.service.client.GrpcMultiNodeChannelPool;
|
||||||
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
import nu.marginalia.service.discovery.ServiceRegistryIf;
|
||||||
import nu.marginalia.service.discovery.property.ServiceKey;
|
import nu.marginalia.service.discovery.property.ServiceKey;
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.service.ServiceId;
|
|
||||||
import nu.marginalia.storage.model.FileStorage;
|
import nu.marginalia.storage.model.FileStorage;
|
||||||
import nu.marginalia.storage.model.FileStorageId;
|
import nu.marginalia.storage.model.FileStorageId;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import java.net.*;
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.net.URLEncoder;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import static nu.marginalia.functions.execution.api.ExecutorApiGrpc.*;
|
import static nu.marginalia.functions.execution.api.ExecutorApiGrpc.ExecutorApiBlockingStub;
|
||||||
|
|
||||||
@Singleton
|
@Singleton
|
||||||
public class ExecutorClient {
|
public class ExecutorClient {
|
||||||
@ -163,8 +165,8 @@ public class ExecutorClient {
|
|||||||
* The endpoint is compatible with range requests.
|
* The endpoint is compatible with range requests.
|
||||||
* */
|
* */
|
||||||
public URL remoteFileURL(FileStorage fileStorage, String path) {
|
public URL remoteFileURL(FileStorage fileStorage, String path) {
|
||||||
String uriPath = STR."/transfer/file/\{fileStorage.id()}";
|
String uriPath = "/transfer/file/" + fileStorage.id();
|
||||||
String uriQuery = STR."path=\{URLEncoder.encode(path, StandardCharsets.UTF_8)}";
|
String uriQuery = "path=" + URLEncoder.encode(path, StandardCharsets.UTF_8);
|
||||||
|
|
||||||
var endpoints = registry.getEndpoints(ServiceKey.forRest(ServiceId.Executor, fileStorage.node()));
|
var endpoints = registry.getEndpoints(ServiceKey.forRest(ServiceId.Executor, fileStorage.node()));
|
||||||
if (endpoints.isEmpty()) {
|
if (endpoints.isEmpty()) {
|
||||||
|
@ -35,6 +35,7 @@ dependencies {
|
|||||||
implementation project(':code:libraries:term-frequency-dict')
|
implementation project(':code:libraries:term-frequency-dict')
|
||||||
|
|
||||||
implementation project(':code:functions:link-graph:api')
|
implementation project(':code:functions:link-graph:api')
|
||||||
|
implementation project(':code:functions:live-capture:api')
|
||||||
implementation project(':code:functions:search-query')
|
implementation project(':code:functions:search-query')
|
||||||
implementation project(':code:execution:api')
|
implementation project(':code:execution:api')
|
||||||
|
|
||||||
|
@ -22,7 +22,8 @@ public enum ExecutorActor {
|
|||||||
RESTORE_BACKUP,
|
RESTORE_BACKUP,
|
||||||
EXPORT_SAMPLE_DATA,
|
EXPORT_SAMPLE_DATA,
|
||||||
DOWNLOAD_SAMPLE,
|
DOWNLOAD_SAMPLE,
|
||||||
SCRAPE_FEEDS;
|
SCRAPE_FEEDS,
|
||||||
|
UPDATE_RSS;
|
||||||
|
|
||||||
public String id() {
|
public String id() {
|
||||||
return "fsm:" + name().toLowerCase();
|
return "fsm:" + name().toLowerCase();
|
||||||
|
@ -50,7 +50,8 @@ public class ExecutorActorControlService {
|
|||||||
ExportSegmentationModelActor exportSegmentationModelActor,
|
ExportSegmentationModelActor exportSegmentationModelActor,
|
||||||
DownloadSampleActor downloadSampleActor,
|
DownloadSampleActor downloadSampleActor,
|
||||||
ScrapeFeedsActor scrapeFeedsActor,
|
ScrapeFeedsActor scrapeFeedsActor,
|
||||||
ExecutorActorStateMachines stateMachines) {
|
ExecutorActorStateMachines stateMachines,
|
||||||
|
UpdateRssActor updateRssActor) {
|
||||||
this.messageQueueFactory = messageQueueFactory;
|
this.messageQueueFactory = messageQueueFactory;
|
||||||
this.eventLog = baseServiceParams.eventLog;
|
this.eventLog = baseServiceParams.eventLog;
|
||||||
this.stateMachines = stateMachines;
|
this.stateMachines = stateMachines;
|
||||||
@ -83,6 +84,7 @@ public class ExecutorActorControlService {
|
|||||||
register(ExecutorActor.DOWNLOAD_SAMPLE, downloadSampleActor);
|
register(ExecutorActor.DOWNLOAD_SAMPLE, downloadSampleActor);
|
||||||
|
|
||||||
register(ExecutorActor.SCRAPE_FEEDS, scrapeFeedsActor);
|
register(ExecutorActor.SCRAPE_FEEDS, scrapeFeedsActor);
|
||||||
|
register(ExecutorActor.UPDATE_RSS, updateRssActor);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void register(ExecutorActor process, RecordActorPrototype graph) {
|
private void register(ExecutorActor process, RecordActorPrototype graph) {
|
||||||
|
134
code/execution/java/nu/marginalia/actor/proc/UpdateRssActor.java
Normal file
134
code/execution/java/nu/marginalia/actor/proc/UpdateRssActor.java
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
package nu.marginalia.actor.proc;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import nu.marginalia.actor.prototype.RecordActorPrototype;
|
||||||
|
import nu.marginalia.actor.state.ActorResumeBehavior;
|
||||||
|
import nu.marginalia.actor.state.ActorStep;
|
||||||
|
import nu.marginalia.actor.state.Resume;
|
||||||
|
import nu.marginalia.api.feeds.FeedsClient;
|
||||||
|
import nu.marginalia.api.feeds.RpcFeedUpdateMode;
|
||||||
|
import nu.marginalia.mq.MqMessage;
|
||||||
|
import nu.marginalia.mq.MqMessageState;
|
||||||
|
import nu.marginalia.mq.persistence.MqPersistence;
|
||||||
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
|
public class UpdateRssActor extends RecordActorPrototype {
|
||||||
|
|
||||||
|
private final FeedsClient feedsClient;
|
||||||
|
private final int nodeId;
|
||||||
|
|
||||||
|
private final Duration initialDelay = Duration.ofMinutes(5);
|
||||||
|
private final Duration updateInterval = Duration.ofHours(24);
|
||||||
|
private final int cleanInterval = 60;
|
||||||
|
|
||||||
|
private final MqPersistence persistence;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public UpdateRssActor(Gson gson, FeedsClient feedsClient, ServiceConfiguration serviceConfiguration, MqPersistence persistence) {
|
||||||
|
super(gson);
|
||||||
|
this.feedsClient = feedsClient;
|
||||||
|
this.nodeId = serviceConfiguration.node();
|
||||||
|
this.persistence = persistence;
|
||||||
|
}
|
||||||
|
|
||||||
|
public record Initial() implements ActorStep {}
|
||||||
|
@Resume(behavior = ActorResumeBehavior.RETRY)
|
||||||
|
public record Wait(String ts, int refreshCount) implements ActorStep {}
|
||||||
|
@Resume(behavior = ActorResumeBehavior.RETRY)
|
||||||
|
public record UpdateRefresh(int refreshCount, long msgId) implements ActorStep {
|
||||||
|
public UpdateRefresh(int refreshCount) {
|
||||||
|
this(refreshCount, -1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@Resume(behavior = ActorResumeBehavior.RETRY)
|
||||||
|
public record UpdateClean(long msgId) implements ActorStep {
|
||||||
|
public UpdateClean() {
|
||||||
|
this(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ActorStep transition(ActorStep self) throws Exception {
|
||||||
|
return switch (self) {
|
||||||
|
case Initial() -> {
|
||||||
|
if (nodeId > 1) {
|
||||||
|
// Only run on the first node
|
||||||
|
yield new End();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Wait for 5 minutes before starting the first update, to give the system time to start up properly
|
||||||
|
yield new Wait(LocalDateTime.now().plus(initialDelay).toString(), 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case Wait(String untilTs, int count) -> {
|
||||||
|
var until = LocalDateTime.parse(untilTs);
|
||||||
|
var now = LocalDateTime.now();
|
||||||
|
|
||||||
|
long remaining = Duration.between(now, until).toMillis();
|
||||||
|
|
||||||
|
if (remaining > 0) {
|
||||||
|
Thread.sleep(remaining);
|
||||||
|
yield new Wait(untilTs, count);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
|
||||||
|
// Once every `cleanInterval` updates, do a clean update;
|
||||||
|
// otherwise do a refresh update
|
||||||
|
if (count > cleanInterval) {
|
||||||
|
yield new UpdateClean();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
yield new UpdateRefresh(count);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case UpdateRefresh(int count, long msgId) when msgId < 0 -> {
|
||||||
|
long messageId = feedsClient.updateFeeds(RpcFeedUpdateMode.REFRESH);
|
||||||
|
yield new UpdateRefresh(count, messageId);
|
||||||
|
}
|
||||||
|
case UpdateRefresh(int count, long msgId) -> {
|
||||||
|
MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
|
||||||
|
if (msg == null) {
|
||||||
|
// Retry the update
|
||||||
|
yield new Error("Failed to update feeds: message not found");
|
||||||
|
} else if (msg.state() != MqMessageState.OK) {
|
||||||
|
// Retry the update
|
||||||
|
yield new Error("Failed to update feeds: " + msg.state());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Increment the refresh count
|
||||||
|
yield new Wait(LocalDateTime.now().plus(updateInterval).toString(), count + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
case UpdateClean(long msgId) when msgId < 0 -> {
|
||||||
|
long messageId = feedsClient.updateFeeds(RpcFeedUpdateMode.CLEAN);
|
||||||
|
yield new UpdateClean(messageId);
|
||||||
|
}
|
||||||
|
case UpdateClean(long msgId) -> {
|
||||||
|
MqMessage msg = persistence.waitForMessageTerminalState(msgId, Duration.ofSeconds(10), Duration.ofHours(12));
|
||||||
|
if (msg == null) {
|
||||||
|
// Retry the update
|
||||||
|
yield new Error("Failed to update feeds: message not found");
|
||||||
|
} else if (msg.state() != MqMessageState.OK) {
|
||||||
|
// Retry the update
|
||||||
|
yield new Error("Failed to update feeds: " + msg.state());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Reset the refresh count after a successful update
|
||||||
|
yield new Wait(LocalDateTime.now().plus(updateInterval).toString(), 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
default -> new Error("Unknown actor step: " + self);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String describe() {
|
||||||
|
return "Periodically updates RSS and Atom feeds";
|
||||||
|
}
|
||||||
|
}
|
@ -1,24 +0,0 @@
|
|||||||
plugins {
|
|
||||||
id 'java'
|
|
||||||
id 'jvm-test-suite'
|
|
||||||
}
|
|
||||||
|
|
||||||
java {
|
|
||||||
toolchain {
|
|
||||||
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
|
||||||
|
|
||||||
dependencies {
|
|
||||||
implementation libs.bundles.slf4j
|
|
||||||
|
|
||||||
implementation libs.notnull
|
|
||||||
implementation libs.gson
|
|
||||||
|
|
||||||
testImplementation libs.bundles.slf4j.test
|
|
||||||
testImplementation libs.bundles.junit
|
|
||||||
testImplementation libs.mockito
|
|
||||||
|
|
||||||
}
|
|
@ -1,58 +0,0 @@
|
|||||||
package nu.marginalia.feedlot;
|
|
||||||
|
|
||||||
import com.google.gson.Gson;
|
|
||||||
import nu.marginalia.feedlot.model.FeedItems;
|
|
||||||
|
|
||||||
import java.net.URI;
|
|
||||||
import java.net.http.HttpClient;
|
|
||||||
import java.net.http.HttpRequest;
|
|
||||||
import java.net.http.HttpResponse;
|
|
||||||
import java.time.Duration;
|
|
||||||
import java.util.concurrent.Executors;
|
|
||||||
import java.util.concurrent.CompletableFuture;
|
|
||||||
|
|
||||||
public class FeedlotClient {
|
|
||||||
private final String feedlotHost;
|
|
||||||
private final int feedlotPort;
|
|
||||||
private final Gson gson;
|
|
||||||
private final HttpClient httpClient;
|
|
||||||
private final Duration requestTimeout;
|
|
||||||
|
|
||||||
public FeedlotClient(String feedlotHost,
|
|
||||||
int feedlotPort,
|
|
||||||
Gson gson,
|
|
||||||
Duration connectTimeout,
|
|
||||||
Duration requestTimeout
|
|
||||||
)
|
|
||||||
{
|
|
||||||
this.feedlotHost = feedlotHost;
|
|
||||||
this.feedlotPort = feedlotPort;
|
|
||||||
this.gson = gson;
|
|
||||||
|
|
||||||
httpClient = HttpClient.newBuilder()
|
|
||||||
.executor(Executors.newCachedThreadPool())
|
|
||||||
.connectTimeout(connectTimeout)
|
|
||||||
.build();
|
|
||||||
this.requestTimeout = requestTimeout;
|
|
||||||
}
|
|
||||||
|
|
||||||
public CompletableFuture<FeedItems> getFeedItems(String domainName) {
|
|
||||||
return httpClient.sendAsync(
|
|
||||||
HttpRequest.newBuilder()
|
|
||||||
.uri(URI.create("http://%s:%d/feed/%s".formatted(feedlotHost, feedlotPort, domainName)))
|
|
||||||
.GET()
|
|
||||||
.timeout(requestTimeout)
|
|
||||||
.build(),
|
|
||||||
HttpResponse.BodyHandlers.ofString()
|
|
||||||
).thenApply(HttpResponse::body)
|
|
||||||
.thenApply(this::parseFeedItems);
|
|
||||||
}
|
|
||||||
|
|
||||||
private FeedItems parseFeedItems(String s) {
|
|
||||||
return gson.fromJson(s, FeedItems.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void stop() {
|
|
||||||
httpClient.close();
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,17 +0,0 @@
|
|||||||
package nu.marginalia.feedlot.model;
|
|
||||||
|
|
||||||
public record FeedItem(String title, String date, String description, String url) {
|
|
||||||
|
|
||||||
public String pubDay() { // Extract the date from an ISO style date string
|
|
||||||
if (date.length() > 10) {
|
|
||||||
return date.substring(0, 10);
|
|
||||||
}
|
|
||||||
return date;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String descriptionSafe() {
|
|
||||||
return description
|
|
||||||
.replace("<", "<")
|
|
||||||
.replace(">", ">");
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,6 +0,0 @@
|
|||||||
package nu.marginalia.feedlot.model;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public record FeedItems(String domain, String feedUrl, String updated, List<FeedItem> items) {
|
|
||||||
}
|
|
@ -1,20 +0,0 @@
|
|||||||
Client for [FeedlotTheFeedBot](https://github.com/MarginaliaSearch/FeedLotTheFeedBot),
|
|
||||||
the RSS/Atom feed fetcher and cache for Marginalia Search.
|
|
||||||
|
|
||||||
This service is external to the Marginalia Search codebase,
|
|
||||||
as it is not a core part of the search engine and has other
|
|
||||||
utilities.
|
|
||||||
|
|
||||||
## Example
|
|
||||||
|
|
||||||
```java
|
|
||||||
|
|
||||||
import java.time.Duration;
|
|
||||||
|
|
||||||
var client = new FeedlotClient("localhost", 8080,
|
|
||||||
gson,
|
|
||||||
Duration.ofMillis(100), // connect timeout
|
|
||||||
Duration.ofMillis(100)); // request timeout
|
|
||||||
|
|
||||||
CompleteableFuture<FeedItems> items = client.getFeedItems("www.marginalia.nu");
|
|
||||||
```
|
|
@ -20,6 +20,7 @@ dependencies {
|
|||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
implementation project(':code:common:config')
|
implementation project(':code:common:config')
|
||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
|
implementation project(':code:libraries:message-queue')
|
||||||
|
|
||||||
implementation libs.bundles.slf4j
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
|
@ -0,0 +1,65 @@
|
|||||||
|
package nu.marginalia.api.feeds;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.mq.outbox.MqOutbox;
|
||||||
|
import nu.marginalia.mq.persistence.MqPersistence;
|
||||||
|
import nu.marginalia.service.client.GrpcChannelPoolFactory;
|
||||||
|
import nu.marginalia.service.client.GrpcSingleNodeChannelPool;
|
||||||
|
import nu.marginalia.service.discovery.property.ServiceKey;
|
||||||
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
|
||||||
|
import javax.annotation.CheckReturnValue;
|
||||||
|
import java.util.UUID;
|
||||||
|
import java.util.concurrent.CompletableFuture;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class FeedsClient {
|
||||||
|
private final ExecutorService executorService = Executors.newCachedThreadPool();
|
||||||
|
private final GrpcSingleNodeChannelPool<FeedApiGrpc.FeedApiBlockingStub> channelPool;
|
||||||
|
private final MqOutbox updateFeedsOutbox;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public FeedsClient(GrpcChannelPoolFactory factory, MqPersistence mqPersistence, ServiceConfiguration serviceConfiguration) {
|
||||||
|
|
||||||
|
// The client is only interested in the primary node
|
||||||
|
var key = ServiceKey.forGrpcApi(FeedApiGrpc.class, ServicePartition.any());
|
||||||
|
|
||||||
|
this.channelPool = factory.createSingle(key, FeedApiGrpc::newBlockingStub);
|
||||||
|
this.updateFeedsOutbox = new MqOutbox(mqPersistence,
|
||||||
|
"update-rss-feeds", 0,
|
||||||
|
serviceConfiguration.serviceName(), serviceConfiguration.node(),
|
||||||
|
UUID.randomUUID());
|
||||||
|
}
|
||||||
|
|
||||||
|
public CompletableFuture<RpcFeed> getFeed(int domainId) {
|
||||||
|
try {
|
||||||
|
return channelPool.call(FeedApiGrpc.FeedApiBlockingStub::getFeed)
|
||||||
|
.async(executorService)
|
||||||
|
.run(RpcDomainId.newBuilder().setDomainId(domainId).build());
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
return CompletableFuture.failedFuture(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Update the feeds, return a message ID for the update */
|
||||||
|
@CheckReturnValue
|
||||||
|
public long updateFeeds(RpcFeedUpdateMode mode) throws Exception {
|
||||||
|
// Create a message for the {@link MqLongRunningTask} paradigm to use for tracking the task
|
||||||
|
long msgId = updateFeedsOutbox.sendAsync("updateFeeds", "");
|
||||||
|
|
||||||
|
channelPool.call(FeedApiGrpc.FeedApiBlockingStub::updateFeeds)
|
||||||
|
.run(RpcUpdateRequest.newBuilder()
|
||||||
|
.setMode(mode)
|
||||||
|
.setMsgId(msgId)
|
||||||
|
.build()
|
||||||
|
);
|
||||||
|
|
||||||
|
return msgId;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,43 @@
|
|||||||
|
syntax="proto3";
|
||||||
|
package nu.marginalia.api.feeds;
|
||||||
|
|
||||||
|
option java_package="nu.marginalia.api.feeds";
|
||||||
|
option java_multiple_files=true;
|
||||||
|
|
||||||
|
|
||||||
|
service FeedApi {
|
||||||
|
rpc getFeed(RpcDomainId) returns (RpcFeed) {}
|
||||||
|
rpc updateFeeds(RpcUpdateRequest) returns (Empty) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
message RpcDomainId {
|
||||||
|
int32 domainId = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RpcUpdateRequest {
|
||||||
|
RpcFeedUpdateMode mode = 1;
|
||||||
|
int64 msgId = 2; // Id for a message on the message queue, will be replied to with a dummy response when the task is done,
|
||||||
|
// if the message id is not positive, no response will be attempted to be sent.
|
||||||
|
}
|
||||||
|
|
||||||
|
enum RpcFeedUpdateMode {
|
||||||
|
CLEAN = 0; // Start over with a new database from system rss exports
|
||||||
|
REFRESH = 1; // Refresh known feeds
|
||||||
|
}
|
||||||
|
|
||||||
|
message RpcFeed {
|
||||||
|
int32 domainId = 1;
|
||||||
|
string domain = 2;
|
||||||
|
string feedUrl = 3;
|
||||||
|
string updated = 4;
|
||||||
|
repeated RpcFeedItem items = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RpcFeedItem {
|
||||||
|
string title = 1;
|
||||||
|
string date = 2;
|
||||||
|
string description = 3;
|
||||||
|
string url = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message Empty {}
|
@ -4,15 +4,12 @@ package nu.marginalia.api.livecapture;
|
|||||||
option java_package="nu.marginalia.api.livecapture";
|
option java_package="nu.marginalia.api.livecapture";
|
||||||
option java_multiple_files=true;
|
option java_multiple_files=true;
|
||||||
|
|
||||||
service LiveCaptureApi {
|
|
||||||
rpc requestScreengrab(RpcDomainId) returns (Empty) {}
|
|
||||||
}
|
|
||||||
|
|
||||||
message Void {
|
|
||||||
}
|
|
||||||
|
|
||||||
message RpcDomainId {
|
message RpcDomainId {
|
||||||
int32 domainId = 1;
|
int32 domainId = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
service LiveCaptureApi {
|
||||||
|
rpc requestScreengrab(RpcDomainId) returns (Empty) {}
|
||||||
|
}
|
||||||
|
|
||||||
message Empty {}
|
message Empty {}
|
@ -20,7 +20,15 @@ dependencies {
|
|||||||
implementation project(':code:common:service')
|
implementation project(':code:common:service')
|
||||||
implementation project(':code:common:model')
|
implementation project(':code:common:model')
|
||||||
implementation project(':code:common:db')
|
implementation project(':code:common:db')
|
||||||
|
implementation project(':code:libraries:blocking-thread-pool')
|
||||||
|
implementation project(':code:libraries:message-queue')
|
||||||
|
|
||||||
|
implementation project(':code:execution:api')
|
||||||
|
|
||||||
|
implementation libs.jsoup
|
||||||
|
implementation libs.rssreader
|
||||||
|
implementation libs.opencsv
|
||||||
|
implementation libs.sqlite
|
||||||
implementation libs.bundles.slf4j
|
implementation libs.bundles.slf4j
|
||||||
implementation libs.commons.lang3
|
implementation libs.commons.lang3
|
||||||
|
|
||||||
|
@ -0,0 +1,144 @@
|
|||||||
|
package nu.marginalia.rss.db;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.google.inject.Singleton;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.rss.model.FeedDefinition;
|
||||||
|
import nu.marginalia.rss.model.FeedItems;
|
||||||
|
import nu.marginalia.service.module.ServiceConfiguration;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.StandardCopyOption;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
@Singleton
|
||||||
|
public class FeedDb {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FeedDb.class);
|
||||||
|
|
||||||
|
private static final String dbFileName = "rss-feeds.db";
|
||||||
|
|
||||||
|
private final Path readerDbPath;
|
||||||
|
private volatile FeedDbReader reader;
|
||||||
|
|
||||||
|
private final boolean feedDbEnabled;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public FeedDb(ServiceConfiguration serviceConfiguration) {
|
||||||
|
feedDbEnabled = serviceConfiguration.node() <= 1;
|
||||||
|
readerDbPath = WmsaHome.getDataPath().resolve(dbFileName);
|
||||||
|
|
||||||
|
if (!feedDbEnabled) {
|
||||||
|
logger.info("Feed database is disabled on this node");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
try {
|
||||||
|
reader = new FeedDbReader(readerDbPath);
|
||||||
|
} catch (Exception e) {
|
||||||
|
reader = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEnabled() {
|
||||||
|
return feedDbEnabled;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<FeedDefinition> getAllFeeds() {
|
||||||
|
if (!feedDbEnabled) {
|
||||||
|
throw new IllegalStateException("Feed database is disabled on this node");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capture the current reader to avoid concurrency issues
|
||||||
|
FeedDbReader reader = this.reader;
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (reader != null) {
|
||||||
|
return reader.getAllFeeds();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
logger.error("Error getting all feeds", e);
|
||||||
|
}
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
public FeedItems getFeed(EdgeDomain domain) {
|
||||||
|
if (!feedDbEnabled) {
|
||||||
|
throw new IllegalStateException("Feed database is disabled on this node");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capture the current reader to avoid concurrency issues
|
||||||
|
FeedDbReader reader = this.reader;
|
||||||
|
try {
|
||||||
|
if (reader != null) {
|
||||||
|
return reader.getFeed(domain);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
logger.error("Error getting feed for " + domain, e);
|
||||||
|
}
|
||||||
|
return FeedItems.none();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Optional<String> getFeedAsJson(String domain) {
|
||||||
|
if (!feedDbEnabled) {
|
||||||
|
throw new IllegalStateException("Feed database is disabled on this node");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Capture the current reader to avoid concurrency issues
|
||||||
|
FeedDbReader reader = this.reader;
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (reader != null) {
|
||||||
|
return reader.getFeedAsJson(domain);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
logger.error("Error getting feed for " + domain, e);
|
||||||
|
}
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public FeedDbWriter createWriter() {
|
||||||
|
if (!feedDbEnabled) {
|
||||||
|
throw new IllegalStateException("Feed database is disabled on this node");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
Path dbFile = Files.createTempFile(WmsaHome.getDataPath(), "rss-feeds", ".tmp.db");
|
||||||
|
return new FeedDbWriter(dbFile);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Error creating new database writer", e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void switchDb(FeedDbWriter writer) {
|
||||||
|
if (!feedDbEnabled) {
|
||||||
|
throw new IllegalStateException("Feed database is disabled on this node");
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
logger.info("Switching to new feed database from " + writer.getDbPath() + " to " + readerDbPath);
|
||||||
|
|
||||||
|
writer.close();
|
||||||
|
reader.close();
|
||||||
|
|
||||||
|
Files.move(writer.getDbPath(), readerDbPath, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
|
||||||
|
|
||||||
|
reader = new FeedDbReader(readerDbPath);
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.error("Fatal error switching to new feed database", e);
|
||||||
|
|
||||||
|
reader = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,102 @@
|
|||||||
|
package nu.marginalia.rss.db;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.GsonBuilder;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.rss.model.FeedDefinition;
|
||||||
|
import nu.marginalia.rss.model.FeedItems;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public class FeedDbReader implements AutoCloseable {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FeedDbReader.class);
|
||||||
|
private static final Gson gson = new GsonBuilder().create();
|
||||||
|
private final Connection connection;
|
||||||
|
|
||||||
|
public FeedDbReader(Path dbPath) throws SQLException {
|
||||||
|
String dbUrl = "jdbc:sqlite:" + dbPath.toAbsolutePath();
|
||||||
|
|
||||||
|
logger.info("Opening feed db at " + dbUrl);
|
||||||
|
|
||||||
|
connection = DriverManager.getConnection(dbUrl);
|
||||||
|
|
||||||
|
// Create table if it doesn't exist to avoid errors before any feeds have been fetched
|
||||||
|
try (var stmt = connection.createStatement()) {
|
||||||
|
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS feed (domain TEXT PRIMARY KEY, feed JSON)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<FeedDefinition> getAllFeeds() {
|
||||||
|
List<FeedDefinition> ret = new ArrayList<>();
|
||||||
|
|
||||||
|
try (var stmt = connection.createStatement()) {
|
||||||
|
var rs = stmt.executeQuery("""
|
||||||
|
select
|
||||||
|
json_extract(feed, '$.domain') as domain,
|
||||||
|
json_extract(feed, '$.feedUrl') as url,
|
||||||
|
json_extract(feed, '$.updated') as updated
|
||||||
|
from feed
|
||||||
|
""");
|
||||||
|
|
||||||
|
while (rs.next()) {
|
||||||
|
ret.add(new FeedDefinition(
|
||||||
|
rs.getString("domain"),
|
||||||
|
rs.getString("url"),
|
||||||
|
rs.getString("updated")));
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (SQLException e) {
|
||||||
|
logger.error("Error getting all feeds", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Optional<String> getFeedAsJson(String domain) {
|
||||||
|
try (var stmt = connection.prepareStatement("SELECT FEED FROM feed WHERE DOMAIN = ?")) {
|
||||||
|
stmt.setString(1, domain);
|
||||||
|
|
||||||
|
var rs = stmt.executeQuery();
|
||||||
|
if (rs.next()) {
|
||||||
|
return Optional.of(rs.getString(1));
|
||||||
|
}
|
||||||
|
} catch (SQLException e) {
|
||||||
|
logger.error("Error getting feed for " + domain, e);
|
||||||
|
}
|
||||||
|
return Optional.empty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public FeedItems getFeed(EdgeDomain domain) {
|
||||||
|
try (var stmt = connection.prepareStatement("SELECT FEED FROM feed WHERE DOMAIN = ?")) {
|
||||||
|
stmt.setString(1, domain.toString());
|
||||||
|
var rs = stmt.executeQuery();
|
||||||
|
|
||||||
|
if (rs.next()) {
|
||||||
|
return deserialize(rs.getString(1));
|
||||||
|
}
|
||||||
|
} catch (SQLException e) {
|
||||||
|
logger.error("Error getting feed for " + domain, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return FeedItems.none();
|
||||||
|
}
|
||||||
|
|
||||||
|
private FeedItems deserialize(String string) {
|
||||||
|
return gson.fromJson(string, FeedItems.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws SQLException {
|
||||||
|
connection.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,68 @@
|
|||||||
|
package nu.marginalia.rss.db;
|
||||||
|
|
||||||
|
import com.google.gson.Gson;
|
||||||
|
import com.google.gson.GsonBuilder;
|
||||||
|
import nu.marginalia.rss.model.FeedItems;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.PreparedStatement;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
|
||||||
|
public class FeedDbWriter implements AutoCloseable {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FeedDbWriter.class);
|
||||||
|
|
||||||
|
private static final Gson gson = new GsonBuilder().create();
|
||||||
|
|
||||||
|
private final Connection connection;
|
||||||
|
private final PreparedStatement insertFeedStmt;
|
||||||
|
private final Path dbPath;
|
||||||
|
|
||||||
|
private volatile boolean closed = false;
|
||||||
|
|
||||||
|
public FeedDbWriter(Path dbPath) throws SQLException {
|
||||||
|
String dbUrl = "jdbc:sqlite:" + dbPath.toAbsolutePath();
|
||||||
|
|
||||||
|
this.dbPath = dbPath;
|
||||||
|
|
||||||
|
connection = DriverManager.getConnection(dbUrl);
|
||||||
|
|
||||||
|
try (var stmt = connection.createStatement()) {
|
||||||
|
stmt.executeUpdate("CREATE TABLE IF NOT EXISTS feed (domain TEXT PRIMARY KEY, feed JSON)");
|
||||||
|
}
|
||||||
|
|
||||||
|
insertFeedStmt = connection.prepareStatement("INSERT INTO feed (domain, feed) VALUES (?, ?)");
|
||||||
|
}
|
||||||
|
|
||||||
|
public Path getDbPath() {
|
||||||
|
return dbPath;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public synchronized void saveFeed(FeedItems items) {
|
||||||
|
try {
|
||||||
|
insertFeedStmt.setString(1, items.domain().toLowerCase());
|
||||||
|
insertFeedStmt.setString(2, serialize(items));
|
||||||
|
insertFeedStmt.executeUpdate();
|
||||||
|
}
|
||||||
|
catch (SQLException e) {
|
||||||
|
logger.error("Error saving feed for " + items.domain(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String serialize(FeedItems items) {
|
||||||
|
return gson.toJson(items);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws SQLException {
|
||||||
|
if (!closed) {
|
||||||
|
insertFeedStmt.close();
|
||||||
|
connection.close();
|
||||||
|
closed = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,28 @@
|
|||||||
|
package nu.marginalia.rss.model;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
|
||||||
|
public record FeedDefinition(
|
||||||
|
String domain,
|
||||||
|
String feedUrl,
|
||||||
|
@Nullable String updated)
|
||||||
|
{
|
||||||
|
|
||||||
|
private static final Duration defaultDuration = Duration.ofDays(30);
|
||||||
|
|
||||||
|
public Duration durationSinceUpdated() {
|
||||||
|
if (updated == null) { // Default to 30 days if no update time is available
|
||||||
|
return defaultDuration;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
return Duration.between(ZonedDateTime.parse(updated), ZonedDateTime.now());
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
return defaultDuration;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,62 @@
|
|||||||
|
package nu.marginalia.rss.model;
|
||||||
|
|
||||||
|
import com.apptasticsoftware.rssreader.Item;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public record FeedItem(String title,
|
||||||
|
String date,
|
||||||
|
String description,
|
||||||
|
String url) implements Comparable<FeedItem>
|
||||||
|
{
|
||||||
|
public static final int MAX_DESC_LENGTH = 255;
|
||||||
|
public static final DateTimeFormatter DATE_FORMAT = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
|
||||||
|
|
||||||
|
public static FeedItem fromItem(Item item) {
|
||||||
|
String title = item.getTitle().orElse("");
|
||||||
|
String date = getItemDate(item);
|
||||||
|
String description = getItemDescription(item);
|
||||||
|
String url = item.getLink().orElse("");
|
||||||
|
|
||||||
|
return new FeedItem(title, date, description, url);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String getItemDescription(Item item) {
|
||||||
|
Optional<String> description = item.getDescription();
|
||||||
|
if (description.isEmpty())
|
||||||
|
return "";
|
||||||
|
|
||||||
|
String rawDescription = description.get();
|
||||||
|
if (rawDescription.indexOf('<') >= 0) {
|
||||||
|
rawDescription = Jsoup.parseBodyFragment(rawDescription).text();
|
||||||
|
}
|
||||||
|
|
||||||
|
return StringUtils.truncate(rawDescription, MAX_DESC_LENGTH);
|
||||||
|
}
|
||||||
|
|
||||||
|
// e.g. http://fabiensanglard.net/rss.xml does dates like this: 1 Apr 2021 00:00:00 +0000
|
||||||
|
private static final DateTimeFormatter extraFormatter = DateTimeFormatter.ofPattern("d MMM yyyy HH:mm:ss Z");
|
||||||
|
private static String getItemDate(Item item) {
|
||||||
|
Optional<ZonedDateTime> zonedDateTime = Optional.empty();
|
||||||
|
try {
|
||||||
|
zonedDateTime = item.getPubDateZonedDateTime();
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
zonedDateTime = item.getPubDate()
|
||||||
|
.map(extraFormatter::parse)
|
||||||
|
.map(ZonedDateTime::from);
|
||||||
|
}
|
||||||
|
|
||||||
|
return zonedDateTime.map(date -> date.format(DATE_FORMAT)).orElse("");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(@NotNull FeedItem o) {
|
||||||
|
return o.date.compareTo(date);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,33 @@
|
|||||||
|
package nu.marginalia.rss.model;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public record FeedItems(String domain,
|
||||||
|
String feedUrl,
|
||||||
|
String updated,
|
||||||
|
List<FeedItem> items) {
|
||||||
|
|
||||||
|
// List.of() is immutable, so we can use the same instance for all empty FeedItems
|
||||||
|
private static final FeedItems EMPTY = new FeedItems("", "","1970-01-01T00:00:00.000+00000", List.of());
|
||||||
|
public static FeedItems none() {
|
||||||
|
return EMPTY;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isEmpty() {
|
||||||
|
return items.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Optional<FeedItem> getLatest() {
|
||||||
|
if (items.isEmpty())
|
||||||
|
return Optional.empty();
|
||||||
|
|
||||||
|
return Optional.of(
|
||||||
|
items.getFirst()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Optional<String> getLatestDate() {
|
||||||
|
return getLatest().map(FeedItem::date);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,269 @@
|
|||||||
|
package nu.marginalia.rss.svc;
|
||||||
|
|
||||||
|
import com.apptasticsoftware.rssreader.RssReader;
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import com.opencsv.CSVReader;
|
||||||
|
import nu.marginalia.WmsaHome;
|
||||||
|
import nu.marginalia.executor.client.ExecutorClient;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.nodecfg.NodeConfigurationService;
|
||||||
|
import nu.marginalia.rss.db.FeedDb;
|
||||||
|
import nu.marginalia.rss.model.FeedDefinition;
|
||||||
|
import nu.marginalia.rss.model.FeedItem;
|
||||||
|
import nu.marginalia.rss.model.FeedItems;
|
||||||
|
import nu.marginalia.service.control.ServiceHeartbeat;
|
||||||
|
import nu.marginalia.storage.FileStorageService;
|
||||||
|
import nu.marginalia.storage.model.FileStorage;
|
||||||
|
import nu.marginalia.storage.model.FileStorageType;
|
||||||
|
import nu.marginalia.util.SimpleBlockingThreadPool;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.http.HttpClient;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
import java.util.*;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.ThreadLocalRandom;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
import java.util.function.BiFunction;
|
||||||
|
import java.util.function.Predicate;
|
||||||
|
import java.util.zip.GZIPInputStream;
|
||||||
|
|
||||||
|
public class FeedFetcherService {
|
||||||
|
private static final int MAX_FEED_ITEMS = 10;
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FeedFetcherService.class);
|
||||||
|
|
||||||
|
private final RssReader rssReader = new RssReader(
|
||||||
|
HttpClient.newBuilder()
|
||||||
|
.connectTimeout(Duration.ofSeconds(5))
|
||||||
|
.executor(Executors.newCachedThreadPool())
|
||||||
|
.followRedirects(HttpClient.Redirect.NORMAL)
|
||||||
|
.version(HttpClient.Version.HTTP_2)
|
||||||
|
.build()
|
||||||
|
);
|
||||||
|
|
||||||
|
private final FeedDb feedDb;
|
||||||
|
private final FileStorageService fileStorageService;
|
||||||
|
private final NodeConfigurationService nodeConfigurationService;
|
||||||
|
private final ServiceHeartbeat serviceHeartbeat;
|
||||||
|
private final ExecutorClient executorClient;
|
||||||
|
|
||||||
|
private volatile boolean updating;
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public FeedFetcherService(FeedDb feedDb,
|
||||||
|
FileStorageService fileStorageService,
|
||||||
|
NodeConfigurationService nodeConfigurationService,
|
||||||
|
ServiceHeartbeat serviceHeartbeat,
|
||||||
|
ExecutorClient executorClient)
|
||||||
|
{
|
||||||
|
this.feedDb = feedDb;
|
||||||
|
this.fileStorageService = fileStorageService;
|
||||||
|
this.nodeConfigurationService = nodeConfigurationService;
|
||||||
|
this.serviceHeartbeat = serviceHeartbeat;
|
||||||
|
this.executorClient = executorClient;
|
||||||
|
|
||||||
|
rssReader.addHeader("User-Agent", WmsaHome.getUserAgent().uaIdentifier() + " RSS Feed Fetcher");
|
||||||
|
}
|
||||||
|
|
||||||
|
public enum UpdateMode {
|
||||||
|
CLEAN,
|
||||||
|
REFRESH
|
||||||
|
};
|
||||||
|
|
||||||
|
public void updateFeeds(UpdateMode updateMode) throws IOException {
|
||||||
|
if (updating) // Prevent concurrent updates
|
||||||
|
{
|
||||||
|
throw new IllegalStateException("Already updating feeds, refusing to start another update");
|
||||||
|
}
|
||||||
|
|
||||||
|
try (var writer = feedDb.createWriter();
|
||||||
|
var heartbeat = serviceHeartbeat.createServiceAdHocTaskHeartbeat("Update Rss Feeds")
|
||||||
|
) {
|
||||||
|
updating = true;
|
||||||
|
|
||||||
|
// Read the feed definitions from the database, if they are not available, read them from the system's
|
||||||
|
// RSS exports instead
|
||||||
|
|
||||||
|
Collection<FeedDefinition> definitions = feedDb.getAllFeeds();
|
||||||
|
|
||||||
|
// If we didn't get any definitions, or a clean update is requested, read the definitions from the system
|
||||||
|
// instead
|
||||||
|
if (definitions == null || updateMode == UpdateMode.CLEAN) {
|
||||||
|
definitions = readDefinitionsFromSystem();
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info("Found {} feed definitions", definitions.size());
|
||||||
|
|
||||||
|
final AtomicInteger definitionsUpdated = new AtomicInteger(0);
|
||||||
|
final int totalDefinitions = definitions.size();
|
||||||
|
|
||||||
|
SimpleBlockingThreadPool executor = new SimpleBlockingThreadPool("FeedFetcher", 64, 4);
|
||||||
|
|
||||||
|
for (var feed : definitions) {
|
||||||
|
executor.submitQuietly(() -> {
|
||||||
|
var oldData = feedDb.getFeed(new EdgeDomain(feed.domain()));
|
||||||
|
|
||||||
|
// If we have existing data, we might skip updating it with a probability that increases with time,
|
||||||
|
// this is to avoid hammering the feeds that are updated very rarely and save some time and resources
|
||||||
|
// on our end
|
||||||
|
|
||||||
|
if (!oldData.isEmpty()) {
|
||||||
|
Duration duration = feed.durationSinceUpdated();
|
||||||
|
long daysSinceUpdate = duration.toDays();
|
||||||
|
|
||||||
|
|
||||||
|
if (daysSinceUpdate > 2 && ThreadLocalRandom.current()
|
||||||
|
.nextInt(1, 1 + (int) Math.min(10, daysSinceUpdate) / 2) > 1)
|
||||||
|
{
|
||||||
|
// Skip updating this feed, just write the old data back instead
|
||||||
|
writer.saveFeed(oldData);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
var items = fetchFeed(feed);
|
||||||
|
if (!items.isEmpty()) {
|
||||||
|
writer.saveFeed(items);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((definitionsUpdated.incrementAndGet() % 1_000) == 0) {
|
||||||
|
// Update the progress every 1k feeds, to avoid hammering the database and flooding the logs
|
||||||
|
heartbeat.progress("Updated " + definitionsUpdated + "/" + totalDefinitions + " feeds", definitionsUpdated.get(), totalDefinitions);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
executor.shutDown();
|
||||||
|
// Wait for the executor to finish, but give up after 60 minutes to avoid hanging indefinitely
|
||||||
|
for (int waitedMinutes = 0; waitedMinutes < 60; waitedMinutes++) {
|
||||||
|
if (executor.awaitTermination(1, TimeUnit.MINUTES)) break;
|
||||||
|
}
|
||||||
|
executor.shutDownNow();
|
||||||
|
|
||||||
|
// Wait for any in-progress writes to finish before switching the database
|
||||||
|
// in case we ended up murdering the writer with shutDownNow. It's a very
|
||||||
|
// slim chance but this increases the odds of a clean switch over.
|
||||||
|
|
||||||
|
TimeUnit.SECONDS.sleep(5);
|
||||||
|
|
||||||
|
feedDb.switchDb(writer);
|
||||||
|
|
||||||
|
} catch (SQLException|InterruptedException e) {
|
||||||
|
logger.error("Error updating feeds", e);
|
||||||
|
}
|
||||||
|
finally {
|
||||||
|
updating = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Collection<FeedDefinition> readDefinitionsFromSystem() throws IOException {
|
||||||
|
Collection<FileStorage> storages = getLatestFeedStorages();
|
||||||
|
List<FeedDefinition> feedDefinitionList = new ArrayList<>();
|
||||||
|
|
||||||
|
for (var storage : storages) {
|
||||||
|
var url = executorClient.remoteFileURL(storage, "feeds.csv.gz");
|
||||||
|
|
||||||
|
try (var feedStream = new GZIPInputStream(url.openStream())) {
|
||||||
|
CSVReader reader = new CSVReader(new java.io.InputStreamReader(feedStream));
|
||||||
|
|
||||||
|
for (String[] row : reader) {
|
||||||
|
if (row.length < 3) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
var domain = row[0].trim();
|
||||||
|
var feedUrl = row[2].trim();
|
||||||
|
|
||||||
|
feedDefinitionList.add(new FeedDefinition(domain, feedUrl, null));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return feedDefinitionList;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Collection<FileStorage> getLatestFeedStorages() {
|
||||||
|
// Find the newest feed storage for each node
|
||||||
|
|
||||||
|
Map<Integer, FileStorage> newestStorageByNode = new HashMap<>();
|
||||||
|
|
||||||
|
for (var node : nodeConfigurationService.getAll()) {
|
||||||
|
int nodeId = node.node();
|
||||||
|
|
||||||
|
for (var storage: fileStorageService.getEachFileStorage(nodeId, FileStorageType.EXPORT)) {
|
||||||
|
if (!storage.description().startsWith("Feeds "))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
newestStorageByNode.compute(storage.node(), new KeepNewerFeedStorage(storage));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
return newestStorageByNode.values();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static class KeepNewerFeedStorage implements BiFunction<Integer, FileStorage, FileStorage> {
|
||||||
|
private final FileStorage newValue;
|
||||||
|
|
||||||
|
private KeepNewerFeedStorage(FileStorage newValue) {
|
||||||
|
this.newValue = newValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FileStorage apply(Integer node, @Nullable FileStorage oldValue) {
|
||||||
|
if (oldValue == null) return newValue;
|
||||||
|
|
||||||
|
return newValue.createDateTime().isAfter(oldValue.createDateTime())
|
||||||
|
? newValue
|
||||||
|
: oldValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public FeedItems fetchFeed(FeedDefinition definition) {
|
||||||
|
try {
|
||||||
|
var items = rssReader.read(definition.feedUrl())
|
||||||
|
.map(FeedItem::fromItem)
|
||||||
|
.filter(new IsFeedItemDateValid())
|
||||||
|
.sorted()
|
||||||
|
.limit(MAX_FEED_ITEMS)
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
return new FeedItems(
|
||||||
|
definition.domain(),
|
||||||
|
definition.feedUrl(),
|
||||||
|
LocalDateTime.now().format(DateTimeFormatter.ISO_LOCAL_DATE_TIME),
|
||||||
|
items);
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.debug("Exception", e);
|
||||||
|
return FeedItems.none();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class IsFeedItemDateValid implements Predicate<FeedItem> {
|
||||||
|
private final String today = ZonedDateTime.now().format(DateTimeFormatter.ISO_ZONED_DATE_TIME);
|
||||||
|
|
||||||
|
public boolean test(FeedItem item) {
|
||||||
|
var date = item.date();
|
||||||
|
|
||||||
|
if (date.isBlank()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (date.compareTo(today) > 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,102 @@
|
|||||||
|
package nu.marginalia.rss.svc;
|
||||||
|
|
||||||
|
import com.google.inject.Inject;
|
||||||
|
import io.grpc.stub.StreamObserver;
|
||||||
|
import nu.marginalia.api.feeds.*;
|
||||||
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
|
import nu.marginalia.model.EdgeDomain;
|
||||||
|
import nu.marginalia.mq.persistence.MqPersistence;
|
||||||
|
import nu.marginalia.mq.task.MqLongRunningTask;
|
||||||
|
import nu.marginalia.mq.task.MqTaskResult;
|
||||||
|
import nu.marginalia.rss.db.FeedDb;
|
||||||
|
import nu.marginalia.rss.model.FeedItems;
|
||||||
|
import nu.marginalia.service.server.DiscoverableService;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
public class FeedsGrpcService extends FeedApiGrpc.FeedApiImplBase implements DiscoverableService {
|
||||||
|
private final FeedDb feedDb;
|
||||||
|
private final DbDomainQueries domainQueries;
|
||||||
|
private final MqPersistence mqPersistence;
|
||||||
|
private final FeedFetcherService feedFetcherService;
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(FeedsGrpcService.class);
|
||||||
|
|
||||||
|
@Inject
|
||||||
|
public FeedsGrpcService(FeedDb feedDb,
|
||||||
|
DbDomainQueries domainQueries,
|
||||||
|
MqPersistence mqPersistence,
|
||||||
|
FeedFetcherService feedFetcherService) {
|
||||||
|
this.feedDb = feedDb;
|
||||||
|
this.domainQueries = domainQueries;
|
||||||
|
this.mqPersistence = mqPersistence;
|
||||||
|
this.feedFetcherService = feedFetcherService;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that the service is only registered if it is enabled
|
||||||
|
@Override
|
||||||
|
public boolean shouldRegisterService() {
|
||||||
|
return feedDb.isEnabled();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void updateFeeds(RpcUpdateRequest request,
|
||||||
|
StreamObserver<Empty> responseObserver)
|
||||||
|
{
|
||||||
|
FeedFetcherService.UpdateMode updateMode = switch(request.getMode()) {
|
||||||
|
case CLEAN -> FeedFetcherService.UpdateMode.CLEAN;
|
||||||
|
case REFRESH -> FeedFetcherService.UpdateMode.REFRESH;
|
||||||
|
default -> throw new IllegalStateException("Unexpected value: " + request.getMode());
|
||||||
|
};
|
||||||
|
|
||||||
|
// Start a long-running task to update the feeds
|
||||||
|
MqLongRunningTask
|
||||||
|
.of(request.getMsgId(), "updateFeeds", mqPersistence)
|
||||||
|
.asThread(() -> {
|
||||||
|
feedFetcherService.updateFeeds(updateMode);
|
||||||
|
return new MqTaskResult.Success();
|
||||||
|
})
|
||||||
|
.start();
|
||||||
|
|
||||||
|
responseObserver.onNext(Empty.getDefaultInstance());
|
||||||
|
responseObserver.onCompleted();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void getFeed(RpcDomainId request,
|
||||||
|
StreamObserver<RpcFeed> responseObserver)
|
||||||
|
{
|
||||||
|
if (!feedDb.isEnabled()) {
|
||||||
|
responseObserver.onError(new IllegalStateException("Feed database is disabled on this node"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Optional<EdgeDomain> domainName = domainQueries.getDomain(request.getDomainId());
|
||||||
|
if (domainName.isEmpty()) {
|
||||||
|
responseObserver.onError(new IllegalArgumentException("Domain not found"));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
FeedItems feedItems = feedDb.getFeed(domainName.get());
|
||||||
|
|
||||||
|
RpcFeed.Builder retB = RpcFeed.newBuilder()
|
||||||
|
.setDomainId(request.getDomainId())
|
||||||
|
.setDomain(domainName.get().toString())
|
||||||
|
.setFeedUrl(feedItems.feedUrl())
|
||||||
|
.setUpdated(feedItems.updated());
|
||||||
|
|
||||||
|
for (var item : feedItems.items()) {
|
||||||
|
retB.addItemsBuilder()
|
||||||
|
.setTitle(item.title())
|
||||||
|
.setUrl(item.url())
|
||||||
|
.setDescription(item.description())
|
||||||
|
.setDate(item.date())
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
responseObserver.onNext(retB.build());
|
||||||
|
responseObserver.onCompleted();
|
||||||
|
}
|
||||||
|
}
|
@ -93,14 +93,14 @@ public class MqOutbox {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/** Send a message and wait for a response. */
|
/** Send a message and wait for a response. */
|
||||||
public MqMessage send(String function, String payload) throws Exception {
|
public MqMessage sendBlocking(String function, String payload) throws Exception {
|
||||||
final long id = sendAsync(function, payload);
|
final long id = sendAsync(function, payload);
|
||||||
|
|
||||||
return waitResponse(id);
|
return waitResponse(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Send a message and wait for a response */
|
/** Send a message and wait for a response */
|
||||||
public MqMessage send(Object object) throws Exception {
|
public MqMessage sendBlocking(Object object) throws Exception {
|
||||||
final long id = sendAsync(object);
|
final long id = sendAsync(object);
|
||||||
|
|
||||||
return waitResponse(id);
|
return waitResponse(id);
|
||||||
|
@ -5,8 +5,10 @@ import com.google.gson.Gson;
|
|||||||
import com.google.inject.Inject;
|
import com.google.inject.Inject;
|
||||||
import com.google.inject.Singleton;
|
import com.google.inject.Singleton;
|
||||||
import com.zaxxer.hikari.HikariDataSource;
|
import com.zaxxer.hikari.HikariDataSource;
|
||||||
import nu.marginalia.mq.MqMessageState;
|
|
||||||
import nu.marginalia.mq.MqMessage;
|
import nu.marginalia.mq.MqMessage;
|
||||||
|
import nu.marginalia.mq.MqMessageState;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import javax.annotation.Nullable;
|
import javax.annotation.Nullable;
|
||||||
import java.sql.SQLException;
|
import java.sql.SQLException;
|
||||||
@ -24,6 +26,8 @@ public class MqPersistence {
|
|||||||
private final HikariDataSource dataSource;
|
private final HikariDataSource dataSource;
|
||||||
private final Gson gson;
|
private final Gson gson;
|
||||||
|
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(MqPersistence.class);
|
||||||
|
|
||||||
public MqPersistence(HikariDataSource dataSource) {
|
public MqPersistence(HikariDataSource dataSource) {
|
||||||
this.dataSource = dataSource;
|
this.dataSource = dataSource;
|
||||||
this.gson = null;
|
this.gson = null;
|
||||||
@ -145,6 +149,35 @@ public class MqPersistence {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Blocks until a message reaches a terminal state or the timeout passes.
|
||||||
|
* <p>
|
||||||
|
* @param msgId The id of the message to wait for
|
||||||
|
* @param pollInterval The interval to poll the database for updates
|
||||||
|
* @param timeout The maximum time to wait for the message to reach a terminal state
|
||||||
|
* @return The message if it reaches a terminal state, or null if the timeout passes
|
||||||
|
*/
|
||||||
|
@Nullable
|
||||||
|
public MqMessage waitForMessageTerminalState(long msgId, Duration pollInterval, Duration timeout) throws InterruptedException, SQLException {
|
||||||
|
long deadline = System.currentTimeMillis() + timeout.toMillis();
|
||||||
|
|
||||||
|
do {
|
||||||
|
var message = getMessage(msgId);
|
||||||
|
if (message.state().isTerminal()) {
|
||||||
|
return message;
|
||||||
|
}
|
||||||
|
|
||||||
|
long timeLeft = deadline - System.currentTimeMillis();
|
||||||
|
if (timeLeft <= 0) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
long sleepTime = Math.min(pollInterval.toMillis(), timeLeft);
|
||||||
|
|
||||||
|
Thread.sleep(sleepTime);
|
||||||
|
} while (System.currentTimeMillis() < deadline);
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/** Creates a new message in the queue referencing as a reply to an existing message
|
/** Creates a new message in the queue referencing as a reply to an existing message
|
||||||
* This message will have it's RELATED_ID set to the original message's ID.
|
* This message will have it's RELATED_ID set to the original message's ID.
|
||||||
*/
|
*/
|
||||||
@ -197,7 +230,6 @@ public class MqPersistence {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Marks unclaimed messages addressed to this inbox with instanceUUID and tick,
|
/** Marks unclaimed messages addressed to this inbox with instanceUUID and tick,
|
||||||
* then returns the number of messages marked. This is an atomic operation that
|
* then returns the number of messages marked. This is an atomic operation that
|
||||||
* ensures that messages aren't double processed.
|
* ensures that messages aren't double processed.
|
||||||
|
@ -0,0 +1,138 @@
|
|||||||
|
package nu.marginalia.mq.task;
|
||||||
|
|
||||||
|
import nu.marginalia.mq.MqMessageState;
|
||||||
|
import nu.marginalia.mq.persistence.MqPersistence;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
|
||||||
|
/** A long-running task that can be executed asynchronously
|
||||||
|
* and report back to the message queue.
|
||||||
|
* <p></p>
|
||||||
|
* The idiomatic pattern is to create an outbox and send a message corresponding to the task,
|
||||||
|
* and then pass the message id along with the request to trigger the task over gRPC.
|
||||||
|
* <p></p>
|
||||||
|
* The gRPC service will spin off a thread and return immediately, while the task is executed
|
||||||
|
* in the background. The task can then report back to the message queue with the result
|
||||||
|
* of the task as it completes, by updating the message's state.
|
||||||
|
* */
|
||||||
|
public abstract class MqLongRunningTask implements Runnable {
|
||||||
|
private static final Logger logger = LoggerFactory.getLogger(MqLongRunningTask.class);
|
||||||
|
|
||||||
|
/** Create a new task with the given message id, name, and persistence. If the msgId is
|
||||||
|
* not positive, a dummy implementation is provided that does not report to the message queue.
|
||||||
|
*/
|
||||||
|
public static MqLongRunningTask of(long msgId, String name, MqPersistence persistence) {
|
||||||
|
if (msgId <= 0) {
|
||||||
|
return new MqLongRunningTaskDummyImpl(name);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return new MqLongRunningTaskImpl(persistence, msgId, name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Creates a thread that will execute the task. The thread is not started automatically */
|
||||||
|
public Thread asThread(MqTaskFunction r) {
|
||||||
|
return new Thread(() -> runNow(r), name());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Creates a future that will execute the task on the provided ExecutorService. */
|
||||||
|
public Future<Boolean> asFuture(ExecutorService executor, MqTaskFunction r) {
|
||||||
|
return executor.submit(() -> runNow(r));
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Execute the task synchronously and return true if the task was successful */
|
||||||
|
public boolean runNow(MqTaskFunction r) {
|
||||||
|
try {
|
||||||
|
switch (r.run()) {
|
||||||
|
case MqTaskResult.Success success -> {
|
||||||
|
finish();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
case MqTaskResult.Failure failure -> fail();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
logger.error("Task failed", e);
|
||||||
|
fail();
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
abstract void finish();
|
||||||
|
|
||||||
|
abstract void fail();
|
||||||
|
|
||||||
|
public abstract String name();
|
||||||
|
}
|
||||||
|
|
||||||
|
class MqLongRunningTaskDummyImpl extends MqLongRunningTask {
|
||||||
|
private final String name;
|
||||||
|
|
||||||
|
MqLongRunningTaskDummyImpl(String name) {
|
||||||
|
this.name = name;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finish() {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fail() {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String name() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class MqLongRunningTaskImpl extends MqLongRunningTask {
|
||||||
|
private final MqPersistence persistence;
|
||||||
|
private final long msgId;
|
||||||
|
private final String name;
|
||||||
|
|
||||||
|
MqLongRunningTaskImpl(MqPersistence persistence, long msgId, String name) {
|
||||||
|
this.persistence = persistence;
|
||||||
|
this.msgId = msgId;
|
||||||
|
this.name = name;
|
||||||
|
|
||||||
|
try {
|
||||||
|
persistence.updateMessageState(msgId, MqMessageState.ACK);
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finish() {
|
||||||
|
try {
|
||||||
|
persistence.updateMessageState(msgId, MqMessageState.OK);
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void fail() {
|
||||||
|
try {
|
||||||
|
persistence.updateMessageState(msgId, MqMessageState.ERR);
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String name() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,5 @@
|
|||||||
|
package nu.marginalia.mq.task;
|
||||||
|
|
||||||
|
public interface MqTaskFunction {
|
||||||
|
MqTaskResult run() throws Exception;
|
||||||
|
}
|
@ -0,0 +1,14 @@
|
|||||||
|
package nu.marginalia.mq.task;
|
||||||
|
|
||||||
|
public sealed interface MqTaskResult {
|
||||||
|
record Success(String message) implements MqTaskResult {
|
||||||
|
public Success(){
|
||||||
|
this("Ok");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
record Failure(String message) implements MqTaskResult {
|
||||||
|
public Failure(Throwable e){
|
||||||
|
this(e.getClass().getSimpleName() + " : " + e.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -110,9 +110,9 @@ public class MqOutboxTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSend() throws Exception {
|
public void testSendBlocking() throws Exception {
|
||||||
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
||||||
Executors.newSingleThreadExecutor().submit(() -> outbox.send("test", "Hello World"));
|
Executors.newSingleThreadExecutor().submit(() -> outbox.sendBlocking("test", "Hello World"));
|
||||||
|
|
||||||
TimeUnit.MILLISECONDS.sleep(100);
|
TimeUnit.MILLISECONDS.sleep(100);
|
||||||
|
|
||||||
@ -125,14 +125,14 @@ public class MqOutboxTest {
|
|||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSendAndRespondAsyncInbox() throws Exception {
|
public void testSendBlockingAndRespondAsyncInbox() throws Exception {
|
||||||
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
||||||
|
|
||||||
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
||||||
inbox.subscribe(justRespond("Alright then"));
|
inbox.subscribe(justRespond("Alright then"));
|
||||||
inbox.start();
|
inbox.start();
|
||||||
|
|
||||||
var rsp = outbox.send("test", "Hello World");
|
var rsp = outbox.sendBlocking("test", "Hello World");
|
||||||
|
|
||||||
assertEquals(MqMessageState.OK, rsp.state());
|
assertEquals(MqMessageState.OK, rsp.state());
|
||||||
assertEquals("Alright then", rsp.payload());
|
assertEquals("Alright then", rsp.payload());
|
||||||
@ -146,14 +146,14 @@ public class MqOutboxTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSendAndRespondSyncInbox() throws Exception {
|
public void testSendBlockingAndRespondSyncInbox() throws Exception {
|
||||||
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
||||||
|
|
||||||
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
||||||
inbox.subscribe(justRespond("Alright then"));
|
inbox.subscribe(justRespond("Alright then"));
|
||||||
inbox.start();
|
inbox.start();
|
||||||
|
|
||||||
var rsp = outbox.send("test", "Hello World");
|
var rsp = outbox.sendBlocking("test", "Hello World");
|
||||||
|
|
||||||
assertEquals(MqMessageState.OK, rsp.state());
|
assertEquals(MqMessageState.OK, rsp.state());
|
||||||
assertEquals("Alright then", rsp.payload());
|
assertEquals("Alright then", rsp.payload());
|
||||||
@ -167,17 +167,17 @@ public class MqOutboxTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSendMultipleAsyncInbox() throws Exception {
|
public void testSendBlockingMultipleAsyncInbox() throws Exception {
|
||||||
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
||||||
|
|
||||||
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
||||||
inbox.subscribe(echo());
|
inbox.subscribe(echo());
|
||||||
inbox.start();
|
inbox.start();
|
||||||
|
|
||||||
var rsp1 = outbox.send("test", "one");
|
var rsp1 = outbox.sendBlocking("test", "one");
|
||||||
var rsp2 = outbox.send("test", "two");
|
var rsp2 = outbox.sendBlocking("test", "two");
|
||||||
var rsp3 = outbox.send("test", "three");
|
var rsp3 = outbox.sendBlocking("test", "three");
|
||||||
var rsp4 = outbox.send("test", "four");
|
var rsp4 = outbox.sendBlocking("test", "four");
|
||||||
|
|
||||||
Thread.sleep(500);
|
Thread.sleep(500);
|
||||||
|
|
||||||
@ -201,17 +201,17 @@ public class MqOutboxTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSendMultipleSyncInbox() throws Exception {
|
public void testSendBlockingMultipleSyncInbox() throws Exception {
|
||||||
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
||||||
|
|
||||||
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
||||||
inbox.subscribe(echo());
|
inbox.subscribe(echo());
|
||||||
inbox.start();
|
inbox.start();
|
||||||
|
|
||||||
var rsp1 = outbox.send("test", "one");
|
var rsp1 = outbox.sendBlocking("test", "one");
|
||||||
var rsp2 = outbox.send("test", "two");
|
var rsp2 = outbox.sendBlocking("test", "two");
|
||||||
var rsp3 = outbox.send("test", "three");
|
var rsp3 = outbox.sendBlocking("test", "three");
|
||||||
var rsp4 = outbox.send("test", "four");
|
var rsp4 = outbox.sendBlocking("test", "four");
|
||||||
|
|
||||||
Thread.sleep(500);
|
Thread.sleep(500);
|
||||||
|
|
||||||
@ -235,13 +235,13 @@ public class MqOutboxTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSendAndRespondWithErrorHandlerAsyncInbox() throws Exception {
|
public void testSendBlockingAndRespondWithErrorHandlerAsyncInbox() throws Exception {
|
||||||
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
||||||
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
var inbox = new MqAsynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
||||||
|
|
||||||
inbox.start();
|
inbox.start();
|
||||||
|
|
||||||
var rsp = outbox.send("test", "Hello World");
|
var rsp = outbox.sendBlocking("test", "Hello World");
|
||||||
|
|
||||||
assertEquals(MqMessageState.ERR, rsp.state());
|
assertEquals(MqMessageState.ERR, rsp.state());
|
||||||
|
|
||||||
@ -254,13 +254,13 @@ public class MqOutboxTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSendAndRespondWithErrorHandlerSyncInbox() throws Exception {
|
public void testSendBlockingAndRespondWithErrorHandlerSyncInbox() throws Exception {
|
||||||
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
var outbox = new MqOutbox(new MqPersistence(dataSource), inboxId, 0, inboxId+"/reply", 0, UUID.randomUUID());
|
||||||
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
var inbox = new MqSynchronousInbox(new MqPersistence(dataSource), inboxId+":0", UUID.randomUUID());
|
||||||
|
|
||||||
inbox.start();
|
inbox.start();
|
||||||
|
|
||||||
var rsp = outbox.send("test", "Hello World");
|
var rsp = outbox.sendBlocking("test", "Hello World");
|
||||||
|
|
||||||
assertEquals(MqMessageState.ERR, rsp.state());
|
assertEquals(MqMessageState.ERR, rsp.state());
|
||||||
|
|
||||||
|
@ -53,7 +53,6 @@ dependencies {
|
|||||||
|
|
||||||
implementation project(':code:features-search:screenshots')
|
implementation project(':code:features-search:screenshots')
|
||||||
implementation project(':code:features-search:random-websites')
|
implementation project(':code:features-search:random-websites')
|
||||||
implementation project(':code:features-search:feedlot-client')
|
|
||||||
|
|
||||||
implementation libs.bundles.slf4j
|
implementation libs.bundles.slf4j
|
||||||
|
|
||||||
|
@ -1,15 +1,10 @@
|
|||||||
package nu.marginalia.search;
|
package nu.marginalia.search;
|
||||||
|
|
||||||
import com.google.inject.AbstractModule;
|
import com.google.inject.AbstractModule;
|
||||||
import com.google.inject.Provides;
|
|
||||||
import nu.marginalia.LanguageModels;
|
import nu.marginalia.LanguageModels;
|
||||||
import nu.marginalia.WebsiteUrl;
|
import nu.marginalia.WebsiteUrl;
|
||||||
import nu.marginalia.WmsaHome;
|
import nu.marginalia.WmsaHome;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
|
||||||
import nu.marginalia.renderer.config.HandlebarsConfigurator;
|
import nu.marginalia.renderer.config.HandlebarsConfigurator;
|
||||||
import nu.marginalia.feedlot.FeedlotClient;
|
|
||||||
|
|
||||||
import java.time.Duration;
|
|
||||||
|
|
||||||
public class SearchModule extends AbstractModule {
|
public class SearchModule extends AbstractModule {
|
||||||
|
|
||||||
@ -22,14 +17,4 @@ public class SearchModule extends AbstractModule {
|
|||||||
System.getProperty("search.websiteUrl", "https://search.marginalia.nu/")));
|
System.getProperty("search.websiteUrl", "https://search.marginalia.nu/")));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Provides
|
|
||||||
public FeedlotClient provideFeedlotClient() {
|
|
||||||
return new FeedlotClient(
|
|
||||||
System.getProperty("ext-svc-feedlot-bindAddress", "feedlot"),
|
|
||||||
Integer.getInteger("ext-svc-feedlot-port", 80),
|
|
||||||
GsonFactory.get(),
|
|
||||||
Duration.ofMillis(250),
|
|
||||||
Duration.ofMillis(100)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -4,10 +4,11 @@ import com.google.inject.Inject;
|
|||||||
import nu.marginalia.api.domains.DomainInfoClient;
|
import nu.marginalia.api.domains.DomainInfoClient;
|
||||||
import nu.marginalia.api.domains.model.DomainInformation;
|
import nu.marginalia.api.domains.model.DomainInformation;
|
||||||
import nu.marginalia.api.domains.model.SimilarDomain;
|
import nu.marginalia.api.domains.model.SimilarDomain;
|
||||||
|
import nu.marginalia.api.feeds.FeedsClient;
|
||||||
|
import nu.marginalia.api.feeds.RpcFeed;
|
||||||
|
import nu.marginalia.api.feeds.RpcFeedItem;
|
||||||
import nu.marginalia.api.livecapture.LiveCaptureClient;
|
import nu.marginalia.api.livecapture.LiveCaptureClient;
|
||||||
import nu.marginalia.db.DbDomainQueries;
|
import nu.marginalia.db.DbDomainQueries;
|
||||||
import nu.marginalia.feedlot.FeedlotClient;
|
|
||||||
import nu.marginalia.feedlot.model.FeedItems;
|
|
||||||
import nu.marginalia.model.EdgeDomain;
|
import nu.marginalia.model.EdgeDomain;
|
||||||
import nu.marginalia.renderer.MustacheRenderer;
|
import nu.marginalia.renderer.MustacheRenderer;
|
||||||
import nu.marginalia.renderer.RendererFactory;
|
import nu.marginalia.renderer.RendererFactory;
|
||||||
@ -37,7 +38,7 @@ public class SearchSiteInfoService {
|
|||||||
private final SearchFlagSiteService flagSiteService;
|
private final SearchFlagSiteService flagSiteService;
|
||||||
private final DbDomainQueries domainQueries;
|
private final DbDomainQueries domainQueries;
|
||||||
private final MustacheRenderer<Object> renderer;
|
private final MustacheRenderer<Object> renderer;
|
||||||
private final FeedlotClient feedlotClient;
|
private final FeedsClient feedsClient;
|
||||||
private final LiveCaptureClient liveCaptureClient;
|
private final LiveCaptureClient liveCaptureClient;
|
||||||
private final ScreenshotService screenshotService;
|
private final ScreenshotService screenshotService;
|
||||||
|
|
||||||
@ -47,7 +48,7 @@ public class SearchSiteInfoService {
|
|||||||
RendererFactory rendererFactory,
|
RendererFactory rendererFactory,
|
||||||
SearchFlagSiteService flagSiteService,
|
SearchFlagSiteService flagSiteService,
|
||||||
DbDomainQueries domainQueries,
|
DbDomainQueries domainQueries,
|
||||||
FeedlotClient feedlotClient,
|
FeedsClient feedsClient,
|
||||||
LiveCaptureClient liveCaptureClient,
|
LiveCaptureClient liveCaptureClient,
|
||||||
ScreenshotService screenshotService) throws IOException
|
ScreenshotService screenshotService) throws IOException
|
||||||
{
|
{
|
||||||
@ -58,7 +59,7 @@ public class SearchSiteInfoService {
|
|||||||
|
|
||||||
this.renderer = rendererFactory.renderer("search/site-info/site-info");
|
this.renderer = rendererFactory.renderer("search/site-info/site-info");
|
||||||
|
|
||||||
this.feedlotClient = feedlotClient;
|
this.feedsClient = feedsClient;
|
||||||
this.liveCaptureClient = liveCaptureClient;
|
this.liveCaptureClient = liveCaptureClient;
|
||||||
this.screenshotService = screenshotService;
|
this.screenshotService = screenshotService;
|
||||||
}
|
}
|
||||||
@ -135,26 +136,29 @@ public class SearchSiteInfoService {
|
|||||||
final Future<DomainInformation> domainInfoFuture;
|
final Future<DomainInformation> domainInfoFuture;
|
||||||
final Future<List<SimilarDomain>> similarSetFuture;
|
final Future<List<SimilarDomain>> similarSetFuture;
|
||||||
final Future<List<SimilarDomain>> linkingDomainsFuture;
|
final Future<List<SimilarDomain>> linkingDomainsFuture;
|
||||||
|
final CompletableFuture<RpcFeed> feedItemsFuture;
|
||||||
String url = "https://" + domainName + "/";
|
String url = "https://" + domainName + "/";
|
||||||
|
|
||||||
boolean hasScreenshot = screenshotService.hasScreenshot(domainId);
|
boolean hasScreenshot = screenshotService.hasScreenshot(domainId);
|
||||||
|
|
||||||
var feedItemsFuture = feedlotClient.getFeedItems(domainName);
|
|
||||||
if (domainId < 0) {
|
if (domainId < 0) {
|
||||||
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
similarSetFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
similarSetFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
|
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Unknown Domain ID"));
|
||||||
}
|
}
|
||||||
else if (!domainInfoClient.isAccepting()) {
|
else if (!domainInfoClient.isAccepting()) {
|
||||||
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
domainInfoFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
similarSetFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
similarSetFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
linkingDomainsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
|
feedItemsFuture = CompletableFuture.failedFuture(new Exception("Assistant Service Unavailable"));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
domainInfoFuture = domainInfoClient.domainInformation(domainId);
|
domainInfoFuture = domainInfoClient.domainInformation(domainId);
|
||||||
similarSetFuture = domainInfoClient.similarDomains(domainId, 25);
|
similarSetFuture = domainInfoClient.similarDomains(domainId, 25);
|
||||||
linkingDomainsFuture = domainInfoClient.linkedDomains(domainId, 25);
|
linkingDomainsFuture = domainInfoClient.linkedDomains(domainId, 25);
|
||||||
|
feedItemsFuture = feedsClient.getFeed(domainId);
|
||||||
}
|
}
|
||||||
|
|
||||||
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5);
|
List<UrlDetails> sampleResults = searchOperator.doSiteSearch(domainName, domainId,5);
|
||||||
@ -162,13 +166,6 @@ public class SearchSiteInfoService {
|
|||||||
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
|
url = sampleResults.getFirst().url.withPathAndParam("/", null).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
FeedItems feedItems = null;
|
|
||||||
try {
|
|
||||||
feedItems = feedItemsFuture.get();
|
|
||||||
} catch (Exception e) {
|
|
||||||
logger.debug("Failed to get feed items for {}: {}", domainName, e.getMessage());
|
|
||||||
}
|
|
||||||
|
|
||||||
var result = new SiteInfoWithContext(domainName,
|
var result = new SiteInfoWithContext(domainName,
|
||||||
domainId,
|
domainId,
|
||||||
url,
|
url,
|
||||||
@ -176,7 +173,7 @@ public class SearchSiteInfoService {
|
|||||||
waitForFuture(domainInfoFuture, () -> createDummySiteInfo(domainName)),
|
waitForFuture(domainInfoFuture, () -> createDummySiteInfo(domainName)),
|
||||||
waitForFuture(similarSetFuture, List::of),
|
waitForFuture(similarSetFuture, List::of),
|
||||||
waitForFuture(linkingDomainsFuture, List::of),
|
waitForFuture(linkingDomainsFuture, List::of),
|
||||||
feedItems,
|
waitForFuture(feedItemsFuture.thenApply(FeedItems::new), () -> FeedItems.dummyValue(domainName)),
|
||||||
sampleResults
|
sampleResults
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -356,6 +353,43 @@ public class SearchSiteInfoService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public record FeedItem(String title, String date, String description, String url) {
|
||||||
|
|
||||||
|
public FeedItem(RpcFeedItem rpcFeedItem) {
|
||||||
|
this(rpcFeedItem.getTitle(),
|
||||||
|
rpcFeedItem.getDate(),
|
||||||
|
rpcFeedItem.getDescription(),
|
||||||
|
rpcFeedItem.getUrl());
|
||||||
|
}
|
||||||
|
|
||||||
|
public String pubDay() { // Extract the date from an ISO style date string
|
||||||
|
if (date.length() > 10) {
|
||||||
|
return date.substring(0, 10);
|
||||||
|
}
|
||||||
|
return date;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String descriptionSafe() {
|
||||||
|
return description
|
||||||
|
.replace("<", "<")
|
||||||
|
.replace(">", ">");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public record FeedItems(String domain, String feedUrl, String updated, List<FeedItem> items) {
|
||||||
|
|
||||||
|
public static FeedItems dummyValue(String domain) {
|
||||||
|
return new FeedItems(domain, "", "", List.of());
|
||||||
|
}
|
||||||
|
|
||||||
|
public FeedItems(RpcFeed rpcFeedItems) {
|
||||||
|
this(rpcFeedItems.getDomain(),
|
||||||
|
rpcFeedItems.getFeedUrl(),
|
||||||
|
rpcFeedItems.getUpdated(),
|
||||||
|
rpcFeedItems.getItemsList().stream().map(FeedItem::new).toList());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public record ReportDomain(
|
public record ReportDomain(
|
||||||
Map<String, Boolean> view,
|
Map<String, Boolean> view,
|
||||||
String domain,
|
String domain,
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
|
{{#if feed.items}}
|
||||||
{{#with feed}}
|
{{#with feed}}
|
||||||
<h2><a title="Atom/RSS feed" target="external" href="{{feedUrl}}"><img width="16" height="16" src="/rss.svg"></a> Feed (Experimental)</h2>
|
<h2><a title="Atom/RSS feed" target="external" href="{{feedUrl}}"><img width="16" height="16" src="/rss.svg"></a> Feed</h2>
|
||||||
|
|
||||||
<dl>
|
<dl>
|
||||||
{{#each items}}
|
{{#each items}}
|
||||||
@ -8,8 +9,9 @@
|
|||||||
{{/each}}
|
{{/each}}
|
||||||
</dl>
|
</dl>
|
||||||
{{/with}}
|
{{/with}}
|
||||||
|
{{/if}}
|
||||||
|
|
||||||
{{#unless feed}}{{#if samples}}
|
{{#unless feed.items}}{{#if samples}}
|
||||||
<h2>Sample</h2>
|
<h2>Sample</h2>
|
||||||
<dl>
|
<dl>
|
||||||
{{#each samples}}
|
{{#each samples}}
|
||||||
|
@ -8,6 +8,7 @@ import nu.marginalia.functions.domains.DomainInfoGrpcService;
|
|||||||
import nu.marginalia.functions.math.MathGrpcService;
|
import nu.marginalia.functions.math.MathGrpcService;
|
||||||
import nu.marginalia.livecapture.LiveCaptureGrpcService;
|
import nu.marginalia.livecapture.LiveCaptureGrpcService;
|
||||||
import nu.marginalia.model.gson.GsonFactory;
|
import nu.marginalia.model.gson.GsonFactory;
|
||||||
|
import nu.marginalia.rss.svc.FeedsGrpcService;
|
||||||
import nu.marginalia.screenshot.ScreenshotService;
|
import nu.marginalia.screenshot.ScreenshotService;
|
||||||
import nu.marginalia.service.discovery.property.ServicePartition;
|
import nu.marginalia.service.discovery.property.ServicePartition;
|
||||||
import nu.marginalia.service.server.BaseServiceParams;
|
import nu.marginalia.service.server.BaseServiceParams;
|
||||||
@ -31,10 +32,15 @@ public class AssistantService extends Service {
|
|||||||
ScreenshotService screenshotService,
|
ScreenshotService screenshotService,
|
||||||
DomainInfoGrpcService domainInfoGrpcService,
|
DomainInfoGrpcService domainInfoGrpcService,
|
||||||
LiveCaptureGrpcService liveCaptureGrpcService,
|
LiveCaptureGrpcService liveCaptureGrpcService,
|
||||||
|
FeedsGrpcService feedsGrpcService,
|
||||||
MathGrpcService mathGrpcService,
|
MathGrpcService mathGrpcService,
|
||||||
Suggestions suggestions)
|
Suggestions suggestions)
|
||||||
{
|
{
|
||||||
super(params, ServicePartition.any(), List.of(domainInfoGrpcService, mathGrpcService, liveCaptureGrpcService));
|
super(params, ServicePartition.any(),
|
||||||
|
List.of(domainInfoGrpcService,
|
||||||
|
mathGrpcService,
|
||||||
|
liveCaptureGrpcService,
|
||||||
|
feedsGrpcService));
|
||||||
|
|
||||||
this.suggestions = suggestions;
|
this.suggestions = suggestions;
|
||||||
|
|
||||||
|
@ -56,7 +56,6 @@ include 'code:libraries:message-queue'
|
|||||||
|
|
||||||
include 'code:features-search:screenshots'
|
include 'code:features-search:screenshots'
|
||||||
include 'code:features-search:random-websites'
|
include 'code:features-search:random-websites'
|
||||||
include 'code:features-search:feedlot-client'
|
|
||||||
|
|
||||||
include 'code:processes:converting-process:ft-anchor-keywords'
|
include 'code:processes:converting-process:ft-anchor-keywords'
|
||||||
include 'code:execution:data-extractors'
|
include 'code:execution:data-extractors'
|
||||||
@ -155,6 +154,8 @@ dependencyResolutionManagement {
|
|||||||
library('prometheus-server', 'io.prometheus', 'simpleclient_httpserver').version('0.16.0')
|
library('prometheus-server', 'io.prometheus', 'simpleclient_httpserver').version('0.16.0')
|
||||||
library('prometheus-hotspot', 'io.prometheus', 'simpleclient_hotspot').version('0.16.0')
|
library('prometheus-hotspot', 'io.prometheus', 'simpleclient_hotspot').version('0.16.0')
|
||||||
|
|
||||||
|
library('rssreader', 'com.apptasticsoftware', 'rssreader').version('3.5.0')
|
||||||
|
|
||||||
library('slf4j.api', 'org.slf4j', 'slf4j-api').version('1.7.36')
|
library('slf4j.api', 'org.slf4j', 'slf4j-api').version('1.7.36')
|
||||||
library('slf4j.jdk14', 'org.slf4j', 'slf4j-jdk14').version('2.0.3')
|
library('slf4j.jdk14', 'org.slf4j', 'slf4j-jdk14').version('2.0.3')
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user