mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 04:58:59 +00:00
(linkdb) New Module for sqlite-backed document db
This commit is contained in:
parent
e8c0648e04
commit
b22f4fbb72
56
code/common/linkdb/build.gradle
Normal file
56
code/common/linkdb/build.gradle
Normal file
@ -0,0 +1,56 @@
|
||||
// Build script for the sqlite-backed link database module (code/common/linkdb).

plugins {
    id 'java'
    id "io.freefair.lombok" version "8.2.2"
    id 'jvm-test-suite'
}

java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(20))
    }
}

configurations {
    // NOTE(review): no Flyway plugin is applied in this script, so this configuration
    // looks like a leftover from another module -- confirm before removing.
    flywayMigration.extendsFrom(implementation)
}

dependencies {
    implementation project(':code:common:model')

    implementation libs.lombok
    annotationProcessor libs.lombok
    implementation libs.bundles.slf4j

    implementation libs.guice
    implementation libs.bundles.gson

    implementation libs.notnull

    implementation libs.sqlite
    implementation libs.commons.lang3

    implementation libs.trove

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito

    // NOTE(review): the module itself only talks to sqlite; the MariaDB testcontainers
    // dependencies below are presumably not needed here -- confirm before removing.
    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
    testImplementation 'org.testcontainers:mariadb:1.17.4'
    testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
}

// Fork and heap settings shared by every Test task in this module.
tasks.withType(Test).configureEach {
    // Use half the available cores, but never fewer than one fork
    // (0 is falsy in Groovy, so the elvis operator falls back to 1).
    maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1
    maxHeapSize = "8G"
}

test {
    useJUnitPlatform()
}

// Same as 'test', but skips everything tagged "slow".
// Registered lazily so the task is only configured when it is actually requested.
tasks.register('fastTests', Test) {
    // Custom Test tasks should declare their classes and classpath explicitly;
    // relying on the implicit convention is deprecated in recent Gradle versions.
    testClassesDirs = sourceSets.test.output.classesDirs
    classpath = sourceSets.test.runtimeClasspath

    useJUnitPlatform {
        excludeTags "slow"
    }
}
|
@ -0,0 +1,84 @@
|
||||
package nu.marginalia.linkdb;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.google.inject.name.Named;
|
||||
import gnu.trove.list.TLongList;
|
||||
import nu.marginalia.linkdb.model.UrlDetail;
|
||||
import nu.marginalia.linkdb.model.UrlProtocol;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@Singleton
|
||||
public class LinkdbReader {
|
||||
Path dbFile;
|
||||
volatile Connection connection;
|
||||
|
||||
@Inject
|
||||
public LinkdbReader(@Named("linkdb-file") Path dbFile) throws SQLException {
|
||||
this.dbFile = dbFile;
|
||||
connection = createConnection();
|
||||
}
|
||||
|
||||
private Connection createConnection() throws SQLException {
|
||||
String connStr = "jdbc:sqlite:" + dbFile.toString();
|
||||
return DriverManager.getConnection(connStr);
|
||||
}
|
||||
|
||||
public void switchInput(Path newDbFile) throws IOException, SQLException {
|
||||
connection.close();
|
||||
|
||||
Files.move(newDbFile, dbFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
connection = createConnection();
|
||||
}
|
||||
|
||||
public List<UrlDetail> getUrlDetails(TLongList ids) throws SQLException {
|
||||
List<UrlDetail> ret = new ArrayList<>(ids.size());
|
||||
|
||||
if (connection.isClosed()) {
|
||||
throw new RuntimeException("URL query temporarily unavailable due to database switch");
|
||||
}
|
||||
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
SELECT ID, URL, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR
|
||||
FROM DOCUMENT WHERE ID = ?
|
||||
""")) {
|
||||
for (int i = 0; i < ids.size(); i++) {
|
||||
long id = ids.get(i);
|
||||
stmt.setLong(1, id);
|
||||
var rs = stmt.executeQuery();
|
||||
if (rs.next()) {
|
||||
var url = new EdgeUrl(rs.getString("URL"));
|
||||
ret.add(new UrlDetail(
|
||||
rs.getLong("ID"),
|
||||
url,
|
||||
rs.getString("TITLE"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
rs.getDouble("QUALITY"),
|
||||
rs.getString("FORMAT"),
|
||||
rs.getInt("FEATURES"),
|
||||
rs.getInt("PUB_YEAR"),
|
||||
rs.getLong("DATA_HASH"),
|
||||
rs.getInt("WORDS_TOTAL")
|
||||
));
|
||||
}
|
||||
}
|
||||
} catch (URISyntaxException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
@ -0,0 +1,81 @@
|
||||
package nu.marginalia.linkdb;
|
||||
|
||||
import nu.marginalia.linkdb.model.UrlDetail;
|
||||
import nu.marginalia.linkdb.model.UrlProtocol;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Types;
|
||||
import java.util.List;
|
||||
|
||||
public class LinkdbWriter {
|
||||
|
||||
private final Connection connection;
|
||||
|
||||
public LinkdbWriter(Path outputFile) throws SQLException {
|
||||
String connStr = "jdbc:sqlite:" + outputFile.toString();
|
||||
connection = DriverManager.getConnection(connStr);
|
||||
|
||||
try (var stream = ClassLoader.getSystemResourceAsStream("db/linkdb.sql");
|
||||
var stmt = connection.createStatement()
|
||||
) {
|
||||
var sql = new String(stream.readAllBytes());
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
// Disable synchronous writing as this is a one-off operation with no recovery
|
||||
stmt.execute("PRAGMA synchronous = OFF");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void add(UrlDetail urlDetail) throws SQLException {
|
||||
add(List.of(urlDetail));
|
||||
}
|
||||
|
||||
public void add(List<UrlDetail> urlDetail) throws SQLException {
|
||||
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
INSERT OR IGNORE INTO DOCUMENT(ID, URL, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""")) {
|
||||
|
||||
int i = 0;
|
||||
for (var document : urlDetail) {
|
||||
var url = document.url();
|
||||
|
||||
stmt.setLong(1, document.urlId());
|
||||
stmt.setString(2, url.toString());
|
||||
|
||||
stmt.setString(3, document.title());
|
||||
stmt.setString(4, document.description());
|
||||
stmt.setInt(5, document.wordsTotal());
|
||||
stmt.setString(6, document.format());
|
||||
stmt.setInt(7, document.features());
|
||||
stmt.setLong(8, document.dataHash());
|
||||
stmt.setDouble(9, document.urlQuality());
|
||||
if (document.pubYear() == null) {
|
||||
stmt.setNull(10, Types.INTEGER);
|
||||
} else {
|
||||
stmt.setInt(10, document.pubYear());
|
||||
}
|
||||
|
||||
stmt.addBatch();
|
||||
|
||||
if (++i > 1000) {
|
||||
stmt.executeBatch();
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (i != 0) stmt.executeBatch();
|
||||
}
|
||||
}
|
||||
|
||||
public void close() throws SQLException {
|
||||
connection.close();
|
||||
}
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
package nu.marginalia.linkdb.model;
|
||||
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
|
||||
public record UrlDetail(long urlId,
|
||||
EdgeUrl url,
|
||||
String title,
|
||||
String description,
|
||||
double urlQuality,
|
||||
String format,
|
||||
int features,
|
||||
Integer pubYear,
|
||||
long dataHash,
|
||||
int wordsTotal
|
||||
)
|
||||
|
||||
{
|
||||
}
|
@ -0,0 +1,24 @@
|
||||
package nu.marginalia.linkdb.model;
|
||||
|
||||
/**
 * The URL schemes known to the link database, with helpers for converting
 * between the scheme name and the enum ordinal.
 */
public enum UrlProtocol {
    HTTP,
    HTTPS;

    /**
     * Maps a scheme name to the ordinal of the matching constant.
     *
     * @param str scheme name, matched case-insensitively against "http" and "https"
     * @return the ordinal of {@link #HTTP} or {@link #HTTPS}
     * @throws IllegalArgumentException if {@code str} is neither scheme (including {@code null})
     */
    public static int encode(String str) {
        final UrlProtocol match;

        if ("https".equalsIgnoreCase(str)) {
            match = HTTPS;
        } else if ("http".equalsIgnoreCase(str)) {
            match = HTTP;
        } else {
            throw new IllegalArgumentException(str);
        }

        return match.ordinal();
    }

    /**
     * Maps an ordinal produced by {@link #encode(String)} back to the lower-case scheme name.
     *
     * @param ordinal ordinal of one of the enum constants
     * @return "http" or "https"
     * @throws ArrayIndexOutOfBoundsException if {@code ordinal} is not a valid ordinal
     */
    public static String decode(int ordinal) {
        final UrlProtocol protocol = values()[ordinal];

        return switch (protocol) {
            case HTTPS -> "https";
            case HTTP -> "http";
        };
    }
}
|
17
code/common/linkdb/src/main/resources/db/linkdb.sql
Normal file
17
code/common/linkdb/src/main/resources/db/linkdb.sql
Normal file
@ -0,0 +1,17 @@
|
||||
CREATE TABLE DOCUMENT (
    -- Document id. Declared as exactly INTEGER PRIMARY KEY so that SQLite uses the
    -- column as the table's rowid instead of maintaining a separate index for it.
    ID INTEGER PRIMARY KEY,

    URL TEXT,

    -- Not populated by the writer's INSERT statement
    STATE INT,
    TITLE TEXT NOT NULL,
    DESCRIPTION TEXT NOT NULL,

    WORDS_TOTAL INTEGER NOT NULL,
    FORMAT TEXT NOT NULL,
    FEATURES INTEGER NOT NULL,

    DATA_HASH INTEGER NOT NULL,
    QUALITY REAL NOT NULL,
    PUB_YEAR INTEGER NOT NULL
);
|
@ -0,0 +1,42 @@
|
||||
package nu.marginalia.linkdb;
|
||||
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import nu.marginalia.linkdb.model.UrlDetail;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.SQLException;
|
||||
|
||||
public class LinkdbWriterTest {
|
||||
@Test
|
||||
public void testCreate() throws IOException {
|
||||
Path tempPath = Files.createTempFile("linkdb", ".db");
|
||||
try {
|
||||
var writer = new LinkdbWriter(tempPath);
|
||||
writer.add(new UrlDetail(
|
||||
1,
|
||||
new nu.marginalia.model.EdgeUrl("http", new EdgeDomain("example.com"), null, "/", null),
|
||||
"Test",
|
||||
"This is a test",
|
||||
-4.,
|
||||
"XHTML",
|
||||
5,
|
||||
2020,
|
||||
0xF00BA3,
|
||||
444
|
||||
));
|
||||
writer.close();
|
||||
|
||||
var reader = new LinkdbReader(tempPath);
|
||||
var deets = reader.getUrlDetails(new TLongArrayList(new long[]{1}));
|
||||
System.out.println(deets);
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempPath);
|
||||
}
|
||||
}
|
||||
}
|
@ -51,6 +51,7 @@ include 'code:api:process-mqapi'
|
||||
include 'code:common:service-discovery'
|
||||
include 'code:common:service-client'
|
||||
include 'code:common:db'
|
||||
include 'code:common:linkdb'
|
||||
include 'code:common:service'
|
||||
include 'code:common:config'
|
||||
include 'code:common:model'
|
||||
|
Loading…
Reference in New Issue
Block a user