diff --git a/code/common/db/src/main/java/nu/marginalia/db/storage/FileStorageService.java b/code/common/db/src/main/java/nu/marginalia/db/storage/FileStorageService.java
new file mode 100644
index 00000000..75fa5ccf
--- /dev/null
+++ b/code/common/db/src/main/java/nu/marginalia/db/storage/FileStorageService.java
@@ -0,0 +1,253 @@
+package nu.marginalia.db.storage;
+
+import com.zaxxer.hikari.HikariDataSource;
+import nu.marginalia.db.storage.model.*;
+
+import javax.inject.Inject;
+import javax.inject.Singleton;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.attribute.PosixFilePermissions;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+
+/** Manages file storage for processes and services.
+ *
+ * Storage bases and the storages allocated within them are tracked in the
+ * FILE_STORAGE_BASE and FILE_STORAGE tables.
+ */
+@Singleton
+public class FileStorageService {
+    private final HikariDataSource dataSource;
+
+    @Inject
+    public FileStorageService(HikariDataSource dataSource) {
+        this.dataSource = dataSource;
+    }
+
+    /** @return the storage base with the given id, or null if it does not exist */
+    public FileStorageBase getStorageBase(FileStorageBaseId type) throws SQLException {
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.prepareStatement("""
+                     SELECT ID, NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP
+                     FROM FILE_STORAGE_BASE WHERE ID = ?
+                     """)) {
+            stmt.setLong(1, type.id());
+            try (var rs = stmt.executeQuery()) {
+                return rs.next() ? readStorageBase(rs) : null;
+            }
+        }
+    }
+
+    /** @return the storage base with the given type, or null if it does not exist */
+    public FileStorageBase getStorageBase(FileStorageBaseType type) throws SQLException {
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.prepareStatement("""
+                     SELECT ID, NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP
+                     FROM FILE_STORAGE_BASE WHERE TYPE = ?
+                     """)) {
+            stmt.setString(1, type.name());
+            try (var rs = stmt.executeQuery()) {
+                return rs.next() ? readStorageBase(rs) : null;
+            }
+        }
+    }
+
+    /** Registers a new storage base for an existing directory.
+     *
+     * @return the newly created storage base, looked up by its unique name
+     * @throws FileNotFoundException if the path does not exist
+     * @throws SQLException if the base could not be inserted or read back
+     */
+    public FileStorageBase createStorageBase(String name, Path path, FileStorageBaseType type, boolean mustClean, boolean permitTemp) throws SQLException, FileNotFoundException {
+
+        if (!Files.exists(path)) {
+            throw new FileNotFoundException("Storage base path does not exist: " + path);
+        }
+
+        try (var conn = dataSource.getConnection();
+             var insert = conn.prepareStatement("""
+                     INSERT INTO FILE_STORAGE_BASE(NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP)
+                     VALUES (?, ?, ?, ?, ?)
+                     """);
+             var query = conn.prepareStatement("""
+                     SELECT ID, NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP
+                     FROM FILE_STORAGE_BASE WHERE NAME = ?
+                     """)) {
+            insert.setString(1, name);
+            insert.setString(2, path.toString());
+            insert.setString(3, type.name());
+            insert.setBoolean(4, mustClean);
+            insert.setBoolean(5, permitTemp);
+
+            if (insert.executeUpdate() < 1) {
+                throw new SQLException("Failed to create storage base");
+            }
+
+            // Read back by NAME (unique) rather than by TYPE; several bases may
+            // share a type, and a lookup by type could return an older row
+            query.setString(1, name);
+            try (var rs = query.executeQuery()) {
+                if (rs.next()) {
+                    return readStorageBase(rs);
+                }
+            }
+        }
+
+        throw new SQLException("Failed to create storage base");
+    }
+
+    /** Allocate a temporary storage of the given type if temporary allocation is permitted
+     *
+     * @throws IllegalArgumentException if the base does not permit temporary storage
+     */
+    public FileStorage allocateTemporaryStorage(FileStorageBase base,
+                                                FileStorageType type,
+                                                String prefix,
+                                                String description) throws IOException, SQLException
+    {
+        if (!base.permitTemp()) {
+            throw new IllegalArgumentException("Temporary storage not permitted in base " + base.name());
+        }
+
+        Path tempDir = Files.createTempDirectory(base.asPath(), prefix,
+                PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))
+        );
+
+        // FILE_STORAGE.PATH is relative to the base and FILE_STORAGE_VIEW prepends
+        // the base path, so only the new directory's name is stored
+        String relativePath = tempDir.getFileName().toString();
+
+        return new FileStorage(
+                insertStorage(base, relativePath, type, description),
+                base,
+                type,
+                tempDir.toString(),
+                description
+        );
+    }
+
+
+    /** Allocate permanent storage in base
+     *
+     * @param relativePath path of the new directory, relative to the base
+     * @throws IllegalArgumentException if the path escapes the base or already exists
+     */
+    public FileStorage allocatePermanentStorage(FileStorageBase base, String relativePath, FileStorageType type, String description) throws IOException, SQLException {
+
+        Path basePath = base.asPath();
+        Path newDir = basePath.resolve(relativePath);
+
+        // An absolute or ".."-laden path would not match what the database records
+        if (!newDir.toAbsolutePath().normalize().startsWith(basePath.toAbsolutePath().normalize())) {
+            throw new IllegalArgumentException("Storage path is outside of base: " + newDir);
+        }
+
+        if (Files.exists(newDir)) {
+            throw new IllegalArgumentException("Storage already exists: " + newDir);
+        }
+
+        Files.createDirectory(newDir, PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x")));
+
+        return new FileStorage(
+                insertStorage(base, relativePath, type, description),
+                base,
+                type,
+                newDir.toString(),
+                description
+        );
+    }
+
+    /** @return the storage with the given id, or null if it does not exist */
+    public FileStorage getStorage(FileStorageId id) throws SQLException {
+
+        long storageId;
+        long baseId;
+        String path;
+        String description;
+        FileStorageType type;
+
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.prepareStatement("""
+                     SELECT PATH, TYPE, DESCRIPTION, ID, BASE_ID
+                     FROM FILE_STORAGE_VIEW WHERE ID = ?
+                     """)) {
+            stmt.setLong(1, id.id());
+
+            try (var rs = stmt.executeQuery()) {
+                if (!rs.next()) {
+                    return null;
+                }
+
+                baseId = rs.getLong("BASE_ID");
+                storageId = rs.getLong("ID");
+                type = FileStorageType.valueOf(rs.getString("TYPE"));
+                path = rs.getString("PATH");
+                description = rs.getString("DESCRIPTION");
+            }
+        }
+
+        // Look up the base only after the connection above has been returned,
+        // so one call never holds two pooled connections at once
+        var base = getStorageBase(new FileStorageBaseId(baseId));
+
+        return new FileStorage(
+                new FileStorageId(storageId),
+                base,
+                type,
+                path,
+                description
+        );
+    }
+
+    /** Inserts a FILE_STORAGE row and returns the id the database assigned to it */
+    private FileStorageId insertStorage(FileStorageBase base,
+                                        String relativePath,
+                                        FileStorageType type,
+                                        String description) throws SQLException
+    {
+        try (var conn = dataSource.getConnection();
+             var update = conn.prepareStatement("""
+                     INSERT INTO FILE_STORAGE(PATH, TYPE, DESCRIPTION, BASE_ID)
+                     VALUES (?, ?, ?, ?)
+                     """);
+             var query = conn.prepareStatement("""
+                     SELECT ID FROM FILE_STORAGE WHERE PATH = ? AND BASE_ID = ?
+                     """)
+        ) {
+            update.setString(1, relativePath);
+            update.setString(2, type.name());
+            update.setString(3, description);
+            update.setLong(4, base.id().id());
+
+            if (update.executeUpdate() < 1)
+                throw new SQLException("Failed to insert storage");
+
+            query.setString(1, relativePath);
+            query.setLong(2, base.id().id());
+
+            try (var rs = query.executeQuery()) {
+                if (rs.next()) {
+                    return new FileStorageId(rs.getLong("ID"));
+                }
+            }
+        }
+
+        throw new SQLException("Failed to insert storage");
+    }
+
+    /** Maps the current row of a FILE_STORAGE_BASE query (ID, NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP) */
+    private static FileStorageBase readStorageBase(ResultSet rs) throws SQLException {
+        return new FileStorageBase(
+                new FileStorageBaseId(rs.getLong(1)),
+                FileStorageBaseType.valueOf(rs.getString(4)),
+                rs.getString(2),
+                rs.getString(3),
+                rs.getBoolean(5),
+                rs.getBoolean(6)
+        );
+    }
+
+}
diff --git a/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorage.java b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorage.java
new file mode 100644
index 00000000..3a619809
--- /dev/null
+++ b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorage.java
@@ -0,0 +1,25 @@
+package nu.marginalia.db.storage.model;
+
+import java.nio.file.Path;
+
+/**
+ * Represents a file storage area
+ *
+ * @param id  the id of the storage in the database
+ * @param base the base of the storage
+ * @param type the type of data expected
+ * @param path the full path of the storage on disk
+ * @param description a description of the storage
+ */
+public record FileStorage(
+        FileStorageId id,
+        FileStorageBase base,
+        FileStorageType type,
+        String path,
+        String description)
+{
+    /** @return the storage path as a {@link Path} */
+    public Path asPath() {
+        return Path.of(path);
+    }
+}
diff --git a/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBase.java b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBase.java
new file mode 100644
index 00000000..96f09698
--- /dev/null
+++ b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBase.java
@@ -0,0 +1,26 @@
+package nu.marginalia.db.storage.model;
+
+import java.nio.file.Path;
+
+/**
+ * Represents a file storage base directory
+ *
+ * @param id  the id of the storage base in the database
+ * @param type the type of the storage base
+ * @param name the name of the storage base
+ * @param path the path of the storage base
+ * @param mustClean if true, the storage is small and *must* be cleaned after use
+ * @param permitTemp if true, the storage may be used for temporary files
+ */
+public record FileStorageBase(FileStorageBaseId id,
+                              FileStorageBaseType type,
+                              String name,
+                              String path,
+                              boolean mustClean,
+                              boolean permitTemp
+                              ) {
+    /** @return the base path as a {@link Path} */
+    public Path asPath() {
+        return Path.of(path);
+    }
+}
diff --git a/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBaseId.java b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBaseId.java
new file mode 100644
index 00000000..e4dbaf68
--- /dev/null
+++ b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBaseId.java
@@ -0,0 +1,4 @@
+package nu.marginalia.db.storage.model;
+
+/** Database id of a storage base, i.e. FILE_STORAGE_BASE.ID */
+public record FileStorageBaseId(long id) {}
diff --git a/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBaseType.java
b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBaseType.java
new file mode 100644
index 00000000..df9f497f
--- /dev/null
+++ b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageBaseType.java
@@ -0,0 +1,8 @@
+package nu.marginalia.db.storage.model;
+
+/** The kind of storage base; constant names must match the TYPE enum of the FILE_STORAGE_BASE table */
+public enum FileStorageBaseType {
+    SSD_INDEX,
+    SSD_WORK,
+    SLOW
+}
diff --git a/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageId.java b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageId.java
new file mode 100644
index 00000000..da8849ff
--- /dev/null
+++ b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageId.java
@@ -0,0 +1,4 @@
+package nu.marginalia.db.storage.model;
+
+/** Database id of a storage, i.e. FILE_STORAGE.ID */
+public record FileStorageId(long id) {}
diff --git a/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageType.java b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageType.java
new file mode 100644
index 00000000..04d5cc81
--- /dev/null
+++ b/code/common/db/src/main/java/nu/marginalia/db/storage/model/FileStorageType.java
@@ -0,0 +1,12 @@
+package nu.marginalia.db.storage.model;
+
+/** The kind of data held in a storage; constant names must match the TYPE enum of the FILE_STORAGE table */
+public enum FileStorageType {
+    CRAWL_SPEC,
+    CRAWL_DATA,
+    PROCESSED_DATA,
+    INDEX_STAGING,
+    LEXICON_STAGING,
+    INDEX_LIVE,
+    LEXICON_LIVE
+}
diff --git a/code/common/db/src/main/resources/sql/current/13-file-storage.sql b/code/common/db/src/main/resources/sql/current/13-file-storage.sql
new file mode 100644
index 00000000..c09b140b
--- /dev/null
+++ b/code/common/db/src/main/resources/sql/current/13-file-storage.sql
@@ -0,0 +1,36 @@
+CREATE TABLE IF NOT EXISTS FILE_STORAGE_BASE (
+    ID BIGINT PRIMARY KEY AUTO_INCREMENT,
+    NAME VARCHAR(255) NOT NULL UNIQUE,
+    PATH VARCHAR(255) NOT NULL UNIQUE COMMENT 'The path to the storage base',
+    TYPE ENUM ('SSD_INDEX', 'SSD_WORK', 'SLOW') NOT NULL,
+    MUST_CLEAN BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'If true, the storage must be cleaned after use',
+    PERMIT_TEMP BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'If true, the storage can be used for temporary files'
+)
+CHARACTER SET utf8mb4
+COLLATE utf8mb4_bin;
+
+CREATE TABLE IF NOT EXISTS FILE_STORAGE (
+    ID BIGINT PRIMARY KEY AUTO_INCREMENT,
+    BASE_ID BIGINT NOT NULL,
+    PATH VARCHAR(255) NOT NULL COMMENT 'The path to the storage relative to the base',
+    DESCRIPTION VARCHAR(255) NOT NULL,
+    TYPE ENUM ('CRAWL_SPEC', 'CRAWL_DATA', 'PROCESSED_DATA', 'INDEX_STAGING', 'LEXICON_STAGING', 'INDEX_LIVE', 'LEXICON_LIVE') NOT NULL,
+    DO_PURGE BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'If true, the storage may be cleaned',
+    CREATE_DATE TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6),
+    CONSTRAINT CONS UNIQUE (BASE_ID, PATH),
+    FOREIGN KEY (BASE_ID) REFERENCES FILE_STORAGE_BASE(ID) ON DELETE CASCADE
+)
+CHARACTER SET utf8mb4
+COLLATE utf8mb4_bin;
+
+-- Each storage joined with its base; PATH is the base path, a slash, and the relative storage path
+CREATE OR REPLACE VIEW FILE_STORAGE_VIEW
+AS SELECT
+    CONCAT(BASE.PATH, '/', STORAGE.PATH) AS PATH,
+    STORAGE.TYPE AS TYPE,
+    DESCRIPTION AS DESCRIPTION,
+    CREATE_DATE AS CREATE_DATE,
+    STORAGE.ID AS ID,
+    BASE.ID AS BASE_ID
+FROM FILE_STORAGE STORAGE
+INNER JOIN FILE_STORAGE_BASE BASE ON STORAGE.BASE_ID=BASE.ID;
diff --git a/code/common/db/src/test/java/nu/marginalia/db/storage/FileStorageServiceTest.java b/code/common/db/src/test/java/nu/marginalia/db/storage/FileStorageServiceTest.java
new file mode 100644
index 00000000..cfd1df26
--- /dev/null
+++ b/code/common/db/src/test/java/nu/marginalia/db/storage/FileStorageServiceTest.java
@@ -0,0 +1,153 @@
+package nu.marginalia.db.storage;
+
+import com.google.common.collect.Lists;
+import com.zaxxer.hikari.HikariConfig;
+import com.zaxxer.hikari.HikariDataSource;
+import nu.marginalia.db.storage.model.FileStorageBaseType;
+import nu.marginalia.db.storage.model.FileStorageType;
+import org.junit.jupiter.api.*;
+import org.junit.jupiter.api.parallel.Execution;
+import org.testcontainers.containers.MariaDBContainer;
+import org.testcontainers.junit.jupiter.Container;
+import org.testcontainers.junit.jupiter.Testcontainers;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.UUID;
+
+import static org.junit.Assert.*;
+import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
+
+/** Integration tests for FileStorageService, run against a MariaDB test container */
+@Testcontainers
+@Execution(SAME_THREAD)
+@Tag("slow")
+public class FileStorageServiceTest {
+    @Container
+    static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
+            .withDatabaseName("WMSA_prod")
+            .withUsername("wmsa")
+            .withPassword("wmsa")
+            .withInitScript("sql/current/13-file-storage.sql")
+            .withNetworkAliases("mariadb");
+
+    static HikariDataSource dataSource;
+    static FileStorageService fileStorageService;
+
+    // Directories created by the tests; deleted in reverse order of creation in teardown()
+    static List<Path> tempDirs = new ArrayList<>();
+
+    @BeforeAll
+    public static void setup() {
+        HikariConfig config = new HikariConfig();
+        config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
+        config.setUsername("wmsa");
+        config.setPassword("wmsa");
+
+        dataSource = new HikariDataSource(config);
+    }
+
+
+    @BeforeEach
+    public void setupEach() {
+        fileStorageService = new FileStorageService(dataSource);
+    }
+
+    @AfterEach
+    public void tearDownEach() {
+        try (var conn = dataSource.getConnection();
+             var stmt = conn.createStatement()) {
+            stmt.execute("DELETE FROM FILE_STORAGE");
+            stmt.execute("DELETE FROM FILE_STORAGE_BASE");
+        } catch (SQLException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    @AfterAll
+    public static void teardown() {
+        dataSource.close();
+
+        Lists.reverse(tempDirs).forEach(path -> {
+            try {
+                System.out.println("Deleting " + path);
+                Files.delete(path);
+            } catch (IOException e) {
+                // Best-effort cleanup; report the leftover directory and continue
+                e.printStackTrace();
+            }
+        });
+    }
+
+    private Path createTempDir() {
+        try {
+            Path dir = Files.createTempDirectory("file-storage-test");
+            tempDirs.add(dir);
+            return dir;
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+
+    }
+
+    @Test
+    public void testCreateBase() throws SQLException, FileNotFoundException {
+        String name = "test-" + UUID.randomUUID();
+
+        var storage = new FileStorageService(dataSource);
+        var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false, false);
+
+        Assertions.assertEquals(name, base.name());
+        Assertions.assertEquals(FileStorageBaseType.SLOW, base.type());
+        Assertions.assertFalse(base.mustClean());
+        Assertions.assertFalse(base.permitTemp());
+    }
+
+    @Test
+    public void testAllocateTempInNonPermitted() throws SQLException, FileNotFoundException {
+        String name = "test-" + UUID.randomUUID();
+
+        var storage = new FileStorageService(dataSource);
+
+        var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false, false);
+
+        Assertions.assertThrows(IllegalArgumentException.class, () ->
+                storage.allocateTemporaryStorage(base, FileStorageType.CRAWL_DATA, "xyz", "thisShouldFail"));
+    }
+
+    @Test
+    public void testAllocatePermanentInNonPermitted() throws SQLException, IOException {
+        String name = "test-" + UUID.randomUUID();
+
+        var storage = new FileStorageService(dataSource);
+
+        // Permanent allocation is expected to succeed even though permitTemp is false
+        var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false, false);
+
+        var created = storage.allocatePermanentStorage(base, "xyz", FileStorageType.CRAWL_DATA, "thisShouldFail");
+        tempDirs.add(created.asPath());
+
+        var actual = storage.getStorage(created.id());
+        Assertions.assertEquals(created, actual);
+    }
+
+    @Test
+    public void testAllocateTempInPermitted() throws IOException, SQLException {
+        String name = "test-" + UUID.randomUUID();
+
+        var storage = new FileStorageService(dataSource);
+
+        var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false, true);
+        var fileStorage = storage.allocateTemporaryStorage(base, FileStorageType.CRAWL_DATA, "xyz", "thisShouldSucceed");
+
+        Assertions.assertTrue(Files.exists(fileStorage.asPath()));
+        tempDirs.add(fileStorage.asPath());
+    }
+
+
+}
\ No newline at end of file