(db) Model for file storage areas

This commit is contained in:
Viktor Lofgren 2023-07-14 11:40:05 +02:00
parent d36e36c8fd
commit 23169ad818
9 changed files with 503 additions and 0 deletions

View File

@ -0,0 +1,240 @@
package nu.marginalia.db.storage;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.db.storage.model.*;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.PosixFilePermissions;
import java.sql.ResultSet;
import java.sql.SQLException;
/**
 * Manages file storage for processes and services.
 *
 * <p>Storage bases are rows in {@code FILE_STORAGE_BASE}. Individual storage areas are rows in
 * {@code FILE_STORAGE}, whose {@code PATH} column is stored relative to the owning base; the
 * full path is obtained by reading through {@code FILE_STORAGE_VIEW}, which prefixes the base
 * path.
 */
@Singleton
public class FileStorageService {
    /** POSIX permissions applied to every storage directory created by this service. */
    private static final String STORAGE_DIR_PERMISSIONS = "rwxr-xr-x";

    private final HikariDataSource dataSource;

    @Inject
    public FileStorageService(HikariDataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Looks up a storage base by its database id.
     *
     * @param type the id of the storage base (the parameter name is historical)
     * @return the storage base with the given id, or null if it does not exist
     * @throws SQLException if the query fails
     */
    public FileStorageBase getStorageBase(FileStorageBaseId type) throws SQLException {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT ID, NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP
                     FROM FILE_STORAGE_BASE WHERE ID = ?
                     """)) {
            stmt.setLong(1, type.id());
            try (var rs = stmt.executeQuery()) {
                return rs.next() ? readStorageBase(rs) : null;
            }
        }
    }

    /**
     * Looks up a storage base by its type.
     *
     * <p>The query has no ordering, so if several bases share the type, the first row the
     * database happens to return is used.
     *
     * @param type the type of storage base to look for
     * @return a storage base with the given type, or null if none exists
     * @throws SQLException if the query fails
     */
    public FileStorageBase getStorageBase(FileStorageBaseType type) throws SQLException {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT ID, NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP
                     FROM FILE_STORAGE_BASE WHERE TYPE = ?
                     """)) {
            stmt.setString(1, type.name());
            try (var rs = stmt.executeQuery()) {
                return rs.next() ? readStorageBase(rs) : null;
            }
        }
    }

    /**
     * Registers an existing directory as a storage base.
     *
     * @param name       unique name of the storage base
     * @param path       directory backing the storage base; must already exist
     * @param type       the type of the storage base
     * @param mustClean  whether the storage must be cleaned after use
     * @param permitTemp whether temporary storage may be allocated in this base
     * @return the newly created storage base as stored in the database
     * @throws FileNotFoundException if {@code path} does not exist
     * @throws SQLException          if the row could not be inserted or read back
     */
    public FileStorageBase createStorageBase(String name, Path path, FileStorageBaseType type, boolean mustClean, boolean permitTemp) throws SQLException, FileNotFoundException {
        if (!Files.exists(path)) {
            throw new FileNotFoundException("Storage base path does not exist: " + path);
        }

        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     INSERT INTO FILE_STORAGE_BASE(NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP)
                     VALUES (?, ?, ?, ?, ?)
                     """)) {
            stmt.setString(1, name);
            stmt.setString(2, path.toString());
            stmt.setString(3, type.name());
            stmt.setBoolean(4, mustClean);
            stmt.setBoolean(5, permitTemp);

            // executeUpdate never returns a negative count; zero rows means nothing was inserted
            if (stmt.executeUpdate() < 1) {
                throw new SQLException("Failed to create storage base");
            }
        }

        // Read back by NAME, which is unique. TYPE is not, so looking up by type could
        // return some other base that happens to share it.
        FileStorageBase created = findStorageBaseByName(name);
        if (created == null) {
            throw new SQLException("Failed to create storage base");
        }
        return created;
    }

    /**
     * Allocate a temporary storage of the given type if temporary allocation is permitted.
     *
     * @param base        the base in which to create the storage directory
     * @param type        the type of data the storage will hold
     * @param prefix      prefix of the generated directory name
     * @param description a description of the storage
     * @return the newly allocated storage, with {@code path} set to the full directory path
     * @throws IllegalArgumentException if the base does not permit temporary storage
     * @throws IOException              if the directory could not be created
     * @throws SQLException             if the storage could not be registered in the database
     */
    public FileStorage allocateTemporaryStorage(FileStorageBase base,
                                                FileStorageType type,
                                                String prefix,
                                                String description) throws IOException, SQLException
    {
        if (!base.permitTemp()) {
            throw new IllegalArgumentException("Temporary storage not permitted in base " + base.name());
        }

        Path tempDir = Files.createTempDirectory(base.asPath(), prefix,
                PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString(STORAGE_DIR_PERMISSIONS))
        );

        // FILE_STORAGE.PATH is relative to the base; FILE_STORAGE_VIEW prepends the base path.
        // Storing the full path here would make getStorage() report a doubled path.
        String relativePath = base.asPath().relativize(tempDir).toString();

        long id = insertStorage(base, relativePath, type, description);
        return new FileStorage(new FileStorageId(id), base, type, tempDir.toString(), description);
    }

    /**
     * Allocate permanent storage in base.
     *
     * @param base         the base in which to create the storage directory
     * @param relativePath path of the new directory, relative to the base
     * @param type         the type of data the storage will hold
     * @param description  a description of the storage
     * @return the newly allocated storage, with {@code path} set to the full directory path
     * @throws IllegalArgumentException if the directory already exists
     * @throws IOException              if the directory could not be created
     * @throws SQLException             if the storage could not be registered in the database
     */
    public FileStorage allocatePermanentStorage(FileStorageBase base, String relativePath, FileStorageType type, String description) throws IOException, SQLException {
        Path newDir = base.asPath().resolve(relativePath);

        if (Files.exists(newDir)) {
            throw new IllegalArgumentException("Storage already exists: " + newDir);
        }

        Files.createDirectory(newDir,
                PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString(STORAGE_DIR_PERMISSIONS)));

        long id = insertStorage(base, relativePath, type, description);
        return new FileStorage(new FileStorageId(id), base, type, newDir.toString(), description);
    }

    /**
     * Looks up a storage area by its database id.
     *
     * @param id the id of the storage
     * @return the storage with the given id, or null if it does not exist
     * @throws SQLException if a query fails
     */
    public FileStorage getStorage(FileStorageId id) throws SQLException {
        long storageId;
        long baseId;
        String path;
        String description;
        FileStorageType type;

        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT PATH, TYPE, DESCRIPTION, ID, BASE_ID
                     FROM FILE_STORAGE_VIEW WHERE ID = ?
                     """)) {
            stmt.setLong(1, id.id());

            try (var rs = stmt.executeQuery()) {
                if (!rs.next()) {
                    return null;
                }
                baseId = rs.getLong("BASE_ID");
                storageId = rs.getLong("ID");
                type = FileStorageType.valueOf(rs.getString("TYPE"));
                path = rs.getString("PATH");
                description = rs.getString("DESCRIPTION");
            }
        }

        // The connection above is released before the base lookup borrows its own, so a
        // single call never needs two pooled connections at the same time.
        FileStorageBase base = getStorageBase(new FileStorageBaseId(baseId));

        return new FileStorage(new FileStorageId(storageId), base, type, path, description);
    }

    /** Looks up a storage base by its unique name; returns null if there is no such base. */
    private FileStorageBase findStorageBaseByName(String name) throws SQLException {
        try (var conn = dataSource.getConnection();
             var stmt = conn.prepareStatement("""
                     SELECT ID, NAME, PATH, TYPE, MUST_CLEAN, PERMIT_TEMP
                     FROM FILE_STORAGE_BASE WHERE NAME = ?
                     """)) {
            stmt.setString(1, name);
            try (var rs = stmt.executeQuery()) {
                return rs.next() ? readStorageBase(rs) : null;
            }
        }
    }

    /** Maps the current row of a FILE_STORAGE_BASE result set to a {@link FileStorageBase}. */
    private static FileStorageBase readStorageBase(ResultSet rs) throws SQLException {
        return new FileStorageBase(
                new FileStorageBaseId(rs.getLong("ID")),
                FileStorageBaseType.valueOf(rs.getString("TYPE")),
                rs.getString("NAME"),
                rs.getString("PATH"),
                rs.getBoolean("MUST_CLEAN"),
                rs.getBoolean("PERMIT_TEMP")
        );
    }

    /** Inserts a FILE_STORAGE row with the given base-relative path and returns its id. */
    private long insertStorage(FileStorageBase base,
                               String relativePath,
                               FileStorageType type,
                               String description) throws SQLException {
        try (var conn = dataSource.getConnection();
             var update = conn.prepareStatement("""
                     INSERT INTO FILE_STORAGE(PATH, TYPE, DESCRIPTION, BASE_ID)
                     VALUES (?, ?, ?, ?)
                     """);
             var query = conn.prepareStatement("""
                     SELECT ID FROM FILE_STORAGE WHERE PATH = ? AND BASE_ID = ?
                     """)
        ) {
            update.setString(1, relativePath);
            update.setString(2, type.name());
            update.setString(3, description);
            update.setLong(4, base.id().id());

            if (update.executeUpdate() < 1) {
                throw new SQLException("Failed to insert storage");
            }

            // (BASE_ID, PATH) is unique, so this finds exactly the row inserted above
            query.setString(1, relativePath);
            query.setLong(2, base.id().id());

            try (var rs = query.executeQuery()) {
                if (rs.next()) {
                    return rs.getLong("ID");
                }
            }
        }

        throw new SQLException("Failed to insert storage");
    }
}

View File

@ -0,0 +1,24 @@
package nu.marginalia.db.storage.model;
import java.nio.file.Path;
/**
 * A single file storage area located inside a {@link FileStorageBase}.
 *
 * @param id          the id of the storage in the database
 * @param base        the base of the storage
 * @param type        the type of data expected
 * @param path        the full path of the storage on disk
 * @param description a description of the storage
 */
public record FileStorage(FileStorageId id,
                          FileStorageBase base,
                          FileStorageType type,
                          String path,
                          String description) {

    /** @return {@link #path()} converted to a {@link Path} */
    public Path asPath() {
        String location = path();
        return Path.of(location);
    }
}

View File

@ -0,0 +1,25 @@
package nu.marginalia.db.storage.model;
import java.nio.file.Path;
/**
 * A base directory under which file storage areas are allocated.
 *
 * @param id         the id of the storage base in the database
 * @param type       the type of the storage base
 * @param name       the name of the storage base
 * @param path       the path of the storage base
 * @param mustClean  if true, the storage is small and *must* be cleaned after use
 * @param permitTemp if true, the storage may be used for temporary files
 */
public record FileStorageBase(
        FileStorageBaseId id,
        FileStorageBaseType type,
        String name,
        String path,
        boolean mustClean,
        boolean permitTemp) {

    /** @return {@link #path()} converted to a {@link Path} */
    public Path asPath() {
        String location = path();
        return Path.of(location);
    }
}

View File

@ -0,0 +1,3 @@
package nu.marginalia.db.storage.model;
/**
 * Typed wrapper around the numeric id of a storage base.
 *
 * @param id the value of the ID column of the FILE_STORAGE_BASE table
 */
public record FileStorageBaseId(long id) {}

View File

@ -0,0 +1,7 @@
package nu.marginalia.db.storage.model;
/**
 * The kinds of storage base.
 *
 * <p>The constant names are persisted as-is in the TYPE column of the FILE_STORAGE_BASE table
 * and parsed back with {@code valueOf}, so they must stay in sync with the values of that
 * column's ENUM definition.
 */
public enum FileStorageBaseType {
SSD_INDEX,
SSD_WORK,
SLOW
}

View File

@ -0,0 +1,3 @@
package nu.marginalia.db.storage.model;
/**
 * Typed wrapper around the numeric id of a file storage area.
 *
 * @param id the value of the ID column of the FILE_STORAGE table
 */
public record FileStorageId(long id) {}

View File

@ -0,0 +1,11 @@
package nu.marginalia.db.storage.model;
/**
 * The kinds of data a file storage area is expected to hold.
 *
 * <p>The constant names are persisted as-is in the TYPE column of the FILE_STORAGE table
 * and parsed back with {@code valueOf}, so they must stay in sync with the values of that
 * column's ENUM definition.
 */
public enum FileStorageType {
CRAWL_SPEC,
CRAWL_DATA,
PROCESSED_DATA,
INDEX_STAGING,
LEXICON_STAGING,
INDEX_LIVE,
LEXICON_LIVE
}

View File

@ -0,0 +1,35 @@
-- Base directories under which file storage areas are allocated.
-- NAME and PATH are each unique, TYPE is not, so several bases may share a type.
-- The TYPE values must match the constant names of the Java enum FileStorageBaseType.
CREATE TABLE IF NOT EXISTS FILE_STORAGE_BASE (
ID BIGINT PRIMARY KEY AUTO_INCREMENT,
NAME VARCHAR(255) NOT NULL UNIQUE,
PATH VARCHAR(255) NOT NULL UNIQUE COMMENT 'The path to the storage base',
TYPE ENUM ('SSD_INDEX', 'SSD_WORK', 'SLOW') NOT NULL,
MUST_CLEAN BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'If true, the storage must be cleaned after use',
PERMIT_TEMP BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'If true, the storage can be used for temporary files'
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_bin;
-- Individual storage areas, each belonging to one row of FILE_STORAGE_BASE.
-- PATH is relative to the base and is unique within a base (constraint CONS).
-- Deleting a base row also deletes its storage rows (ON DELETE CASCADE).
-- The TYPE values must match the constant names of the Java enum FileStorageType.
CREATE TABLE IF NOT EXISTS FILE_STORAGE (
ID BIGINT PRIMARY KEY AUTO_INCREMENT,
BASE_ID BIGINT NOT NULL,
PATH VARCHAR(255) NOT NULL COMMENT 'The path to the storage relative to the base',
DESCRIPTION VARCHAR(255) NOT NULL,
TYPE ENUM ('CRAWL_SPEC', 'CRAWL_DATA', 'PROCESSED_DATA', 'INDEX_STAGING', 'LEXICON_STAGING', 'INDEX_LIVE', 'LEXICON_LIVE') NOT NULL,
DO_PURGE BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'If true, the storage may be cleaned',
CREATE_DATE TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6),
CONSTRAINT CONS UNIQUE (BASE_ID, PATH),
FOREIGN KEY (BASE_ID) REFERENCES FILE_STORAGE_BASE(ID) ON DELETE CASCADE
)
CHARACTER SET utf8mb4
COLLATE utf8mb4_bin;
-- Storage areas joined with their base, exposing the full path (base path, a slash,
-- then the base-relative storage path) instead of the relative one.
-- OR REPLACE keeps the script re-runnable, in line with the IF NOT EXISTS tables above.
CREATE OR REPLACE VIEW FILE_STORAGE_VIEW
AS SELECT
    CONCAT(BASE.PATH, '/', STORAGE.PATH) AS PATH,
    STORAGE.TYPE AS TYPE,
    STORAGE.DESCRIPTION AS DESCRIPTION,
    STORAGE.CREATE_DATE AS CREATE_DATE,
    STORAGE.ID AS ID,
    BASE.ID AS BASE_ID
FROM FILE_STORAGE STORAGE
INNER JOIN FILE_STORAGE_BASE BASE ON STORAGE.BASE_ID=BASE.ID;

View File

@ -0,0 +1,155 @@
package nu.marginalia.db.storage;
import com.google.common.collect.Lists;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import nu.marginalia.db.storage.model.FileStorageBaseType;
import nu.marginalia.db.storage.model.FileStorageType;
import org.junit.jupiter.api.*;
import org.junit.jupiter.api.parallel.Execution;
import org.testcontainers.containers.MariaDBContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import static org.junit.Assert.*;
import static org.junit.jupiter.api.parallel.ExecutionMode.SAME_THREAD;
/**
 * Integration tests for {@link FileStorageService}, run against a MariaDB test container
 * initialised with the file storage schema.
 */
@Testcontainers
@Execution(SAME_THREAD)
@Tag("slow")
public class FileStorageServiceTest {
    @Container
    static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
            .withDatabaseName("WMSA_prod")
            .withUsername("wmsa")
            .withPassword("wmsa")
            .withInitScript("sql/current/13-file-storage.sql")
            .withNetworkAliases("mariadb");

    static HikariDataSource dataSource;
    static FileStorageService fileStorageService;

    /** Directories created by the tests, in creation order, so parents come before children. */
    static List<Path> tempDirs = new ArrayList<>();

    @BeforeAll
    public static void setup() {
        HikariConfig config = new HikariConfig();
        config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
        config.setUsername("wmsa");
        config.setPassword("wmsa");

        dataSource = new HikariDataSource(config);
    }

    @BeforeEach
    public void setupEach() {
        fileStorageService = new FileStorageService(dataSource);
    }

    /** Empties both tables so each test starts without rows left over from the previous one. */
    @AfterEach
    public void tearDownEach() {
        try (var conn = dataSource.getConnection();
             var stmt = conn.createStatement()) {
            stmt.execute("DELETE FROM FILE_STORAGE");
            stmt.execute("DELETE FROM FILE_STORAGE_BASE");
        } catch (SQLException e) {
            throw new RuntimeException(e);
        }
    }

    @AfterAll
    public static void teardown() {
        dataSource.close();

        // Delete in reverse creation order so nested directories go before their parents
        Lists.reverse(tempDirs).forEach(path -> {
            try {
                System.out.println("Deleting " + path);
                Files.delete(path);
            } catch (IOException e) {
                e.printStackTrace();
            }
        });
    }

    /** Creates a temporary directory and registers it for deletion after all tests. */
    private Path createTempDir() {
        try {
            Path dir = Files.createTempDirectory("file-storage-test");
            tempDirs.add(dir);
            return dir;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    @Test
    public void testCreateBase() throws SQLException, FileNotFoundException {
        String name = "test-" + UUID.randomUUID();

        var base = fileStorageService.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false, false);

        Assertions.assertEquals(name, base.name());
        Assertions.assertEquals(FileStorageBaseType.SLOW, base.type());
        Assertions.assertFalse(base.mustClean());
        Assertions.assertFalse(base.permitTemp());
    }

    @Test
    public void testAllocateTempInNonPermitted() throws SQLException, FileNotFoundException {
        String name = "test-" + UUID.randomUUID();

        var base = fileStorageService.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false, false);

        Assertions.assertThrows(IllegalArgumentException.class,
                () -> fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_DATA, "xyz", "thisShouldFail"));
    }

    @Test
    public void testAllocatePermanentInNonPermitted() throws SQLException, IOException {
        String name = "test-" + UUID.randomUUID();

        var base = fileStorageService.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false, false);

        // Permanent allocation does not consult permitTemp, so this succeeds even though the
        // base forbids temporary storage (the description string is only test data).
        var created = fileStorageService.allocatePermanentStorage(base, "xyz", FileStorageType.CRAWL_DATA, "thisShouldFail");
        tempDirs.add(created.asPath());

        var actual = fileStorageService.getStorage(created.id());
        Assertions.assertEquals(created, actual);
    }

    @Test
    public void testAllocateTempInPermitted() throws IOException, SQLException {
        String name = "test-" + UUID.randomUUID();

        var base = fileStorageService.createStorageBase(name, createTempDir(), FileStorageBaseType.SLOW, false, true);
        var fileStorage = fileStorageService.allocateTemporaryStorage(base, FileStorageType.CRAWL_DATA, "xyz", "thisShouldSucceed");

        // Register for cleanup before asserting, so the directory is removed even on failure
        tempDirs.add(fileStorage.asPath());

        Assertions.assertTrue(Files.exists(fileStorage.asPath()));
    }
}