mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00

Look, this will make the git history look funny, but trimming unnecessary depth from the source tree is a very necessary sanity-preserving measure when dealing with a super-modularized codebase like this one. While it makes the project configuration a bit less conventional, it will save you several clicks every time you jump between modules. Which you'll do a lot, because it's *modul*ar. The src/main/java convention makes a lot of sense for a non-modular project though. This ain't that.
129 lines
4.4 KiB
Java
129 lines
4.4 KiB
Java
package nu.marginalia.rwf;
|
|
|
|
import nu.marginalia.array.LongArray;
|
|
import nu.marginalia.array.LongArrayFactory;
|
|
|
|
import java.io.IOException;
|
|
import java.nio.file.Files;
|
|
import java.nio.file.Path;
|
|
import java.nio.file.StandardCopyOption;
|
|
import java.nio.file.StandardOpenOption;
|
|
|
|
/** A RandomFileAssembler is a way to write a large file out of order
|
|
* in a way that is efficient for SSDs.
|
|
*/
|
|
public interface RandomFileAssembler extends AutoCloseable {
|
|
|
|
void put(long address, long data) throws IOException;
|
|
void write(Path file) throws IOException;
|
|
void close() throws IOException;
|
|
|
|
|
|
/** Select the appropriate RandomFileAssembler implementation based on
|
|
* the system configuration.
|
|
*/
|
|
static RandomFileAssembler create(Path workDir,
|
|
long totalSize) throws IOException {
|
|
// If the system is configured to conserve memory, we use temp files
|
|
if (Boolean.getBoolean("system.conserveMemory")) {
|
|
return ofTempFiles(workDir, totalSize);
|
|
}
|
|
|
|
// If the file is small, we use straight mmap
|
|
if (totalSize < 128_000_000) { // 128M longs = 1 GB
|
|
return ofMmap(workDir, totalSize);
|
|
}
|
|
|
|
// If the file is large, we use an in-memory buffer to avoid disk thrashing
|
|
return ofInMemoryAsssembly(totalSize);
|
|
|
|
}
|
|
|
|
|
|
/** Create a RandomFileAssembler that writes to a series of small files.
|
|
* This has negligible memory overhead, but is slower than in-memory
|
|
* or mmap for small files.
|
|
*/
|
|
static RandomFileAssembler ofTempFiles(Path workDir, long sizeInLongs) throws IOException {
|
|
|
|
return new RandomFileAssembler() {
|
|
private final RandomWriteFunnel funnel = new RandomWriteFunnel(workDir, 10_000_000);
|
|
@Override
|
|
public void put(long address, long data) throws IOException {
|
|
funnel.put(address, data);
|
|
}
|
|
|
|
@Override
|
|
public void write(Path file) throws IOException {
|
|
try (var channel = Files.newByteChannel(file, StandardOpenOption.WRITE, StandardOpenOption.CREATE)) {
|
|
funnel.write(channel);
|
|
|
|
// It's very likely we'll have overshot the size a bit, truncate to the correct size
|
|
channel.truncate(8 * sizeInLongs);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
public void close() throws IOException {
|
|
funnel.close();
|
|
}
|
|
};
|
|
}
|
|
|
|
/** Create a RandomFileAssembler that writes to a LongArray in memory. */
|
|
static RandomFileAssembler ofInMemoryAsssembly(long size) {
|
|
return new RandomFileAssembler() {
|
|
private final LongArray buffer = LongArrayFactory.onHeapConfined(size);
|
|
|
|
@Override
|
|
public void put(long address, long data) {
|
|
buffer.set(address, data);
|
|
}
|
|
|
|
@Override
|
|
public void write(Path file) throws IOException {
|
|
buffer.write(file);
|
|
}
|
|
|
|
@Override
|
|
public void close() {
|
|
buffer.close();
|
|
}
|
|
};
|
|
}
|
|
|
|
/** Create a RandomFileAssembler that writes to a file using mmap.
|
|
* This is the fastest method for small files, but has a large memory
|
|
* overhead and is slow for large files, where the OS will start pushing
|
|
* changes to disk continuously.
|
|
* */
|
|
static RandomFileAssembler ofMmap(Path destDir, long size) throws IOException {
|
|
return new RandomFileAssembler() {
|
|
private final Path workFile = Files.createTempFile(destDir, "mmap", ".dat");
|
|
private final LongArray buffer = LongArrayFactory.mmapForWritingConfined(workFile, size);
|
|
|
|
@Override
|
|
public void put(long address, long data) {
|
|
buffer.set(address, data);
|
|
}
|
|
|
|
@Override
|
|
public void write(Path dest) throws IOException {
|
|
buffer.force();
|
|
|
|
Files.move(workFile, dest,
|
|
StandardCopyOption.REPLACE_EXISTING,
|
|
StandardCopyOption.ATOMIC_MOVE);
|
|
}
|
|
|
|
@Override
|
|
public void close() throws IOException {
|
|
buffer.close();
|
|
|
|
// Catch the case where e.g. write() fails with an exception and workFile doesn't get moved
|
|
Files.deleteIfExists(workFile);
|
|
}
|
|
};
|
|
}
|
|
}
|