mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-22 20:48:59 +00:00
(slop) Break slop out into its own repository
This commit is contained in:
parent
fd2bad39f3
commit
623ee5570f
@ -22,7 +22,6 @@ dependencies {
|
||||
|
||||
implementation project(':code:libraries:array')
|
||||
implementation project(':code:libraries:btree')
|
||||
implementation project(':code:libraries:slop')
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
|
||||
@ -41,6 +40,7 @@ dependencies {
|
||||
implementation project(':code:index:index-journal')
|
||||
|
||||
|
||||
implementation libs.slop
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.prometheus
|
||||
|
@ -15,7 +15,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
dependencies {
|
||||
implementation project(':code:libraries:array')
|
||||
implementation project(':code:libraries:btree')
|
||||
implementation project(':code:libraries:slop')
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:index:query')
|
||||
@ -30,6 +29,7 @@ dependencies {
|
||||
implementation libs.roaringbitmap
|
||||
implementation libs.fastutil
|
||||
implementation libs.trove
|
||||
implementation libs.slop
|
||||
|
||||
testImplementation project(':code:libraries:test-helpers')
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
|
@ -86,7 +86,7 @@ public class ForwardIndexConverter {
|
||||
|
||||
ByteBuffer workArea = ByteBuffer.allocate(65536);
|
||||
for (var instance : journal.pages()) {
|
||||
try (var slopTable = new SlopTable())
|
||||
try (var slopTable = new SlopTable(instance.page()))
|
||||
{
|
||||
var docIdReader = instance.openCombinedId(slopTable);
|
||||
var metaReader = instance.openDocumentMeta(slopTable);
|
||||
@ -152,7 +152,7 @@ public class ForwardIndexConverter {
|
||||
Roaring64Bitmap rbm = new Roaring64Bitmap();
|
||||
|
||||
for (var instance : journalReader.pages()) {
|
||||
try (var slopTable = new SlopTable()) {
|
||||
try (var slopTable = new SlopTable(instance.page())) {
|
||||
LongColumnReader idReader = instance.openCombinedId(slopTable);
|
||||
|
||||
while (idReader.hasRemaining()) {
|
||||
|
@ -15,7 +15,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
dependencies {
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
implementation project(':code:libraries:array')
|
||||
implementation project(':code:libraries:slop')
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:processes:converting-process:model')
|
||||
implementation project(':third-party:parquet-floor')
|
||||
@ -23,6 +22,7 @@ dependencies {
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.slop
|
||||
implementation libs.prometheus
|
||||
implementation libs.notnull
|
||||
implementation libs.guava
|
||||
|
@ -1,5 +1,7 @@
|
||||
package nu.marginalia.index.journal;
|
||||
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
@ -25,12 +27,7 @@ public record IndexJournal(Path journalDir) {
|
||||
|
||||
/** Returns the number of versions of the journal file in the base directory. */
|
||||
public static int numPages(Path baseDirectory) {
|
||||
for (int version = 0; ; version++) {
|
||||
if (!IndexJournalPage.combinedId.forPage(version).exists(baseDirectory)) {
|
||||
return version;
|
||||
}
|
||||
}
|
||||
|
||||
return SlopTable.getNumPages(baseDirectory, IndexJournalPage.combinedId);
|
||||
}
|
||||
|
||||
public IndexJournal {
|
||||
|
@ -3,6 +3,7 @@ package nu.marginalia.index.journal;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayReader;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter;
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnReader;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.array.LongArrayColumnReader;
|
||||
@ -12,7 +13,6 @@ import nu.marginalia.slop.column.primitive.IntColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.LongColumnReader;
|
||||
import nu.marginalia.slop.column.primitive.LongColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
@ -20,16 +20,16 @@ import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public record IndexJournalPage(Path baseDir, int page) {
|
||||
public static final ColumnDesc<IntColumnReader, IntColumnWriter> features = new ColumnDesc<>("features", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<IntColumnReader, IntColumnWriter> size = new ColumnDesc<>("size", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<LongColumnReader, LongColumnWriter> combinedId = new ColumnDesc<>("combinedId", ColumnType.LONG_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<LongColumnReader, LongColumnWriter> documentMeta = new ColumnDesc<>("documentMeta", ColumnType.LONG_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<IntColumnReader, IntColumnWriter> features = new ColumnDesc<>("features", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<IntColumnReader, IntColumnWriter> size = new ColumnDesc<>("size", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<LongColumnReader, LongColumnWriter> combinedId = new ColumnDesc<>("combinedId", ColumnTypes.LONG_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<LongColumnReader, LongColumnWriter> documentMeta = new ColumnDesc<>("documentMeta", ColumnTypes.LONG_LE, StorageType.PLAIN);
|
||||
|
||||
public static final ColumnDesc<LongArrayColumnReader, LongArrayColumnWriter> termIds = new ColumnDesc<>("termIds", ColumnType.LONG_ARRAY_LE, StorageType.ZSTD);
|
||||
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMeta = new ColumnDesc<>("termMetadata", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
|
||||
public static final ColumnDesc<LongArrayColumnReader, LongArrayColumnWriter> termIds = new ColumnDesc<>("termIds", ColumnTypes.LONG_ARRAY_LE, StorageType.ZSTD);
|
||||
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMeta = new ColumnDesc<>("termMetadata", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
|
||||
public static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> positions = new ColumnDesc<>("termPositions", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
|
||||
|
||||
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodes = new ColumnDesc<>("spanCodes", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
|
||||
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodes = new ColumnDesc<>("spanCodes", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
|
||||
public static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> spans = new ColumnDesc<>("spans", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
|
||||
|
||||
public IndexJournalPage {
|
||||
@ -39,39 +39,39 @@ public record IndexJournalPage(Path baseDir, int page) {
|
||||
}
|
||||
|
||||
public LongColumnReader openCombinedId(SlopTable table) throws IOException {
|
||||
return combinedId.forPage(page).open(table, baseDir);
|
||||
return combinedId.open(table, baseDir);
|
||||
}
|
||||
|
||||
public LongColumnReader openDocumentMeta(SlopTable table) throws IOException {
|
||||
return documentMeta.forPage(page).open(table, baseDir);
|
||||
return documentMeta.open(table, baseDir);
|
||||
}
|
||||
|
||||
public IntColumnReader openFeatures(SlopTable table) throws IOException {
|
||||
return features.forPage(page).open(table, baseDir);
|
||||
return features.open(table, baseDir);
|
||||
}
|
||||
|
||||
public IntColumnReader openSize(SlopTable table) throws IOException {
|
||||
return size.forPage(page).open(table, baseDir);
|
||||
return size.open(table, baseDir);
|
||||
}
|
||||
|
||||
|
||||
public LongArrayColumnReader openTermIds(SlopTable table) throws IOException {
|
||||
return termIds.forPage(page).open(table, baseDir);
|
||||
return termIds.open(table, baseDir);
|
||||
}
|
||||
|
||||
public ByteArrayColumnReader openTermMetadata(SlopTable table) throws IOException {
|
||||
return termMeta.forPage(page).open(table, baseDir);
|
||||
return termMeta.open(table, baseDir);
|
||||
}
|
||||
|
||||
public GammaCodedSequenceArrayReader openTermPositions(SlopTable table) throws IOException {
|
||||
return positions.forPage(page).open(table, baseDir);
|
||||
return positions.open(table, baseDir);
|
||||
}
|
||||
|
||||
public GammaCodedSequenceArrayReader openSpans(SlopTable table) throws IOException {
|
||||
return spans.forPage(page).open(table, baseDir);
|
||||
return spans.open(table, baseDir);
|
||||
}
|
||||
|
||||
public ByteArrayColumnReader openSpanCodes(SlopTable table) throws IOException {
|
||||
return spanCodes.forPage(page).open(table, baseDir);
|
||||
return spanCodes.open(table, baseDir);
|
||||
}
|
||||
}
|
||||
|
@ -32,23 +32,25 @@ public class IndexJournalSlopWriter extends SlopTable {
|
||||
private static final MurmurHash3_128 hash = new MurmurHash3_128();
|
||||
|
||||
public IndexJournalSlopWriter(Path dir, int page) throws IOException {
|
||||
|
||||
super(page);
|
||||
|
||||
if (!Files.exists(dir)) {
|
||||
Files.createDirectory(dir);
|
||||
}
|
||||
|
||||
featuresWriter = IndexJournalPage.features.create(this, dir);
|
||||
sizeWriter = IndexJournalPage.size.create(this, dir);
|
||||
|
||||
featuresWriter = IndexJournalPage.features.forPage(page).create(this, dir);
|
||||
sizeWriter = IndexJournalPage.size.forPage(page).create(this, dir);
|
||||
combinedIdWriter = IndexJournalPage.combinedId.create(this, dir);
|
||||
documentMetaWriter = IndexJournalPage.documentMeta.create(this, dir);
|
||||
|
||||
combinedIdWriter = IndexJournalPage.combinedId.forPage(page).create(this, dir);
|
||||
documentMetaWriter = IndexJournalPage.documentMeta.forPage(page).create(this, dir);
|
||||
termIdsWriter = IndexJournalPage.termIds.create(this, dir);
|
||||
termMetadataWriter = IndexJournalPage.termMeta.create(this, dir);
|
||||
termPositionsWriter = IndexJournalPage.positions.create(this, dir);
|
||||
|
||||
termIdsWriter = IndexJournalPage.termIds.forPage(page).create(this, dir);
|
||||
termMetadataWriter = IndexJournalPage.termMeta.forPage(page).create(this, dir);
|
||||
termPositionsWriter = IndexJournalPage.positions.forPage(page).create(this, dir);
|
||||
|
||||
spanCodesWriter = IndexJournalPage.spanCodes.forPage(page).create(this, dir);
|
||||
spansWriter = IndexJournalPage.spans.forPage(page).create(this, dir);
|
||||
spanCodesWriter = IndexJournalPage.spanCodes.create(this, dir);
|
||||
spansWriter = IndexJournalPage.spans.create(this, dir);
|
||||
}
|
||||
|
||||
@SneakyThrows
|
||||
|
@ -16,7 +16,6 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
dependencies {
|
||||
implementation project(':code:libraries:array')
|
||||
implementation project(':code:libraries:btree')
|
||||
implementation project(':code:libraries:slop')
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
implementation project(':code:libraries:random-write-funnel')
|
||||
implementation project(':code:index:query')
|
||||
@ -31,6 +30,7 @@ dependencies {
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.slop
|
||||
implementation libs.fastutil
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
|
@ -77,7 +77,7 @@ public class FullPreindexDocuments {
|
||||
final ByteBuffer tempBuffer = ByteBuffer.allocate(1024*1024*100);
|
||||
|
||||
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
|
||||
var slopTable = new SlopTable())
|
||||
var slopTable = new SlopTable(journalInstance.page()))
|
||||
{
|
||||
var docIds = journalInstance.openCombinedId(slopTable);
|
||||
var termIds = journalInstance.openTermIds(slopTable);
|
||||
|
@ -60,7 +60,7 @@ public class FullPreindexWordSegments {
|
||||
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
|
||||
countsMap.defaultReturnValue(0);
|
||||
|
||||
try (var slopTable = new SlopTable()) {
|
||||
try (var slopTable = new SlopTable(journalInstance.page())) {
|
||||
var termIds = journalInstance.openTermIds(slopTable);
|
||||
while (termIds.hasRemaining()) {
|
||||
long[] tids = termIds.get();
|
||||
|
@ -65,7 +65,7 @@ public class PrioPreindexDocuments {
|
||||
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
|
||||
|
||||
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
|
||||
var slopTable = new SlopTable())
|
||||
var slopTable = new SlopTable(journalInstance.page()))
|
||||
{
|
||||
var docIds = journalInstance.openCombinedId(slopTable);
|
||||
var termIds = journalInstance.openTermIds(slopTable);
|
||||
|
@ -60,7 +60,7 @@ public class PrioPreindexWordSegments {
|
||||
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
|
||||
countsMap.defaultReturnValue(0);
|
||||
|
||||
try (var slopTable = new SlopTable()) {
|
||||
try (var slopTable = new SlopTable(journalInstance.page())) {
|
||||
var termIds = journalInstance.openTermIds(slopTable);
|
||||
var termMetas = journalInstance.openTermMetadata(slopTable);
|
||||
|
||||
|
@ -14,7 +14,7 @@ apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
dependencies {
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation project(':code:libraries:slop')
|
||||
implementation libs.slop
|
||||
implementation libs.fastutil
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.sequence.slop;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
@ -19,13 +20,13 @@ import java.util.List;
|
||||
/** Slop column extension for storing GammaCodedSequence objects. */
|
||||
public class GammaCodedSequenceArrayColumn {
|
||||
|
||||
public static ColumnType<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> TYPE = ColumnType.register("s8[]+gcs[]", ByteOrder.nativeOrder(), GammaCodedSequenceArrayColumn::open, GammaCodedSequenceArrayColumn::create);
|
||||
public static ColumnType<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> TYPE = ColumnTypes.register("s8[]+gcs[]", ByteOrder.nativeOrder(), GammaCodedSequenceArrayColumn::open, GammaCodedSequenceArrayColumn::create);
|
||||
|
||||
public static GammaCodedSequenceArrayReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc,
|
||||
GammaCodedSequenceColumn.open(path, columnDesc),
|
||||
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH,
|
||||
ColumnType.VARINT_LE,
|
||||
ColumnTypes.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
);
|
||||
@ -35,7 +36,7 @@ public class GammaCodedSequenceArrayColumn {
|
||||
return new Writer(columnDesc,
|
||||
GammaCodedSequenceColumn.create(path, columnDesc),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH,
|
||||
ColumnType.VARINT_LE,
|
||||
ColumnTypes.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
);
|
||||
|
@ -1,6 +1,7 @@
|
||||
package nu.marginalia.sequence.slop;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
@ -20,13 +21,13 @@ import java.nio.file.Path;
|
||||
/** Slop column extension for storing GammaCodedSequence objects. */
|
||||
public class GammaCodedSequenceColumn {
|
||||
|
||||
public static ColumnType<GammaCodedSequenceReader, GammaCodedSequenceWriter> TYPE = ColumnType.register("s8[]+gcs", ByteOrder.nativeOrder(), GammaCodedSequenceColumn::open, GammaCodedSequenceColumn::create);
|
||||
public static ColumnType<GammaCodedSequenceReader, GammaCodedSequenceWriter> TYPE = ColumnTypes.register("s8[]+gcs", ByteOrder.nativeOrder(), GammaCodedSequenceColumn::open, GammaCodedSequenceColumn::create);
|
||||
|
||||
public static GammaCodedSequenceReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc,
|
||||
Storage.reader(path, columnDesc, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
|
||||
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||
ColumnType.VARINT_LE,
|
||||
ColumnTypes.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
);
|
||||
@ -36,7 +37,7 @@ public class GammaCodedSequenceColumn {
|
||||
return new Writer(columnDesc,
|
||||
Storage.writer(path, columnDesc),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||
ColumnType.VARINT_LE,
|
||||
ColumnTypes.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
);
|
||||
|
@ -1,83 +0,0 @@
|
||||
// Build script for the slop library module: plain Java library plus a runnable
// demo (built from its own source set) and GraalVM native-image support.
plugins {
    id 'java'
    id 'application'
    id 'org.graalvm.buildtools.native' version '0.10.2'
}

// Compile with the JVM version configured once on the root project.
java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

// Non-standard flat layout: sources live directly in 'java', 'test' and 'demo'
// rather than under src/<set>/java.
sourceSets {
    main {
        java {
            srcDirs = ['java']
        }
        resources {
            srcDirs = ['resources']
        }
    }
    test {
        java {
            srcDirs = ['test']
        }
        resources {
            srcDirs = ['test-resources']
        }
    }
    // Separate source set for the demo application referenced by 'application' below.
    demo {
        java {
            srcDirs = ['demo']
        }
        resources {
            srcDirs = ['demo-resources']
        }
    }
}

application {
    mainClass = 'demo.OneBillionRowsDemo'
}

graalvmNative {
    binaries.all {
        resources.autodetect()
        buildArgs = ['-H:+ForeignAPISupport', '-H:+UnlockExperimentalVMOptions']
    }

    toolchainDetection = false
}

dependencies {
    implementation libs.bundles.slf4j

    implementation libs.notnull
    implementation libs.commons.lang3
    implementation libs.lz4
    implementation libs.commons.compress
    implementation libs.zstd

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito

    // The demo source set compiles against the main classes and repeats the
    // main dependencies, adding DuckDB.
    demoImplementation sourceSets.main.output
    demoImplementation libs.bundles.slf4j
    demoImplementation libs.notnull
    demoImplementation libs.commons.lang3
    demoImplementation libs.lz4
    demoImplementation libs.commons.compress
    demoImplementation libs.zstd
    demoImplementation libs.duckdb
}

test {
    useJUnitPlatform()
}
|
@ -1,17 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ColumnReader {
|
||||
|
||||
ColumnDesc<?, ?> columnDesc();
|
||||
|
||||
long position() throws IOException;
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
boolean hasRemaining() throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ColumnWriter {
|
||||
ColumnDesc<?, ?> columnDesc();
|
||||
|
||||
/** Return the current record index in the column */
|
||||
long position();
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,37 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.function.Predicate;
|
||||
|
||||
public interface ObjectColumnReader<T> extends ColumnReader {
|
||||
|
||||
ColumnDesc<?, ?> columnDesc();
|
||||
|
||||
T get() throws IOException;
|
||||
|
||||
default boolean search(T value) throws IOException {
|
||||
while (hasRemaining()) {
|
||||
if (get().equals(value)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
default boolean search(Predicate<T> test) throws IOException {
|
||||
while (hasRemaining()) {
|
||||
if (test.test(get())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
long position() throws IOException;
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
boolean hasRemaining() throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ObjectColumnWriter<T> extends ColumnWriter {
|
||||
ColumnDesc<?, ?> columnDesc();
|
||||
|
||||
void put(T value) throws IOException;
|
||||
|
||||
/** Return the current record index in the column */
|
||||
long position();
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,125 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class ByteArrayColumn {
|
||||
|
||||
public static ByteArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(
|
||||
columnDesc,
|
||||
Storage.reader(path, columnDesc, true),
|
||||
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
|
||||
);
|
||||
}
|
||||
|
||||
public static ByteArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(
|
||||
columnDesc,
|
||||
Storage.writer(path, columnDesc),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
|
||||
);
|
||||
}
|
||||
|
||||
public static ObjectArrayColumnReader<byte[]> openNested(Path path, ColumnDesc desc) throws IOException {
|
||||
return ObjectArrayColumn.open(path, desc, open(path, desc));
|
||||
}
|
||||
|
||||
public static ObjectArrayColumnWriter<byte[]> createNested(Path path, ColumnDesc desc) throws IOException {
|
||||
return ObjectArrayColumn.create(path, desc, create(path, desc));
|
||||
}
|
||||
|
||||
private static class Writer implements ByteArrayColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private final VarintColumnWriter lengthsWriter;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
this.lengthsWriter = lengthsWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(byte[] value) throws IOException {
|
||||
position ++;
|
||||
storage.putBytes(value);
|
||||
lengthsWriter.put(value.length);
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
lengthsWriter.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements ByteArrayColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
private final VarintColumnReader lengthsReader;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage, VarintColumnReader lengthsReader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
this.lengthsReader = lengthsReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public byte[] get() throws IOException {
|
||||
int length = lengthsReader.get();
|
||||
byte[] ret = new byte[length];
|
||||
storage.getBytes(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return lengthsReader.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
for (int i = 0; i < positions; i++) {
|
||||
int size = lengthsReader.get();
|
||||
storage.skip(size, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return lengthsReader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
lengthsReader.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ByteArrayColumnReader extends ObjectColumnReader<byte[]>, AutoCloseable {
|
||||
byte[] get() throws IOException;
|
||||
void close() throws IOException;
|
||||
|
||||
|
||||
@Override
|
||||
long position() throws IOException;
|
||||
|
||||
@Override
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
@Override
|
||||
boolean hasRemaining() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ByteArrayColumnWriter extends ObjectColumnWriter<byte[]>, AutoCloseable {
|
||||
void put(byte[] value) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,120 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class IntArrayColumn {
|
||||
|
||||
public static IntArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc,
|
||||
Storage.reader(path, columnDesc, true),
|
||||
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
|
||||
);
|
||||
}
|
||||
|
||||
public static IntArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc,
|
||||
Storage.writer(path, columnDesc),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
|
||||
);
|
||||
}
|
||||
|
||||
public static ObjectArrayColumnReader<int[]> openNested(Path path, ColumnDesc desc) throws IOException {
|
||||
return ObjectArrayColumn.open(path, desc, open(path, desc));
|
||||
}
|
||||
|
||||
public static ObjectArrayColumnWriter<int[]> createNested(Path path, ColumnDesc desc) throws IOException {
|
||||
return ObjectArrayColumn.create(path, desc, create(path, desc));
|
||||
}
|
||||
|
||||
private static class Writer implements IntArrayColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private final VarintColumnWriter lengthsWriter;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
this.lengthsWriter = lengthsWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(int[] value) throws IOException {
|
||||
storage.putInts(value);
|
||||
lengthsWriter.put(value.length);
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return lengthsWriter.position();
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
lengthsWriter.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements IntArrayColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
private final VarintColumnReader lengthsReader;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage, VarintColumnReader lengthsReader) {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
this.lengthsReader = lengthsReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public int[] get() throws IOException {
|
||||
int length = (int) lengthsReader.get();
|
||||
int[] ret = new int[length];
|
||||
storage.getInts(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return lengthsReader.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
for (int i = 0; i < positions; i++) {
|
||||
int size = (int) lengthsReader.get();
|
||||
storage.skip(size, Integer.BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return lengthsReader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
lengthsReader.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface IntArrayColumnReader extends ObjectColumnReader<int[]>, AutoCloseable {
|
||||
int[] get() throws IOException;
|
||||
void close() throws IOException;
|
||||
|
||||
|
||||
@Override
|
||||
long position() throws IOException;
|
||||
|
||||
@Override
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
@Override
|
||||
boolean hasRemaining() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface IntArrayColumnWriter extends ObjectColumnWriter<int[]>, AutoCloseable {
|
||||
void put(int[] value) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,122 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class LongArrayColumn {
|
||||
|
||||
public static LongArrayColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new LongArrayColumn.Reader(
|
||||
columnDesc,
|
||||
Storage.reader(path, columnDesc, true),
|
||||
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
|
||||
);
|
||||
}
|
||||
|
||||
public static LongArrayColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new LongArrayColumn.Writer(
|
||||
columnDesc,
|
||||
Storage.writer(path, columnDesc),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN, ColumnType.VARINT_LE, StorageType.PLAIN))
|
||||
);
|
||||
}
|
||||
|
||||
public static ObjectArrayColumnReader<long[]> openNested(Path path, ColumnDesc desc) throws IOException {
|
||||
return ObjectArrayColumn.open(path, desc, open(path, desc));
|
||||
}
|
||||
|
||||
public static ObjectArrayColumnWriter<long[]> createNested(Path path, ColumnDesc desc) throws IOException {
|
||||
return ObjectArrayColumn.create(path, desc, create(path, desc));
|
||||
}
|
||||
|
||||
private static class Writer implements LongArrayColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private final VarintColumnWriter lengthsWriter;
|
||||
|
||||
public Writer(ColumnDesc<?,?> columnDesc, StorageWriter storage, VarintColumnWriter lengthsWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
this.lengthsWriter = lengthsWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(long[] value) throws IOException {
|
||||
storage.putLongs(value);
|
||||
lengthsWriter.put(value.length);
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return lengthsWriter.position();
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
lengthsWriter.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements LongArrayColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
private final VarintColumnReader lengthsReader;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage, VarintColumnReader lengthsReader) {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
this.lengthsReader = lengthsReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public long[] get() throws IOException {
|
||||
int length = (int) lengthsReader.get();
|
||||
long[] ret = new long[length];
|
||||
storage.getLongs(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return lengthsReader.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
for (int i = 0; i < positions; i++) {
|
||||
int size = (int) lengthsReader.get();
|
||||
storage.skip(size, Long.BYTES);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return lengthsReader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
lengthsReader.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface LongArrayColumnReader extends ObjectColumnReader<long[]>, AutoCloseable {
|
||||
long[] get() throws IOException;
|
||||
void close() throws IOException;
|
||||
|
||||
|
||||
@Override
|
||||
long position() throws IOException;
|
||||
|
||||
@Override
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
@Override
|
||||
boolean hasRemaining() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface LongArrayColumnWriter extends ObjectColumnWriter<long[]>, AutoCloseable {
|
||||
void put(long[] value) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,118 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
import nu.marginalia.slop.column.ObjectColumnWriter;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class ObjectArrayColumn {
|
||||
public static <T> ObjectArrayColumnReader<T> open(Path baseDir,
|
||||
ColumnDesc<ObjectArrayColumnReader<T>, ObjectArrayColumnWriter<T>> selfType,
|
||||
ObjectColumnReader<T> baseReader) throws IOException {
|
||||
return new Reader<>(selfType, baseReader,
|
||||
VarintColumn.open(baseDir, selfType.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH, ColumnType.VARINT_LE, StorageType.PLAIN)));
|
||||
}
|
||||
|
||||
public static <T> ObjectArrayColumnWriter<T> create(Path baseDir,
|
||||
ColumnDesc<ObjectArrayColumnReader<T>, ObjectArrayColumnWriter<T>> selfType,
|
||||
ObjectColumnWriter<T> baseWriter) throws IOException {
|
||||
return new Writer<T>(selfType,
|
||||
baseWriter,
|
||||
VarintColumn.create(baseDir, selfType.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH, ColumnType.VARINT_LE, StorageType.PLAIN)));
|
||||
}
|
||||
|
||||
|
||||
private static class Writer<T> implements ObjectArrayColumnWriter<T> {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final ObjectColumnWriter<T> dataWriter;
|
||||
private final VarintColumnWriter groupsWriter;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, ObjectColumnWriter<T> dataWriter, VarintColumnWriter groupsWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.dataWriter = dataWriter;
|
||||
this.groupsWriter = groupsWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(List<T> value) throws IOException {
|
||||
groupsWriter.put(value.size());
|
||||
for (T t : value) {
|
||||
dataWriter.put(t);
|
||||
}
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return groupsWriter.position();
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
dataWriter.close();
|
||||
groupsWriter.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader<T> implements ObjectArrayColumnReader<T> {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final ObjectColumnReader<T> dataReader;
|
||||
private final VarintColumnReader groupsReader;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, ObjectColumnReader<T> dataReader, VarintColumnReader groupsReader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.dataReader = dataReader;
|
||||
this.groupsReader = groupsReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public List<T> get() throws IOException {
|
||||
int length = groupsReader.get();
|
||||
List<T> ret = new ArrayList<>(length);
|
||||
for (int i = 0; i < length; i++) {
|
||||
ret.add(dataReader.get());
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return groupsReader.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
int toSkip = 0;
|
||||
for (int i = 0; i < positions; i++) {
|
||||
toSkip += groupsReader.get();
|
||||
}
|
||||
dataReader.skip(toSkip);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return groupsReader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
dataReader.close();
|
||||
groupsReader.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,21 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public interface ObjectArrayColumnReader<T> extends ObjectColumnReader<List<T>>, AutoCloseable {
|
||||
List<T> get() throws IOException;
|
||||
void close() throws IOException;
|
||||
|
||||
|
||||
@Override
|
||||
long position() throws IOException;
|
||||
|
||||
@Override
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
@Override
|
||||
boolean hasRemaining() throws IOException;
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
package nu.marginalia.slop.column.array;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public interface ObjectArrayColumnWriter<T> extends ObjectColumnWriter<List<T>>, AutoCloseable {
|
||||
void put(List<T> values) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,148 +0,0 @@
|
||||
package nu.marginalia.slop.column.dynamic;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class CustomBinaryColumn {
|
||||
|
||||
public static CustomBinaryColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(
|
||||
columnDesc,
|
||||
Storage.reader(path, columnDesc, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
|
||||
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||
ColumnType.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
public static CustomBinaryColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(
|
||||
columnDesc,
|
||||
Storage.writer(path, columnDesc),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||
ColumnType.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
private static class Writer implements CustomBinaryColumnWriter {
|
||||
private final VarintColumnWriter indexWriter;
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc,
|
||||
StorageWriter storage,
|
||||
VarintColumnWriter indexWriter)
|
||||
{
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
this.indexWriter = indexWriter;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RecordWriter next() throws IOException {
|
||||
return new RecordWriter() {
|
||||
long pos = storage.position();
|
||||
|
||||
@Override
|
||||
public StorageWriter writer() {
|
||||
return storage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
indexWriter.put((int) (storage.position() - pos));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return indexWriter.position();
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
indexWriter.close();
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements CustomBinaryColumnReader {
|
||||
private final VarintColumnReader indexReader;
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader reader, VarintColumnReader indexReader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = reader;
|
||||
this.indexReader = indexReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
for (int i = 0; i < positions; i++) {
|
||||
int size = (int) indexReader.get();
|
||||
storage.skip(size, 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return indexReader.hasRemaining();
|
||||
}
|
||||
|
||||
public long position() throws IOException {
|
||||
return indexReader.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public RecordReader next() throws IOException {
|
||||
int size = (int) indexReader.get();
|
||||
|
||||
return new RecordReader() {
|
||||
long origPos = storage.position();
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
@Override
|
||||
public StorageReader reader() {
|
||||
return storage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
assert storage.position() - origPos == size : "column reader caller did not read the entire record";
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
indexReader.close();
|
||||
storage.close();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
package nu.marginalia.slop.column.dynamic;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface CustomBinaryColumnReader extends ColumnReader, AutoCloseable {
|
||||
RecordReader next() throws IOException;
|
||||
void close() throws IOException;
|
||||
|
||||
interface RecordReader extends AutoCloseable {
|
||||
int size();
|
||||
StorageReader reader();
|
||||
void close() throws IOException;
|
||||
}
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
package nu.marginalia.slop.column.dynamic;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface CustomBinaryColumnWriter extends ColumnWriter {
|
||||
RecordWriter next() throws IOException;
|
||||
void close() throws IOException;
|
||||
|
||||
interface RecordWriter extends AutoCloseable {
|
||||
StorageWriter writer();
|
||||
void close() throws IOException;
|
||||
}
|
||||
}
|
@ -1,318 +0,0 @@
|
||||
package nu.marginalia.slop.column.dynamic;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class VarintColumn {
|
||||
|
||||
public static VarintColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) {
|
||||
return new ReaderBE(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
else {
|
||||
return new ReaderLE(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static VarintColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
if (columnDesc.byteOrder() == ByteOrder.BIG_ENDIAN) {
|
||||
return new WriterBE(columnDesc, Storage.writer(path, columnDesc));
|
||||
} else {
|
||||
return new WriterLE(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class WriterBE implements VarintColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter writer;
|
||||
private long position = 0;
|
||||
|
||||
public WriterBE(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.writer = writer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(long value) throws IOException {
|
||||
position++;
|
||||
|
||||
while ((value & ~0x7F) != 0) {
|
||||
writer.putByte((byte) (0x80 | (value & 0x7F)));
|
||||
value >>>= 7;
|
||||
}
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
|
||||
public void put(long[] values) throws IOException {
|
||||
for (long val : values) {
|
||||
put(val);
|
||||
}
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class WriterLE implements VarintColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter writer;
|
||||
private long position = 0;
|
||||
|
||||
public WriterLE(ColumnDesc<?,?> columnDesc, StorageWriter writer) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.writer = writer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(long value) throws IOException {
|
||||
position++;
|
||||
|
||||
if (value < 0)
|
||||
throw new IllegalArgumentException("Value must be positive");
|
||||
|
||||
if (value < (1<<7)) {
|
||||
writer.putByte((byte) value);
|
||||
}
|
||||
else if (value < (1<<14)) {
|
||||
writer.putByte((byte) (value >>> (7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1<<21)) {
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1<<28)) {
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1L<<35)) {
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1L<<42)) {
|
||||
writer.putByte((byte) ((value >>> 35) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1L<<49)) {
|
||||
writer.putByte((byte) ((value >>> 42) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 35) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else if (value < (1L<<56)) {
|
||||
writer.putByte((byte) ((value >>> 49) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 42) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 35) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
else {
|
||||
writer.putByte((byte) ((value >>> 56) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 49) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 42) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 35) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 28) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 21) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 14) | 0x80));
|
||||
writer.putByte((byte) ((value >>> 7) | 0x80));
|
||||
writer.putByte((byte) (value & 0x7F));
|
||||
}
|
||||
}
|
||||
|
||||
public void put(long[] values) throws IOException {
|
||||
for (long val : values) {
|
||||
put(val);
|
||||
}
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
writer.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class ReaderBE implements VarintColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader reader;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
public ReaderBE(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.reader = reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public int get() throws IOException {
|
||||
int value = 0;
|
||||
int shift = 0;
|
||||
byte b;
|
||||
|
||||
do {
|
||||
b = reader.getByte();
|
||||
value |= (b & 0x7F) << shift;
|
||||
shift += 7;
|
||||
} while ((b & 0x80) != 0);
|
||||
|
||||
position++;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
public long getLong() throws IOException {
|
||||
long value = 0;
|
||||
int shift = 0;
|
||||
byte b;
|
||||
|
||||
do {
|
||||
b = reader.getByte();
|
||||
value |= (long) (b & 0x7F) << shift;
|
||||
shift += 7;
|
||||
} while ((b & 0x80) != 0);
|
||||
|
||||
position++;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
for (long i = 0; i < positions; i++) {
|
||||
get();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return reader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class ReaderLE implements VarintColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader reader;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
public ReaderLE(ColumnDesc<?,?> columnDesc, StorageReader reader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.reader = reader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public int get() throws IOException {
|
||||
position++;
|
||||
|
||||
byte b = reader.getByte();
|
||||
if ((b & 0x80) == 0) {
|
||||
return b;
|
||||
}
|
||||
|
||||
int value = b & 0x7F;
|
||||
do {
|
||||
b = reader.getByte();
|
||||
value = (value << 7) | (b & 0x7F);
|
||||
} while ((b & 0x80) != 0);
|
||||
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
public long getLong() throws IOException {
|
||||
position++;
|
||||
|
||||
byte b = reader.getByte();
|
||||
if ((b & 0x80) == 0) {
|
||||
return b;
|
||||
}
|
||||
|
||||
long value = b & 0x7F;
|
||||
do {
|
||||
b = reader.getByte();
|
||||
value = value << 7 | (b & 0x7F);
|
||||
} while ((b & 0x80) != 0);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
for (long i = 0; i < positions; i++) {
|
||||
get();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return reader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
reader.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
package nu.marginalia.slop.column.dynamic;
|
||||
|
||||
import nu.marginalia.slop.column.primitive.IntColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface VarintColumnReader extends IntColumnReader {
|
||||
|
||||
int get() throws IOException;
|
||||
long getLong() throws IOException;
|
||||
|
||||
@Override
|
||||
long position() throws IOException;
|
||||
|
||||
@Override
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
@Override
|
||||
boolean hasRemaining() throws IOException;
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
package nu.marginalia.slop.column.dynamic;
|
||||
|
||||
import nu.marginalia.slop.column.primitive.LongColumnWriter;
|
||||
|
||||
public interface VarintColumnWriter extends LongColumnWriter {
|
||||
}
|
@ -1,88 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class ByteColumn {
|
||||
|
||||
public static ByteColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
public static ByteColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
|
||||
private static class Writer implements ByteColumnWriter {
|
||||
private final ColumnDesc columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc columnDesc, StorageWriter storageWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(byte value) throws IOException {
|
||||
storage.putByte(value);
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements ByteColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?,?> columnDesc, StorageReader storage) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
public byte get() throws IOException {
|
||||
return storage.getByte();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return storage.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
storage.skip(positions, Byte.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storage.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ByteColumnReader extends ColumnReader, AutoCloseable {
|
||||
byte get() throws IOException;
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ByteColumnWriter extends ColumnWriter, AutoCloseable {
|
||||
void put(byte value) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,89 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class CharColumn {
|
||||
|
||||
public static CharColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
public static CharColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
|
||||
private static class Writer implements CharColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(char value) throws IOException {
|
||||
storage.putChar(value);
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements CharColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
public char get() throws IOException {
|
||||
return storage.getChar();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return storage.position() / Character.BYTES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
storage.skip(positions, Character.BYTES);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storage.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface CharColumnReader extends ColumnReader, AutoCloseable {
|
||||
char get() throws IOException;
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface CharColumnWriter extends ColumnWriter, AutoCloseable {
|
||||
void put(char value) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,88 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class DoubleColumn {
|
||||
|
||||
public static DoubleColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
public static DoubleColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
|
||||
private static class Writer implements DoubleColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(double value) throws IOException {
|
||||
storage.putDouble(value);
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements DoubleColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public double get() throws IOException {
|
||||
return storage.getDouble();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return storage.position() / Double.BYTES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
storage.skip(positions, Double.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storage.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface DoubleColumnReader extends ColumnReader, AutoCloseable {
|
||||
double get() throws IOException;
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface DoubleColumnWriter extends ColumnWriter, AutoCloseable {
|
||||
void put(double value) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,89 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class FloatColumn {
|
||||
|
||||
public static FloatColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
public static FloatColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
|
||||
|
||||
private static class Writer implements FloatColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(float value) throws IOException {
|
||||
storage.putFloat(value);
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements FloatColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public float get() throws IOException {
|
||||
return storage.getFloat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return storage.position() / Float.BYTES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
storage.skip(positions, Float.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storage.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface FloatColumnReader extends ColumnReader, AutoCloseable {
|
||||
float get() throws IOException;
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface FloatColumnWriter extends ColumnWriter, AutoCloseable {
|
||||
void put(float value) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,95 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class IntColumn {
|
||||
|
||||
public static IntColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
public static IntColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
|
||||
private static class Writer implements IntColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(int[] values) throws IOException {
|
||||
for (int value : values) {
|
||||
storage.putInt(value);
|
||||
}
|
||||
position+=values.length;
|
||||
}
|
||||
|
||||
public void put(int value) throws IOException {
|
||||
storage.putInt(value);
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements IntColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public int get() throws IOException {
|
||||
return storage.getInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return storage.position() / Integer.BYTES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
storage.skip(positions, Integer.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storage.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface IntColumnReader extends ColumnReader, AutoCloseable {
|
||||
int get() throws IOException;
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,13 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface IntColumnWriter extends ColumnWriter, AutoCloseable {
|
||||
void put(int value) throws IOException;
|
||||
void put(int[] values) throws IOException;
|
||||
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,89 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class LongColumn {
|
||||
|
||||
public static LongColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
public static LongColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
|
||||
private static class Writer implements LongColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(long value) throws IOException {
|
||||
storage.putLong(value);
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements LongColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public long get() throws IOException {
|
||||
return storage.getLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return storage.position() / Long.BYTES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
storage.skip(positions, Long.BYTES);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storage.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface LongColumnReader extends ColumnReader, AutoCloseable {
|
||||
long get() throws IOException;
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface LongColumnWriter extends ColumnWriter, AutoCloseable {
|
||||
void put(long value) throws IOException;
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,89 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class ShortColumn {
|
||||
|
||||
public static ShortColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
|
||||
public static ShortColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
|
||||
private static class Writer implements ShortColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storage;
|
||||
private long position = 0;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(short value) throws IOException {
|
||||
storage.putShort(value);
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements ShortColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader storage) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
}
|
||||
|
||||
public short get() throws IOException {
|
||||
return storage.getShort();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return storage.position() / Short.BYTES;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
storage.skip(positions, Short.BYTES);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storage.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storage.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ShortColumnReader extends ColumnReader, AutoCloseable {
|
||||
short get() throws IOException;
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.slop.column.primitive;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface ShortColumnWriter extends ColumnWriter, AutoCloseable {
|
||||
void put(short value) throws IOException;
|
||||
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,273 +0,0 @@
|
||||
package nu.marginalia.slop.column.string;
|
||||
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.primitive.ByteColumn;
|
||||
import nu.marginalia.slop.column.primitive.ByteColumnReader;
|
||||
import nu.marginalia.slop.column.primitive.ByteColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.LongColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
public class EnumColumn {
|
||||
|
||||
public static EnumColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(
|
||||
columnDesc,
|
||||
StringColumn.open(path,
|
||||
columnDesc.createSupplementaryColumn(
|
||||
ColumnFunction.DICT,
|
||||
ColumnType.TXTSTRING,
|
||||
StorageType.PLAIN)
|
||||
),
|
||||
VarintColumn.open(path,
|
||||
columnDesc.createSupplementaryColumn(
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.ENUM_LE,
|
||||
columnDesc.storageType()
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
public static EnumColumnReader open8(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader8(
|
||||
columnDesc,
|
||||
StringColumn.open(path,
|
||||
columnDesc.createSupplementaryColumn(
|
||||
ColumnFunction.DICT,
|
||||
ColumnType.TXTSTRING,
|
||||
StorageType.PLAIN)
|
||||
),
|
||||
ByteColumn.open(path,
|
||||
columnDesc.createSupplementaryColumn(
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.BYTE,
|
||||
columnDesc.storageType()
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
public static StringColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc,
|
||||
StringColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA, ColumnType.ENUM_LE, columnDesc.storageType()))
|
||||
);
|
||||
}
|
||||
|
||||
public static StringColumnWriter create8(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer8(columnDesc,
|
||||
StringColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DICT, ColumnType.TXTSTRING, StorageType.PLAIN)),
|
||||
ByteColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA, ColumnType.BYTE, columnDesc.storageType()))
|
||||
);
|
||||
}
|
||||
|
||||
private static class Writer implements StringColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StringColumnWriter dicionaryColumn;
|
||||
private final LongColumnWriter dataColumn;
|
||||
private final HashMap<String, Integer> dictionary = new HashMap<>();
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc,
|
||||
StringColumnWriter dicionaryColumn,
|
||||
LongColumnWriter dataColumn) throws IOException
|
||||
{
|
||||
this.columnDesc = columnDesc;
|
||||
this.dicionaryColumn = dicionaryColumn;
|
||||
this.dataColumn = dataColumn;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(String value) throws IOException {
|
||||
Integer index = dictionary.get(value);
|
||||
if (index == null) {
|
||||
index = dictionary.size();
|
||||
dictionary.put(value, index);
|
||||
dicionaryColumn.put(value);
|
||||
}
|
||||
dataColumn.put(index);
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return dataColumn.position();
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
dataColumn.close();
|
||||
dicionaryColumn.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Writer8 implements StringColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StringColumnWriter dicionaryColumn;
|
||||
private final ByteColumnWriter dataColumn;
|
||||
private final HashMap<String, Integer> dictionary = new HashMap<>();
|
||||
|
||||
public Writer8(ColumnDesc<?, ?> columnDesc,
|
||||
StringColumnWriter dicionaryColumn,
|
||||
ByteColumnWriter dataColumn) throws IOException
|
||||
{
|
||||
this.columnDesc = columnDesc;
|
||||
this.dicionaryColumn = dicionaryColumn;
|
||||
this.dataColumn = dataColumn;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(String value) throws IOException {
|
||||
Integer index = dictionary.get(value);
|
||||
if (index == null) {
|
||||
index = dictionary.size();
|
||||
dictionary.put(value, index);
|
||||
dicionaryColumn.put(value);
|
||||
}
|
||||
dataColumn.put((byte) index.intValue());
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return dataColumn.position();
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
dataColumn.close();
|
||||
dicionaryColumn.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements EnumColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final VarintColumnReader dataColumn;
|
||||
private final List<String> dictionary = new ArrayList<>();
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc,
|
||||
StringColumnReader dicionaryColumn,
|
||||
VarintColumnReader dataColumn) throws IOException
|
||||
{
|
||||
this.columnDesc = columnDesc;
|
||||
this.dataColumn = dataColumn;
|
||||
|
||||
while (dicionaryColumn.hasRemaining()) {
|
||||
dictionary.add(dicionaryColumn.get());
|
||||
}
|
||||
|
||||
dicionaryColumn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getDictionary() throws IOException {
|
||||
return Collections.unmodifiableList(dictionary);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOrdinal() throws IOException {
|
||||
return (int) dataColumn.get();
|
||||
}
|
||||
|
||||
public String get() throws IOException {
|
||||
int index = (int) dataColumn.get();
|
||||
return dictionary.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return dataColumn.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
dataColumn.skip(positions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return dataColumn.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
dataColumn.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader8 implements EnumColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final ByteColumnReader dataColumn;
|
||||
private final List<String> dictionary = new ArrayList<>();
|
||||
|
||||
public Reader8(ColumnDesc<?, ?> columnDesc,
|
||||
StringColumnReader dicionaryColumn,
|
||||
ByteColumnReader dataColumn) throws IOException
|
||||
{
|
||||
this.columnDesc = columnDesc;
|
||||
this.dataColumn = dataColumn;
|
||||
|
||||
while (dicionaryColumn.hasRemaining()) {
|
||||
dictionary.add(dicionaryColumn.get());
|
||||
}
|
||||
|
||||
dicionaryColumn.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getDictionary() throws IOException {
|
||||
return Collections.unmodifiableList(dictionary);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getOrdinal() throws IOException {
|
||||
return dataColumn.get();
|
||||
}
|
||||
|
||||
public String get() throws IOException {
|
||||
int index = dataColumn.get();
|
||||
return dictionary.get(index);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return dataColumn.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
dataColumn.skip(positions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return dataColumn.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
dataColumn.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,26 +0,0 @@
|
||||
package nu.marginalia.slop.column.string;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
public interface EnumColumnReader extends StringColumnReader, ColumnReader, AutoCloseable {
|
||||
|
||||
List<String> getDictionary() throws IOException;
|
||||
int getOrdinal() throws IOException;
|
||||
|
||||
String get() throws IOException;
|
||||
|
||||
@Override
|
||||
long position() throws IOException;
|
||||
|
||||
@Override
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
@Override
|
||||
boolean hasRemaining() throws IOException;
|
||||
|
||||
@Override
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,315 +0,0 @@
|
||||
package nu.marginalia.slop.column.string;
|
||||
|
||||
import nu.marginalia.slop.column.array.*;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
import nu.marginalia.slop.storage.StorageWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class StringColumn {
|
||||
|
||||
public static StringColumnReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
if (columnDesc.type().equals(ColumnType.STRING)) {
|
||||
return new ArrayReader(columnDesc, ByteArrayColumn.open(path, columnDesc));
|
||||
} else if (columnDesc.type().equals(ColumnType.CSTRING)) {
|
||||
return new CStringReader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
} else if (columnDesc.type().equals(ColumnType.TXTSTRING)) {
|
||||
return new TxtStringReader(columnDesc, Storage.reader(path, columnDesc, true));
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type());
|
||||
}
|
||||
|
||||
|
||||
public static StringColumnWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
if (columnDesc.type().equals(ColumnType.STRING)) {
|
||||
return new ArrayWriter(columnDesc, ByteArrayColumn.create(path, columnDesc));
|
||||
} else if (columnDesc.type().equals(ColumnType.CSTRING)) {
|
||||
return new CStringWriter(columnDesc, Storage.writer(path, columnDesc));
|
||||
} else if (columnDesc.type().equals(ColumnType.TXTSTRING)) {
|
||||
return new TxtStringWriter(columnDesc, Storage.writer(path, columnDesc));
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type());
|
||||
}
|
||||
|
||||
public static ObjectArrayColumnReader<String> openArray(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
if (columnDesc.type().equals(ColumnType.STRING_ARRAY)) {
|
||||
return ObjectArrayColumn.open(path, columnDesc, new ArrayReader(columnDesc, ByteArrayColumn.open(path, columnDesc)));
|
||||
} else if (columnDesc.type().equals(ColumnType.CSTRING_ARRAY)) {
|
||||
return ObjectArrayColumn.open(path, columnDesc, new CStringReader(columnDesc, Storage.reader(path, columnDesc, true)));
|
||||
} else if (columnDesc.type().equals(ColumnType.TXTSTRING_ARRAY)) {
|
||||
return ObjectArrayColumn.open(path, columnDesc, new TxtStringReader(columnDesc, Storage.reader(path, columnDesc, true)));
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type());
|
||||
}
|
||||
|
||||
public static ObjectArrayColumnWriter<String> createArray(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
if (columnDesc.type().equals(ColumnType.STRING_ARRAY)) {
|
||||
return ObjectArrayColumn.create(path, columnDesc, new ArrayWriter(columnDesc, ByteArrayColumn.create(path, columnDesc)));
|
||||
} else if (columnDesc.type().equals(ColumnType.CSTRING_ARRAY)) {
|
||||
return ObjectArrayColumn.create(path, columnDesc, new CStringWriter(columnDesc, Storage.writer(path, columnDesc)));
|
||||
} else if (columnDesc.type().equals(ColumnType.TXTSTRING_ARRAY)) {
|
||||
return ObjectArrayColumn.create(path, columnDesc, new TxtStringWriter(columnDesc, Storage.writer(path, columnDesc)));
|
||||
}
|
||||
throw new IllegalArgumentException("Unsupported column type: " + columnDesc.type());
|
||||
}
|
||||
|
||||
private static class ArrayWriter implements StringColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final ByteArrayColumnWriter backingColumn;
|
||||
|
||||
public ArrayWriter(ColumnDesc<?, ?> columnDesc, ByteArrayColumnWriter backingColumn) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.backingColumn = backingColumn;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(String value) throws IOException {
|
||||
if (null == value) {
|
||||
value = "";
|
||||
}
|
||||
|
||||
backingColumn.put(value.getBytes());
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return backingColumn.position();
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
backingColumn.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class ArrayReader implements StringColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final ByteArrayColumnReader backingColumn;
|
||||
|
||||
public ArrayReader(ColumnDesc<?, ?> columnDesc, ByteArrayColumnReader backingColumn) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.backingColumn = backingColumn;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public String get() throws IOException {
|
||||
return new String(backingColumn.get());
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return backingColumn.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
backingColumn.skip(positions);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return backingColumn.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
backingColumn.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class CStringWriter implements StringColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storageWriter;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
public CStringWriter(ColumnDesc<?,?> columnDesc, StorageWriter storageWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storageWriter = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(String value) throws IOException {
|
||||
if (null == value) {
|
||||
value = "";
|
||||
}
|
||||
assert value.indexOf('\0') == -1 : "Null byte not allowed in cstring";
|
||||
storageWriter.putBytes(value.getBytes());
|
||||
storageWriter.putByte((byte) 0);
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storageWriter.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class CStringReader implements StringColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storageReader;
|
||||
private long position = 0;
|
||||
|
||||
public CStringReader(ColumnDesc<?, ?> columnDesc, StorageReader storageReader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storageReader = storageReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public String get() throws IOException {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
byte b;
|
||||
while (storageReader.hasRemaining() && (b = storageReader.getByte()) != 0) {
|
||||
sb.append((char) b);
|
||||
}
|
||||
position++;
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
int i = 0;
|
||||
|
||||
while (i < positions && storageReader.hasRemaining()) {
|
||||
if (storageReader.getByte() == 0) {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
position += positions;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storageReader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storageReader.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class TxtStringWriter implements StringColumnWriter {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageWriter storageWriter;
|
||||
private long position = 0;
|
||||
|
||||
public TxtStringWriter(ColumnDesc<?, ?> columnDesc, StorageWriter storageWriter) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storageWriter = storageWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public void put(String value) throws IOException {
|
||||
if (null == value) {
|
||||
value = "";
|
||||
}
|
||||
|
||||
assert value.indexOf('\n') == -1 : "Newline not allowed in txtstring";
|
||||
|
||||
storageWriter.putBytes(value.getBytes());
|
||||
storageWriter.putByte((byte) '\n');
|
||||
position++;
|
||||
}
|
||||
|
||||
public long position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
storageWriter.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static class TxtStringReader implements StringColumnReader {
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
private final StorageReader storageReader;
|
||||
private long position = 0;
|
||||
|
||||
public TxtStringReader(ColumnDesc<?, ?> columnDesc, StorageReader storageReader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
this.storageReader = storageReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
}
|
||||
|
||||
public String get() throws IOException {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
byte b;
|
||||
while (storageReader.hasRemaining()) {
|
||||
b = storageReader.getByte();
|
||||
if (b == '\n') {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
sb.append((char) b);
|
||||
}
|
||||
}
|
||||
position++;
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long positions) throws IOException {
|
||||
int i = 0;
|
||||
|
||||
position+=positions;
|
||||
|
||||
while (i < positions && storageReader.hasRemaining()) {
|
||||
if (storageReader.getByte() == '\n') {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return storageReader.hasRemaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
storageReader.close();
|
||||
}
|
||||
}
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
package nu.marginalia.slop.column.string;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface StringColumnReader extends ObjectColumnReader<String>, AutoCloseable {
|
||||
|
||||
String get() throws IOException;
|
||||
|
||||
@Override
|
||||
long position() throws IOException;
|
||||
|
||||
@Override
|
||||
void skip(long positions) throws IOException;
|
||||
|
||||
@Override
|
||||
boolean hasRemaining() throws IOException;
|
||||
|
||||
@Override
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
package nu.marginalia.slop.column.string;
|
||||
|
||||
import nu.marginalia.slop.column.ObjectColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public interface StringColumnWriter extends ObjectColumnWriter<String>, AutoCloseable {
|
||||
void put(String value) throws IOException;
|
||||
|
||||
@Override
|
||||
void close() throws IOException;
|
||||
}
|
@ -1,109 +0,0 @@
|
||||
package nu.marginalia.slop.desc;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/** Describes a slop column. A column is a named, typed, and paginated sequence of values.
|
||||
*
|
||||
* @param name the name of the column, must not contain dots
|
||||
* @param page the page number of the column, 0 for the first page
|
||||
* @param function the function of the column, {@link ColumnFunction}
|
||||
* @param type the type of the column, {@link ColumnType}
|
||||
* @param storageType the storage type of the column, {@link StorageType}
|
||||
* @param <R> the reader type
|
||||
* @param <W> the writer type
|
||||
*/
|
||||
public record ColumnDesc<R extends ColumnReader,
|
||||
W extends ColumnWriter>(
|
||||
String name,
|
||||
int page,
|
||||
ColumnFunction function,
|
||||
ColumnType<R, W> type,
|
||||
StorageType storageType) {
|
||||
|
||||
public ColumnDesc {
|
||||
if (name.contains(".")) {
|
||||
throw new IllegalArgumentException("Invalid column name: " + name);
|
||||
}
|
||||
}
|
||||
|
||||
public ColumnDesc(String name, ColumnType<R, W> type, StorageType storageType) {
|
||||
this(name, 0, ColumnFunction.DATA, type, storageType);
|
||||
}
|
||||
|
||||
/** Open a column reader for this column.
|
||||
*
|
||||
* @param table the table to register the reader with
|
||||
* @param path the path to the file to read from
|
||||
* */
|
||||
public R open(SlopTable table, Path path) throws IOException {
|
||||
var reader = type.open(path, this);
|
||||
table.register(reader);
|
||||
return reader;
|
||||
}
|
||||
|
||||
/** Create a new column writer for this column.
|
||||
*
|
||||
* @param table the table to register the writer with
|
||||
* @param path the path to the file to write to
|
||||
* */
|
||||
public W create(SlopTable table, Path path) throws IOException {
|
||||
var writer = type.create(path, this);
|
||||
table.register(writer);
|
||||
return writer;
|
||||
}
|
||||
|
||||
public W createUnregistered(Path path) throws IOException {
|
||||
return type.create(path, this);
|
||||
}
|
||||
|
||||
public R openUnregistered(Path path) throws IOException {
|
||||
return type.open(path, this);
|
||||
}
|
||||
|
||||
public <R2 extends ColumnReader, W2 extends ColumnWriter >
|
||||
ColumnDesc<R2, W2> createSupplementaryColumn(
|
||||
ColumnFunction function,
|
||||
ColumnType<R2, W2> type,
|
||||
StorageType storageType)
|
||||
{
|
||||
return new ColumnDesc<>(name, page, function, type, storageType);
|
||||
}
|
||||
|
||||
public ByteOrder byteOrder() {
|
||||
return type.byteOrder();
|
||||
}
|
||||
|
||||
public ColumnDesc<R, W> forPage(int page) {
|
||||
return new ColumnDesc<>(name, page, function, type, storageType);
|
||||
}
|
||||
|
||||
public boolean exists(Path base) {
|
||||
return Files.exists(base.resolve(toString()));
|
||||
}
|
||||
|
||||
public static ColumnDesc parse(String name) {
|
||||
String[] parts = name.split("\\.");
|
||||
if (parts.length != 5) {
|
||||
throw new IllegalArgumentException("Invalid column name: " + name);
|
||||
}
|
||||
|
||||
return new ColumnDesc(parts[0],
|
||||
Integer.parseInt(parts[1]),
|
||||
ColumnFunction.fromString(parts[2]),
|
||||
ColumnType.byMnemonic(parts[3]),
|
||||
StorageType.fromString(parts[4])
|
||||
);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return name + "." + page + "." + function.nmnemonic + "." + type.mnemonic() + "." + storageType.nmnemonic;
|
||||
}
|
||||
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
package nu.marginalia.slop.desc;
|
||||
|
||||
/** The type of function that a column performs.
 * This is used to determine how to interpret the
 * data in the column.
 */
public enum ColumnFunction {
    /** The principal data column. */
    DATA("dat"),
    /** The length column for the DATA column, in the case of variable-length records. */
    DATA_LEN("dat-len"),
    /** The length column for the group of items in the DATA column, in the case of variable-length array-style records. */
    GROUP_LENGTH("grp-len"),
    /** The dictionary column, in the case of a dictionary-encoded column. */
    DICT("dic"),
    /** The length column for the DICT column, in the case of variable-length dictionaries. */
    DICT_LEN("dic-len"),
    ;

    /** The short name identifying this function; matched by {@link #fromString(String)}.
     * Final, so that a constant's mnemonic cannot be reassigned at runtime.
     */
    public final String nmnemonic;

    ColumnFunction(String nmnemonic) {
        this.nmnemonic = nmnemonic;
    }

    /** Return the appropriate column function for
     * a length column corresponding to the current
     * column function.
     *
     * @throws IllegalArgumentException if this function is neither DATA nor DICT
     */
    public ColumnFunction lengthsTable() {
        return switch (this) {
            case DATA -> DATA_LEN;
            case DICT -> DICT_LEN;
            default -> throw new IllegalArgumentException("Cannot get length table type for " + this);
        };
    }

    /** Looks up a column function by its mnemonic.
     *
     * @param nmnemonic the mnemonic to look up, e.g. {@code "dat"}
     * @return the function with exactly that mnemonic
     * @throws IllegalArgumentException if no function has that mnemonic
     */
    public static ColumnFunction fromString(String nmnemonic) {
        for (ColumnFunction type : values()) {
            if (type.nmnemonic.equals(nmnemonic)) {
                return type;
            }
        }
        throw new IllegalArgumentException("Unknown column function: " + nmnemonic);
    }
}
|
@ -1,124 +0,0 @@
|
||||
package nu.marginalia.slop.desc;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
import nu.marginalia.slop.column.array.*;
|
||||
import nu.marginalia.slop.column.dynamic.*;
|
||||
import nu.marginalia.slop.column.primitive.*;
|
||||
import nu.marginalia.slop.column.string.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
|
||||
public abstract class ColumnType<
|
||||
R extends ColumnReader,
|
||||
W extends ColumnWriter>
|
||||
{
|
||||
private static Map<String, ColumnType<? extends ColumnReader,? extends ColumnWriter>> byMnemonic = new HashMap<>();
|
||||
|
||||
public abstract String mnemonic();
|
||||
public abstract ByteOrder byteOrder();
|
||||
|
||||
abstract R open(Path path, ColumnDesc<R, W> desc) throws IOException;
|
||||
abstract W create(Path path, ColumnDesc<R, W> desc) throws IOException;
|
||||
|
||||
public static ColumnType<? extends ColumnReader,? extends ColumnWriter> byMnemonic(String mnemonic) {
|
||||
return byMnemonic.get(mnemonic);
|
||||
}
|
||||
|
||||
public static ColumnType<ByteColumnReader, ByteColumnWriter> BYTE = register("s8", ByteOrder.nativeOrder(), ByteColumn::open, ByteColumn::create);
|
||||
public static ColumnType<CharColumnReader, CharColumnWriter> CHAR_LE = register("u16le", ByteOrder.LITTLE_ENDIAN, CharColumn::open, CharColumn::create);
|
||||
public static ColumnType<CharColumnReader, CharColumnWriter> CHAR_BE = register("u16be", ByteOrder.BIG_ENDIAN, CharColumn::open, CharColumn::create);
|
||||
public static ColumnType<ShortColumnReader, ShortColumnWriter> SHORT_LE = register("s16le", ByteOrder.LITTLE_ENDIAN, ShortColumn::open, ShortColumn::create);
|
||||
public static ColumnType<ShortColumnReader, ShortColumnWriter> SHORT_BE = register("s16be", ByteOrder.BIG_ENDIAN, ShortColumn::open, ShortColumn::create);
|
||||
public static ColumnType<IntColumnReader, IntColumnWriter> INT_LE = register("s32le", ByteOrder.LITTLE_ENDIAN, IntColumn::open, IntColumn::create);
|
||||
public static ColumnType<IntColumnReader, IntColumnWriter> INT_BE = register("s32be", ByteOrder.BIG_ENDIAN, IntColumn::open, IntColumn::create);
|
||||
public static ColumnType<LongColumnReader, LongColumnWriter> LONG_LE = register("s64le", ByteOrder.LITTLE_ENDIAN, LongColumn::open, LongColumn::create);
|
||||
public static ColumnType<LongColumnReader, LongColumnWriter> LONG_BE = register("s64be", ByteOrder.BIG_ENDIAN, LongColumn::open, LongColumn::create);
|
||||
public static ColumnType<FloatColumnReader, FloatColumnWriter> FLOAT_LE = register("fp32le", ByteOrder.LITTLE_ENDIAN, FloatColumn::open, FloatColumn::create);
|
||||
public static ColumnType<FloatColumnReader, FloatColumnWriter> FLOAT_BE = register("fp32be", ByteOrder.BIG_ENDIAN, FloatColumn::open, FloatColumn::create);
|
||||
public static ColumnType<DoubleColumnReader, DoubleColumnWriter> DOUBLE_LE = register("fp64le", ByteOrder.LITTLE_ENDIAN, DoubleColumn::open, DoubleColumn::create);
|
||||
public static ColumnType<DoubleColumnReader, DoubleColumnWriter> DOUBLE_BE = register("fp64be", ByteOrder.BIG_ENDIAN, DoubleColumn::open, DoubleColumn::create);
|
||||
public static ColumnType<VarintColumnReader, VarintColumnWriter> VARINT_LE = register("varintle", ByteOrder.LITTLE_ENDIAN, VarintColumn::open, VarintColumn::create);
|
||||
public static ColumnType<VarintColumnReader, VarintColumnWriter> VARINT_BE = register("varintbe", ByteOrder.BIG_ENDIAN, VarintColumn::open, VarintColumn::create);
|
||||
public static ColumnType<CustomBinaryColumnReader, CustomBinaryColumnWriter> BYTE_ARRAY_CUSTOM = register("s8[]+custom", ByteOrder.nativeOrder(), CustomBinaryColumn::open, CustomBinaryColumn::create);
|
||||
|
||||
public static ColumnType<StringColumnReader, StringColumnWriter> STRING = register("s8[]+str", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
|
||||
public static ColumnType<StringColumnReader, StringColumnWriter> CSTRING = register("s8+cstr", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
|
||||
public static ColumnType<StringColumnReader, StringColumnWriter> TXTSTRING = register("s8+txt", ByteOrder.nativeOrder(), StringColumn::open, StringColumn::create);
|
||||
|
||||
|
||||
public static ColumnType<EnumColumnReader, StringColumnWriter> ENUM_8 = register("u8+enum", ByteOrder.LITTLE_ENDIAN, EnumColumn::open8, EnumColumn::create8);
|
||||
public static ColumnType<EnumColumnReader, StringColumnWriter> ENUM_LE = register("varintle+enum", ByteOrder.LITTLE_ENDIAN, EnumColumn::open, EnumColumn::create);
|
||||
public static ColumnType<EnumColumnReader, StringColumnWriter> ENUM_BE = register("varintbe+enum", ByteOrder.BIG_ENDIAN, EnumColumn::open, EnumColumn::create);
|
||||
|
||||
public static ColumnType<ByteArrayColumnReader, ByteArrayColumnWriter> BYTE_ARRAY = register("s8[]", ByteOrder.nativeOrder(), ByteArrayColumn::open, ByteArrayColumn::create);
|
||||
public static ColumnType<ObjectArrayColumnReader<byte[]>, ObjectArrayColumnWriter<byte[]>> BYTE_ARRAY_ARRAY = register("s8[][]", ByteOrder.nativeOrder(), ByteArrayColumn::openNested, ByteArrayColumn::createNested);
|
||||
public static ColumnType<LongArrayColumnReader, LongArrayColumnWriter> LONG_ARRAY_LE = register("s64le[]", ByteOrder.LITTLE_ENDIAN, LongArrayColumn::open, LongArrayColumn::create);
|
||||
public static ColumnType<LongArrayColumnReader, LongArrayColumnWriter> LONG_ARRAY_BE = register("s64be[]", ByteOrder.BIG_ENDIAN, LongArrayColumn::open, LongArrayColumn::create);
|
||||
|
||||
public static ColumnType<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> STRING_ARRAY = register("s8[]+str[]", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray);
|
||||
public static ColumnType<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> CSTRING_ARRAY = register("s8+cstr[]", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray);
|
||||
public static ColumnType<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> TXTSTRING_ARRAY = register("s8+txt", ByteOrder.nativeOrder(), StringColumn::openArray, StringColumn::createArray);
|
||||
|
||||
public static ColumnType<IntArrayColumnReader, IntArrayColumnWriter> INT_ARRAY_LE = register("s32le[]", ByteOrder.LITTLE_ENDIAN, IntArrayColumn::open, IntArrayColumn::create);
|
||||
public static ColumnType<IntArrayColumnReader, IntArrayColumnWriter> INT_ARRAY_BE = register("s32be[]", ByteOrder.BIG_ENDIAN, IntArrayColumn::open, IntArrayColumn::create);
|
||||
public static ColumnType<ObjectArrayColumnReader<int[]>, ObjectArrayColumnWriter<int[]>> INT_ARRAY_ARRAY_LE = register("s32le[][]", ByteOrder.LITTLE_ENDIAN, IntArrayColumn::openNested, IntArrayColumn::createNested);
|
||||
public static ColumnType<ObjectArrayColumnReader<int[]>, ObjectArrayColumnWriter<int[]>> INT_ARRAY_ARRAY_BE = register("s32be[][]", ByteOrder.BIG_ENDIAN, IntArrayColumn::openNested, IntArrayColumn::createNested);
|
||||
public static ColumnType<ObjectArrayColumnReader<long[]>, ObjectArrayColumnWriter<long[]>> LONG_ARRAY_ARRAY_LE = register("s64le[][]", ByteOrder.LITTLE_ENDIAN, LongArrayColumn::openNested, LongArrayColumn::createNested);
|
||||
public static ColumnType<ObjectArrayColumnReader<long[]>, ObjectArrayColumnWriter<long[]>> LONG_ARRAY_ARRAY_BE = register("s64be[][]", ByteOrder.BIG_ENDIAN, LongArrayColumn::openNested, LongArrayColumn::createNested);
|
||||
|
||||
public interface ColumnOpener<T extends ColumnReader> {
|
||||
T open(Path path, ColumnDesc desc) throws IOException;
|
||||
}
|
||||
public interface ColumnCreator<T extends ColumnWriter> {
|
||||
T create(Path path, ColumnDesc desc) throws IOException;
|
||||
}
|
||||
|
||||
public static <R extends ColumnReader,
|
||||
W extends ColumnWriter,
|
||||
T extends ColumnType<R,W>> ColumnType<R, W> register(
|
||||
String mnemonic,
|
||||
ByteOrder byteOrder,
|
||||
ColumnOpener<R> readerCons,
|
||||
ColumnCreator<W> writerCons) {
|
||||
|
||||
var ins = new ColumnType<R, W>() {
|
||||
@Override
|
||||
public String mnemonic() {
|
||||
return mnemonic;
|
||||
}
|
||||
|
||||
public ByteOrder byteOrder() {
|
||||
return byteOrder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public R open(Path path, ColumnDesc<R, W> desc) throws IOException {
|
||||
return readerCons.open(path, desc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public W create(Path path, ColumnDesc<R, W> desc) throws IOException {
|
||||
return writerCons.create(path, desc);
|
||||
}
|
||||
};
|
||||
|
||||
byMnemonic.put(mnemonic, ins);
|
||||
return ins;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return mnemonic().hashCode();
|
||||
}
|
||||
public boolean equals(Object o) {
|
||||
return o instanceof ColumnType ct && Objects.equals(ct.mnemonic(), mnemonic());
|
||||
}
|
||||
public String toString() {
|
||||
return mnemonic();
|
||||
}
|
||||
}
|
@ -1,86 +0,0 @@
|
||||
package nu.marginalia.slop.desc;
|
||||
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/** SlopTable is a utility class for managing a group of columns that are
|
||||
* read and written together. It is used to ensure that the reader and writer
|
||||
* positions are maintained correctly between the columns, and to ensure that
|
||||
* the columns are closed correctly.
|
||||
* <p></p>
|
||||
* To deal with the fact that some columns may not be expected to have the same
|
||||
* number of rows, SlopTable supports the concept of column groups. Each column
|
||||
* group is a separate SlopTable instance, and the columns in the group are
|
||||
* managed together.
|
||||
* <p></p>
|
||||
* It is often a good idea to let the reader or writer class for a particular
|
||||
* table inherit from SlopTable, so that the table is automatically closed when
|
||||
* the reader or writer is closed.
|
||||
*/
|
||||
|
||||
public class SlopTable implements AutoCloseable {
|
||||
private final Set<ColumnReader> readerList = new HashSet<>();
|
||||
private final Set<ColumnWriter> writerList = new HashSet<>();
|
||||
|
||||
/** Register a column reader with this table. This is called from ColumnDesc. */
|
||||
void register(ColumnReader reader) {
|
||||
if (!readerList.add(reader))
|
||||
System.err.println("Double registration of " + reader);
|
||||
}
|
||||
|
||||
/** Register a column reader with this table. This is called from ColumnDesc. */
|
||||
void register(ColumnWriter writer) {
|
||||
if (!writerList.add(writer))
|
||||
System.err.println("Double registration of " + writer);
|
||||
}
|
||||
|
||||
protected <T> boolean find(ObjectColumnReader<T> column, T value) throws IOException {
|
||||
boolean ret = column.search(value);
|
||||
|
||||
long desiredPos = column.position() - 1;
|
||||
|
||||
for (var otherReader : readerList) {
|
||||
if (otherReader.position() < desiredPos) {
|
||||
otherReader.skip(desiredPos - otherReader.position());
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
|
||||
Map<Long, List<ColumnDesc>> positions = new HashMap<>();
|
||||
|
||||
for (ColumnReader reader : readerList) {
|
||||
positions.computeIfAbsent(reader.position(), k -> new ArrayList<>()).add(reader.columnDesc());
|
||||
reader.close();
|
||||
}
|
||||
for (ColumnWriter writer : writerList) {
|
||||
positions.computeIfAbsent(writer.position(), k -> new ArrayList<>()).add(writer.columnDesc());
|
||||
writer.close();
|
||||
}
|
||||
|
||||
|
||||
// Check for the scenario where we have multiple positions
|
||||
// and one of the positions is zero, indicating that we haven't
|
||||
// read or written to one of the columns. This is likely a bug,
|
||||
// but not necessarily a severe one, so we just log a warning.
|
||||
|
||||
var zeroPositions = Objects.requireNonNullElseGet(positions.remove(0L), List::of);
|
||||
if (!zeroPositions.isEmpty() && !positions.isEmpty()) {
|
||||
System.err.println("Zero position found in {}, this is likely development debris" + zeroPositions);
|
||||
}
|
||||
|
||||
// If there are more than one position and several are non-zero, then we haven't maintained the
|
||||
// position correctly between the columns. This is a disaster, so we throw an exception.
|
||||
if (positions.size() > 1) {
|
||||
throw new IllegalStateException("Expected only one reader position, found " + positions);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,28 +0,0 @@
|
||||
package nu.marginalia.slop.desc;
|
||||
|
||||
/** The type of storage used for a column. */
public enum StorageType {

    /** The column is stored as an uncompressed binary file. */
    PLAIN("bin"),
    /** The column is stored as a compressed binary file using the GZIP algorithm. */
    GZIP("gz"),
    /** The column is stored as a compressed binary file using the ZSTD algorithm. */
    ZSTD("zstd"),
    ;

    /** The short name identifying this storage type; matched by {@link #fromString(String)}.
     * Final, so that a constant's mnemonic cannot be reassigned at runtime.
     */
    public final String nmnemonic;

    StorageType(String nmnemonic) {
        this.nmnemonic = nmnemonic;
    }

    /** Looks up a storage type by its mnemonic.
     *
     * @param nmnemonic the mnemonic to look up, e.g. {@code "gz"}
     * @return the storage type with exactly that mnemonic
     * @throws IllegalArgumentException if no storage type has that mnemonic
     */
    public static StorageType fromString(String nmnemonic) {
        for (StorageType type : values()) {
            if (type.nmnemonic.equals(nmnemonic)) {
                return type;
            }
        }
        throw new IllegalArgumentException("Unknown storage type: " + nmnemonic);
    }
}
|
@ -1,234 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorInputStream;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.zip.GZIPInputStream;
|
||||
|
||||
public class CompressingStorageReader implements StorageReader {
|
||||
private final byte[] arrayBuffer;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
private final InputStream is;
|
||||
private final ByteBuffer buffer;
|
||||
|
||||
public CompressingStorageReader(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException {
|
||||
is = switch (storageType) {
|
||||
case GZIP -> new GZIPInputStream(Files.newInputStream(path, StandardOpenOption.READ));
|
||||
case ZSTD -> new ZstdCompressorInputStream(Files.newInputStream(path, StandardOpenOption.READ));
|
||||
default -> throw new UnsupportedEncodingException("Unsupported storage type: " + storageType);
|
||||
};
|
||||
|
||||
this.arrayBuffer = new byte[bufferSize];
|
||||
this.buffer = ByteBuffer.wrap(arrayBuffer).order(order);
|
||||
|
||||
buffer.position(0);
|
||||
buffer.limit(0);
|
||||
|
||||
// read the first chunk, this is needed for InputStream otherwise we don't handle empty files
|
||||
// correctly
|
||||
refill();
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getByte() throws IOException {
|
||||
if (buffer.remaining() < Byte.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public short getShort() throws IOException {
|
||||
if (buffer.remaining() < Short.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getShort();
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getChar() throws IOException {
|
||||
if (buffer.remaining() < Character.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getChar();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInt() throws IOException {
|
||||
if (buffer.remaining() < Integer.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLong() throws IOException {
|
||||
if (buffer.remaining() < Long.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat() throws IOException {
|
||||
if (buffer.remaining() < Float.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getFloat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getDouble() throws IOException {
|
||||
if (buffer.remaining() < Double.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getDouble();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(byte[] bytes) throws IOException {
|
||||
getBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(byte[] bytes, int offset, int length) throws IOException {
|
||||
if (buffer.remaining() >= length) {
|
||||
buffer.get(bytes, offset, length);
|
||||
} else {
|
||||
int totalToRead = length;
|
||||
|
||||
while (totalToRead > 0) {
|
||||
if (!buffer.hasRemaining()) {
|
||||
refill();
|
||||
}
|
||||
|
||||
int toRead = Math.min(buffer.remaining(), totalToRead);
|
||||
buffer.get(bytes, offset + length - totalToRead, toRead);
|
||||
totalToRead -= toRead;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(ByteBuffer data) throws IOException {
|
||||
if (data.remaining() < buffer.remaining()) {
|
||||
int lim = buffer.limit();
|
||||
buffer.limit(buffer.position() + data.remaining());
|
||||
data.put(buffer);
|
||||
buffer.limit(lim);
|
||||
} else {
|
||||
while (data.hasRemaining()) {
|
||||
if (!buffer.hasRemaining()) {
|
||||
refill();
|
||||
}
|
||||
|
||||
int lim = buffer.limit();
|
||||
buffer.limit(Math.min(buffer.position() + data.remaining(), lim));
|
||||
data.put(buffer);
|
||||
buffer.limit(lim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void getInts(int[] ints) throws IOException {
|
||||
if (buffer.remaining() >= ints.length * Integer.BYTES) {
|
||||
// fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
ints[i] = buffer.getInt();
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
ints[i] = getInt();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void getLongs(long[] longs) throws IOException {
|
||||
if (buffer.remaining() >= longs.length * Long.BYTES) {
|
||||
// fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries
|
||||
for (int i = 0; i < longs.length; i++) {
|
||||
longs[i] = buffer.getLong();
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < longs.length; i++) {
|
||||
longs[i] = getLong();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long bytes, int stepSize) throws IOException {
|
||||
long toSkip = bytes * stepSize;
|
||||
|
||||
if (buffer.remaining() < toSkip) {
|
||||
toSkip -= buffer.remaining();
|
||||
|
||||
while (toSkip > 0) {
|
||||
long rb = is.skip(toSkip);
|
||||
toSkip -= rb;
|
||||
position += rb;
|
||||
}
|
||||
|
||||
buffer.position(0);
|
||||
buffer.limit(0);
|
||||
} else {
|
||||
buffer.position(buffer.position() + (int) toSkip);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seek(long position, int stepSize) throws IOException {
|
||||
throw new UnsupportedEncodingException("Seek not supported in GzipStorageReader");
|
||||
}
|
||||
|
||||
private void refill() throws IOException {
|
||||
buffer.compact();
|
||||
|
||||
while (buffer.hasRemaining()) {
|
||||
int rb = is.read(arrayBuffer, buffer.position(), buffer.remaining());
|
||||
if (rb < 0) {
|
||||
break;
|
||||
}
|
||||
else {
|
||||
position += rb;
|
||||
buffer.position(buffer.position() + rb);
|
||||
}
|
||||
}
|
||||
|
||||
buffer.flip();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return position - buffer.remaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return buffer.hasRemaining() || is.available() > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
is.close();
|
||||
}
|
||||
}
|
@ -1,210 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.apache.commons.compress.compressors.zstandard.ZstdCompressorOutputStream;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.zip.GZIPOutputStream;
|
||||
|
||||
public class CompressingStorageWriter implements StorageWriter, AutoCloseable {
|
||||
private final ByteBuffer buffer;
|
||||
private final OutputStream os;
|
||||
private byte[] arrayBuffer;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
private final Path tempPath;
|
||||
private final Path destPath;
|
||||
|
||||
public CompressingStorageWriter(Path path, StorageType storageType, ByteOrder order, int bufferSize) throws IOException {
|
||||
tempPath = path.resolveSibling(path.getFileName() + ".tmp");
|
||||
destPath = path;
|
||||
|
||||
os = switch (storageType) {
|
||||
case GZIP -> new GZIPOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE));
|
||||
case ZSTD -> new ZstdCompressorOutputStream(Files.newOutputStream(tempPath, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE));
|
||||
default -> throw new IllegalArgumentException("Unsupported storage type: " + storageType);
|
||||
};
|
||||
|
||||
arrayBuffer = new byte[bufferSize];
|
||||
this.buffer = ByteBuffer.wrap(arrayBuffer).order(order);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putByte(byte b) throws IOException {
|
||||
if (buffer.remaining() < Byte.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.put(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putShort(short s) throws IOException {
|
||||
if (buffer.remaining() < Short.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putShort(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putChar(char s) throws IOException {
|
||||
if (buffer.remaining() < Character.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putChar(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putInt(int i) throws IOException {
|
||||
if (buffer.remaining() < Integer.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putInt(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putLong(long l) throws IOException {
|
||||
if (buffer.remaining() < Long.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putLong(l);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putInts(int[] values) throws IOException {
|
||||
if (buffer.remaining() >= Integer.BYTES * values.length) {
|
||||
for (int value : values) {
|
||||
buffer.putInt(value);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int value : values) {
|
||||
putInt(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putLongs(long[] values) throws IOException {
|
||||
if (buffer.remaining() >= Long.BYTES * values.length) {
|
||||
for (long value : values) {
|
||||
buffer.putLong(value);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (long value : values) {
|
||||
putLong(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putBytes(byte[] bytes) throws IOException {
|
||||
putBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putBytes(byte[] bytes, int offset, int length) throws IOException {
|
||||
int totalToWrite = length;
|
||||
|
||||
if (totalToWrite < buffer.remaining()) {
|
||||
buffer.put(bytes, offset, totalToWrite);
|
||||
}
|
||||
else { // case where the data is larger than the write buffer, so we need to write in chunks
|
||||
while (totalToWrite > 0) {
|
||||
if (!buffer.hasRemaining()) {
|
||||
flush();
|
||||
}
|
||||
|
||||
// Write as much as possible to the buffer
|
||||
int toWriteNow = Math.min(totalToWrite, buffer.remaining());
|
||||
buffer.put(bytes, offset, toWriteNow);
|
||||
|
||||
// Update the remaining bytes and offset
|
||||
totalToWrite -= toWriteNow;
|
||||
offset += toWriteNow;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putBytes(ByteBuffer data) throws IOException {
|
||||
if (data.remaining() < buffer.remaining()) {
|
||||
buffer.put(data);
|
||||
}
|
||||
else { // case where the data is larger than the write buffer, so we need to write in chunks
|
||||
while (data.hasRemaining()) {
|
||||
if (!buffer.hasRemaining()) {
|
||||
flush();
|
||||
}
|
||||
|
||||
// temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer
|
||||
int lim = data.limit();
|
||||
data.limit(Math.min(data.position() + buffer.remaining(), lim));
|
||||
|
||||
// write the data to the buffer
|
||||
buffer.put(data);
|
||||
|
||||
// restore the limit, so we can write the rest of the data
|
||||
data.limit(lim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putFloat(float f) throws IOException {
|
||||
if (buffer.remaining() < Float.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putFloat(f);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putDouble(double d) throws IOException {
|
||||
if (buffer.remaining() < Double.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putDouble(d);
|
||||
}
|
||||
|
||||
private void flush() throws IOException {
|
||||
buffer.flip();
|
||||
|
||||
int rem = buffer.remaining();
|
||||
if (rem > 0) {
|
||||
os.write(buffer.array(), buffer.position(), buffer.remaining());
|
||||
buffer.limit(0);
|
||||
position += rem;
|
||||
}
|
||||
|
||||
buffer.clear();
|
||||
}
|
||||
|
||||
public long position() throws IOException {
|
||||
return position + buffer.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
flush();
|
||||
|
||||
os.flush();
|
||||
os.close();
|
||||
|
||||
Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING);
|
||||
}
|
||||
}
|
@ -1,149 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.foreign.Arena;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
|
||||
@SuppressWarnings("preview") // for MemorySegment in jdk-21
|
||||
public class MmapStorageReader implements StorageReader {
|
||||
private final MemorySegment segment;
|
||||
private final Arena arena;
|
||||
|
||||
private long position = 0;
|
||||
|
||||
public MmapStorageReader(Path path) throws IOException {
|
||||
arena = Arena.ofConfined();
|
||||
|
||||
try (var channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ)) {
|
||||
this.segment = channel.map(FileChannel.MapMode.READ_ONLY, 0, channel.size(), arena);
|
||||
}
|
||||
|
||||
position = 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getByte() throws IOException {
|
||||
return segment.get(ValueLayout.JAVA_BYTE, position++);
|
||||
}
|
||||
|
||||
@Override
|
||||
public short getShort() throws IOException {
|
||||
short ret = segment.get(ValueLayout.JAVA_SHORT, position);
|
||||
position += Short.BYTES;
|
||||
return ret;
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getChar() throws IOException {
|
||||
char ret = segment.get(ValueLayout.JAVA_CHAR, position);
|
||||
position += Character.BYTES;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInt() throws IOException {
|
||||
int ret = segment.get(ValueLayout.JAVA_INT, position);
|
||||
position += Integer.BYTES;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLong() throws IOException {
|
||||
long ret = segment.get(ValueLayout.JAVA_LONG, position);
|
||||
position += Long.BYTES;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat() throws IOException {
|
||||
float ret = segment.get(ValueLayout.JAVA_FLOAT, position);
|
||||
position += Float.BYTES;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getDouble() throws IOException {
|
||||
double ret = segment.get(ValueLayout.JAVA_DOUBLE, position);
|
||||
position += Double.BYTES;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(byte[] bytes) throws IOException {
|
||||
if (position + bytes.length > segment.byteSize()) {
|
||||
throw new ArrayIndexOutOfBoundsException();
|
||||
}
|
||||
for (int i = 0; i < bytes.length; i++) {
|
||||
bytes[i] = segment.get(ValueLayout.JAVA_BYTE, position+i);
|
||||
}
|
||||
position += bytes.length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(byte[] bytes, int offset, int length) throws IOException {
|
||||
if (position + length > segment.byteSize()) {
|
||||
throw new ArrayIndexOutOfBoundsException();
|
||||
}
|
||||
for (int i = 0; i < length; i++) {
|
||||
bytes[offset + i] = segment.get(ValueLayout.JAVA_BYTE, position+i);
|
||||
}
|
||||
position += length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(ByteBuffer buffer) throws IOException {
|
||||
int toRead = buffer.remaining();
|
||||
if (position + toRead > segment.byteSize()) {
|
||||
throw new ArrayIndexOutOfBoundsException();
|
||||
}
|
||||
|
||||
buffer.put(segment.asSlice(position, toRead).asByteBuffer());
|
||||
position += toRead;
|
||||
}
|
||||
|
||||
public void getInts(int[] ret) {
|
||||
for (int i = 0; i < ret.length; i++) {
|
||||
ret[i] = segment.get(ValueLayout.JAVA_INT, position);
|
||||
position += Integer.BYTES;
|
||||
}
|
||||
}
|
||||
|
||||
public void getLongs(long[] ret) {
|
||||
for (int i = 0; i < ret.length; i++) {
|
||||
ret[i] = segment.get(ValueLayout.JAVA_LONG, position);
|
||||
position += Long.BYTES;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long bytes, int stepSize) throws IOException {
|
||||
position += bytes * stepSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seek(long position, int stepSize) throws IOException {
|
||||
this.position = position * stepSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return position;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return position < segment.byteSize();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
arena.close();
|
||||
}
|
||||
}
|
@ -1,215 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
|
||||
public class SimpleStorageReader implements StorageReader {
|
||||
private final ByteBuffer buffer;
|
||||
private final FileChannel channel;
|
||||
|
||||
public SimpleStorageReader(Path path, ByteOrder order, int bufferSize) throws IOException {
|
||||
channel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ);
|
||||
|
||||
this.buffer = ByteBuffer.allocateDirect(bufferSize).order(order);
|
||||
|
||||
buffer.position(0);
|
||||
buffer.limit(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte getByte() throws IOException {
|
||||
if (buffer.remaining() < Byte.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.get();
|
||||
}
|
||||
|
||||
@Override
|
||||
public short getShort() throws IOException {
|
||||
if (buffer.remaining() < Short.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getShort();
|
||||
}
|
||||
|
||||
@Override
|
||||
public char getChar() throws IOException {
|
||||
if (buffer.remaining() < Character.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getChar();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getInt() throws IOException {
|
||||
if (buffer.remaining() < Integer.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getLong() throws IOException {
|
||||
if (buffer.remaining() < Long.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getLong();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float getFloat() throws IOException {
|
||||
if (buffer.remaining() < Float.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getFloat();
|
||||
}
|
||||
|
||||
@Override
|
||||
public double getDouble() throws IOException {
|
||||
if (buffer.remaining() < Double.BYTES) {
|
||||
refill();
|
||||
}
|
||||
|
||||
return buffer.getDouble();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(byte[] bytes) throws IOException {
|
||||
getBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(byte[] bytes, int offset, int length) throws IOException {
|
||||
if (buffer.remaining() >= length) {
|
||||
buffer.get(bytes, offset, length);
|
||||
} else {
|
||||
int totalToRead = length;
|
||||
|
||||
while (totalToRead > 0) {
|
||||
if (!buffer.hasRemaining()) {
|
||||
refill();
|
||||
}
|
||||
|
||||
int toRead = Math.min(buffer.remaining(), totalToRead);
|
||||
buffer.get(bytes, offset + length - totalToRead, toRead);
|
||||
totalToRead -= toRead;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getBytes(ByteBuffer data) throws IOException {
|
||||
if (data.remaining() < buffer.remaining()) {
|
||||
int lim = buffer.limit();
|
||||
buffer.limit(buffer.position() + data.remaining());
|
||||
data.put(buffer);
|
||||
buffer.limit(lim);
|
||||
} else {
|
||||
while (data.hasRemaining()) {
|
||||
if (!buffer.hasRemaining()) {
|
||||
refill();
|
||||
}
|
||||
|
||||
int lim = buffer.limit();
|
||||
buffer.limit(Math.min(buffer.position() + data.remaining(), lim));
|
||||
data.put(buffer);
|
||||
buffer.limit(lim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void getInts(int[] ints) throws IOException {
|
||||
if (buffer.remaining() >= ints.length * Integer.BYTES) {
|
||||
// fast path: if we can read all the ints from the buffer and don't need to check for buffer boundaries
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
ints[i] = buffer.getInt();
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < ints.length; i++) {
|
||||
ints[i] = getInt();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void getLongs(long[] longs) throws IOException {
|
||||
if (buffer.remaining() >= longs.length * Long.BYTES) {
|
||||
// fast path: if we can read all the longs from the buffer and don't need to check for buffer boundaries
|
||||
for (int i = 0; i < longs.length; i++) {
|
||||
longs[i] = buffer.getLong();
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < longs.length; i++) {
|
||||
longs[i] = getLong();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void skip(long bytes, int stepSize) throws IOException {
|
||||
long toSkip = bytes * stepSize;
|
||||
|
||||
if (buffer.remaining() < toSkip) {
|
||||
channel.position(channel.position() - buffer.remaining() + toSkip);
|
||||
buffer.position(0);
|
||||
buffer.limit(0);
|
||||
} else {
|
||||
buffer.position(buffer.position() + (int) toSkip);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void seek(long position, int stepSize) throws IOException {
|
||||
position *= stepSize;
|
||||
|
||||
if (position > channel.position() - buffer.limit() && position < channel.position()) {
|
||||
// If the position is within the buffer, we can just move the buffer position to the correct spot
|
||||
buffer.position((int) (position - channel.position() + buffer.limit()));
|
||||
}
|
||||
else {
|
||||
// Otherwise, we need to move the channel position and invalidate the buffer
|
||||
channel.position(position);
|
||||
buffer.position(0);
|
||||
buffer.limit(0);
|
||||
}
|
||||
}
|
||||
|
||||
private void refill() throws IOException {
|
||||
buffer.compact();
|
||||
|
||||
while (buffer.hasRemaining()) {
|
||||
if (channel.read(buffer) == -1) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
buffer.flip();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long position() throws IOException {
|
||||
return channel.position() - buffer.remaining();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return buffer.hasRemaining() || channel.position() < channel.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
channel.close();
|
||||
}
|
||||
}
|
@ -1,199 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
|
||||
public class SimpleStorageWriter implements StorageWriter, AutoCloseable {
|
||||
private final ByteBuffer buffer;
|
||||
private final FileChannel channel;
|
||||
|
||||
private final Path tempPath;
|
||||
private final Path destPath;
|
||||
|
||||
public SimpleStorageWriter(Path path, ByteOrder order, int bufferSize) throws IOException {
|
||||
tempPath = path.resolveSibling(path.getFileName() + ".tmp");
|
||||
destPath = path;
|
||||
|
||||
channel = (FileChannel) Files.newByteChannel(tempPath,
|
||||
StandardOpenOption.CREATE,
|
||||
StandardOpenOption.TRUNCATE_EXISTING,
|
||||
StandardOpenOption.WRITE
|
||||
);
|
||||
|
||||
this.buffer = ByteBuffer.allocate(bufferSize).order(order);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putByte(byte b) throws IOException {
|
||||
if (buffer.remaining() < Byte.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.put(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putShort(short s) throws IOException {
|
||||
if (buffer.remaining() < Short.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putShort(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putChar(char s) throws IOException {
|
||||
if (buffer.remaining() < Character.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putChar(s);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putInt(int i) throws IOException {
|
||||
if (buffer.remaining() < Integer.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putInt(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putLong(long l) throws IOException {
|
||||
if (buffer.remaining() < Long.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putLong(l);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putInts(int[] values) throws IOException {
|
||||
if (buffer.remaining() >= Integer.BYTES * values.length) {
|
||||
for (int value : values) {
|
||||
buffer.putInt(value);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int value : values) {
|
||||
putInt(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putLongs(long[] values) throws IOException {
|
||||
if (buffer.remaining() >= Long.BYTES * values.length) {
|
||||
for (long value : values) {
|
||||
buffer.putLong(value);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (long value : values) {
|
||||
putLong(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putBytes(byte[] bytes) throws IOException {
|
||||
putBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putBytes(byte[] bytes, int offset, int length) throws IOException {
|
||||
int totalToWrite = length;
|
||||
|
||||
if (totalToWrite < buffer.remaining()) {
|
||||
buffer.put(bytes, offset, totalToWrite);
|
||||
}
|
||||
else { // case where the data is larger than the write buffer, so we need to write in chunks
|
||||
while (totalToWrite > 0) {
|
||||
if (!buffer.hasRemaining()) {
|
||||
flush();
|
||||
}
|
||||
|
||||
// Write as much as possible to the buffer
|
||||
int toWriteNow = Math.min(totalToWrite, buffer.remaining());
|
||||
buffer.put(bytes, offset, toWriteNow);
|
||||
|
||||
// Update the remaining bytes and offset
|
||||
totalToWrite -= toWriteNow;
|
||||
offset += toWriteNow;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putBytes(ByteBuffer data) throws IOException {
|
||||
if (data.remaining() < buffer.remaining()) {
|
||||
buffer.put(data);
|
||||
}
|
||||
else { // case where the data is larger than the write buffer, so we need to write in chunks
|
||||
while (data.hasRemaining()) {
|
||||
if (!buffer.hasRemaining()) {
|
||||
flush();
|
||||
}
|
||||
|
||||
// temporarily reduce the data buffer's limit to what's possible to write to the writer's buffer
|
||||
int lim = data.limit();
|
||||
data.limit(Math.min(data.position() + buffer.remaining(), lim));
|
||||
|
||||
// write the data to the buffer
|
||||
buffer.put(data);
|
||||
|
||||
// restore the limit, so we can write the rest of the data
|
||||
data.limit(lim);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putFloat(float f) throws IOException {
|
||||
if (buffer.remaining() < Float.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putFloat(f);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putDouble(double d) throws IOException {
|
||||
if (buffer.remaining() < Double.BYTES) {
|
||||
flush();
|
||||
}
|
||||
|
||||
buffer.putDouble(d);
|
||||
}
|
||||
|
||||
private void flush() throws IOException {
|
||||
buffer.flip();
|
||||
|
||||
while (buffer.hasRemaining()) {
|
||||
channel.write(buffer);
|
||||
}
|
||||
|
||||
buffer.clear();
|
||||
}
|
||||
|
||||
public long position() throws IOException {
|
||||
return channel.position() + buffer.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
flush();
|
||||
|
||||
channel.force(false);
|
||||
channel.close();
|
||||
|
||||
Files.move(tempPath, destPath, StandardCopyOption.REPLACE_EXISTING);
|
||||
}
|
||||
}
|
@ -1,61 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public interface Storage {
|
||||
|
||||
/** Create a reader for the given column.
|
||||
*
|
||||
* @param path the directory containing the column data
|
||||
* @param columnDesc the column descriptor
|
||||
* @param aligned whether the data is aligned to the storage type, which can be used to optimize reading
|
||||
* */
|
||||
static StorageReader reader(Path path, ColumnDesc columnDesc, boolean aligned) throws IOException {
|
||||
ByteOrder byteOrder = columnDesc.byteOrder();
|
||||
StorageType storageType = columnDesc.storageType();
|
||||
|
||||
Path filePath = path.resolve(columnDesc.toString());
|
||||
|
||||
if (aligned && byteOrder.equals(ByteOrder.LITTLE_ENDIAN) && storageType.equals(StorageType.PLAIN)) {
|
||||
// mmap is only supported for little-endian plain storage, but it's generally worth it in this case
|
||||
return new MmapStorageReader(filePath);
|
||||
} else {
|
||||
final int bufferSize = switch(columnDesc.function()) {
|
||||
case DATA -> 4096;
|
||||
default -> 1024;
|
||||
};
|
||||
|
||||
return switch (storageType) {
|
||||
case PLAIN -> new SimpleStorageReader(filePath, byteOrder, bufferSize);
|
||||
case GZIP, ZSTD -> new CompressingStorageReader(filePath, storageType, byteOrder, bufferSize);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/** Create a writer for the given column.
|
||||
*
|
||||
* @param path the directory containing the column data
|
||||
* @param columnDesc the column descriptor
|
||||
* */
|
||||
static StorageWriter writer(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
ByteOrder byteOrder = columnDesc.byteOrder();
|
||||
StorageType storageType = columnDesc.storageType();
|
||||
|
||||
Path filePath = path.resolve(columnDesc.toString());
|
||||
|
||||
final int bufferSize = switch(columnDesc.function()) {
|
||||
case DATA -> 4096;
|
||||
default -> 1024;
|
||||
};
|
||||
|
||||
return switch (storageType) {
|
||||
case PLAIN -> new SimpleStorageWriter(filePath, byteOrder, bufferSize);
|
||||
case GZIP, ZSTD -> new CompressingStorageWriter(filePath, storageType, byteOrder, bufferSize);
|
||||
};
|
||||
}
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/** Interface for reading data from a storage. */
public interface StorageReader extends AutoCloseable {
    // Single-value reads
    byte getByte() throws IOException;
    short getShort() throws IOException;
    char getChar() throws IOException;
    int getInt() throws IOException;
    long getLong() throws IOException;
    float getFloat() throws IOException;
    double getDouble() throws IOException;

    // Byte-range reads
    void getBytes(byte[] bytes) throws IOException;
    void getBytes(byte[] bytes, int offset, int length) throws IOException;
    void getBytes(ByteBuffer buffer) throws IOException;

    // Bulk reads that implementations must provide
    void getInts(int[] ints) throws IOException;
    void getLongs(long[] longs) throws IOException;

    /** Fills {@code chars} front to back with one {@link #getChar()} call per element. */
    default void getChars(char[] chars) throws IOException {
        for (int idx = 0, count = chars.length; idx < count; idx++) {
            chars[idx] = getChar();
        }
    }

    /** Fills {@code shorts} front to back with one {@link #getShort()} call per element. */
    default void getShorts(short[] shorts) throws IOException {
        for (int idx = 0, count = shorts.length; idx < count; idx++) {
            shorts[idx] = getShort();
        }
    }

    /** Fills {@code floats} front to back with one {@link #getFloat()} call per element. */
    default void getFloats(float[] floats) throws IOException {
        for (int idx = 0, count = floats.length; idx < count; idx++) {
            floats[idx] = getFloat();
        }
    }

    /** Fills {@code doubles} front to back with one {@link #getDouble()} call per element. */
    default void getDoubles(double[] doubles) throws IOException {
        for (int idx = 0, count = doubles.length; idx < count; idx++) {
            doubles[idx] = getDouble();
        }
    }

    // Positioning
    void skip(long bytes, int stepSize) throws IOException;
    void seek(long position, int stepSize) throws IOException;
    long position() throws IOException;
    boolean hasRemaining() throws IOException;

    @Override
    void close() throws IOException;
}
|
@ -1,50 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/** Interface for writing data to a storage. */
public interface StorageWriter extends AutoCloseable {
    // Single-value writes
    void putByte(byte b) throws IOException;
    void putShort(short s) throws IOException;
    void putChar(char c) throws IOException;
    void putInt(int i) throws IOException;
    void putLong(long l) throws IOException;

    void putFloat(float f) throws IOException;
    void putDouble(double d) throws IOException;

    // Byte-range writes
    void putBytes(byte[] bytes) throws IOException;
    void putBytes(byte[] bytes, int offset, int length) throws IOException;
    void putBytes(ByteBuffer buffer) throws IOException;

    // Bulk operations; these can be more efficient than the single-value operations
    // if they are implemented in a way that minimizes the number of bounds checks and other overhead

    void putInts(int[] bytes) throws IOException;
    void putLongs(long[] bytes) throws IOException;

    /** Writes every element of {@code chars} in order via {@link #putChar(char)}. */
    default void putChars(char[] chars) throws IOException {
        for (int idx = 0; idx < chars.length; idx++) {
            putChar(chars[idx]);
        }
    }

    /** Writes every element of {@code shorts} in order via {@link #putShort(short)}. */
    default void putShorts(short[] shorts) throws IOException {
        for (int idx = 0; idx < shorts.length; idx++) {
            putShort(shorts[idx]);
        }
    }

    /** Writes every element of {@code floats} in order via {@link #putFloat(float)}. */
    default void putFloats(float[] floats) throws IOException {
        for (int idx = 0; idx < floats.length; idx++) {
            putFloat(floats[idx]);
        }
    }

    /** Writes every element of {@code doubles} in order via {@link #putDouble(double)}. */
    default void putDoubles(double[] doubles) throws IOException {
        for (int idx = 0; idx < doubles.length; idx++) {
            putDouble(doubles[idx]);
        }
    }

    long position() throws IOException;

    @Override
    void close() throws IOException;
}
|
@ -1,164 +0,0 @@
|
||||
# Slop
|
||||
|
||||
Slop is a library for columnar data persistence. It is designed to be used for storing large amounts of data in a way
|
||||
that is both fast and memory-efficient. The data is write-once, and the slop library offers many facilities for
|
||||
deciding how it should be stored and accessed.
|
||||
|
||||
Slop is designed as a low abstraction what-you-see-is-what-you-do library, the reason for
|
||||
this is to be able to eliminate copies and other overheads that are common in higher
|
||||
level libraries. The intent is to get the performance of a hand-rolled solution, but
|
||||
without the complexity and brittleness that comes with hand-rolling an ad-hoc row-based storage
|
||||
format.
|
||||
|
||||
A lot of what would commonly be kept in a schema description is instead just
|
||||
implemented as code. To aid with portability, slop stores schema information
|
||||
in the file names of the data files, besides the actual name of the column itself.
|
||||
|
||||
A table of demographic information may end up stored in files like this:
|
||||
|
||||
```text
|
||||
cities.0.dat.s8[].gz
|
||||
cities.0.dat-len.varint-le.bin
|
||||
population.0.dat.s32le.bin
|
||||
average-age.0.dat.f64le.gz
|
||||
```
|
||||
|
||||
The slop library offers some facilities to aid with data integrity, such as the SlopTable
|
||||
class, which is a wrapper that ensures consistent positions for a group of columns, and aids
|
||||
in closing the columns when they are no longer needed. Beyond that, you're on your own.
|
||||
|
||||
## Why though?
|
||||
|
||||
Slop is fast.
|
||||
|
||||
Depending on compression and encoding choices, it's possible
|
||||
to get read speeds that are 5-20x faster than reading from a sqlite database.
|
||||
When compression is disabled, Slop will memory map the data, and depending on the
|
||||
contents of the column, it's possible to perform zero copy reads.
|
||||
|
||||
Slop is compact.
|
||||
|
||||
Depending on compression and encoding choices, the format will be smaller
|
||||
than a parquet file containing the equivalent information.
|
||||
|
||||
Slop is simple.
|
||||
|
||||
There isn't much magic going on under the hood in Slop. It's designed with the philosophy that a competent programmer
|
||||
should be able to reverse engineer the format of the data by just looking
|
||||
at a directory listing of the data files. Despite being a very obscure library,
|
||||
this gives the data a sort of portability.
|
||||
|
||||
|
||||
### Relaxed 1BRC (no CSV ingestion time)
|
||||
|
||||
A benchmark against DuckDB, which is another excellent columnar storage library, albeit
|
||||
one that is more featureful and safe than Slop is.
|
||||
|
||||
The benchmark is a relaxed 1BRC, aggregate a billion rows of temperature data by city,
|
||||
and then calculate max/min/avg. This omits the CSV ingestion time from the original
|
||||
challenge, which means the numbers are not directly comparable with other 1BRC benchmarks.
|
||||
|
||||
| Impl | Runtime | Size On Disk |
|
||||
|-----------------------------------------|---------|--------------|
|
||||
| Parallel Slop, s16 | 0.64s | 2.8 GB |
|
||||
| Parallel Slop, varint | 0.90s | 2.8 GB |
|
||||
| DuckDB<sup>1</sup> | 2.6s | 3.0 GB |
|
||||
| Slop, s16 | 4.2s | 2.8 GB |
|
||||
| Slop, s32 | 4.5s | 3.8 GB |
|
||||
| Parquet<sup>2</sup> (Snappy) in DuckDB | 4.5s | 5.5 GB |
|
||||
| Parquet<sup>2</sup> (Zstd) in DuckDB | 5.5s | 3.0 GB |
|
||||
| JDBC<sup>3</sup> | 6500s | 3.0 GB |
|
||||
|
||||
<sup>[1]</sup> Benchmark loads the data into DuckDB's native table format,
|
||||
performs an aggregation within the database, and then fetches the results via JDBC.
|
||||
|
||||
<sup>[2]</sup> Benchmark loads the data from Parquet in DuckDB, performs an
|
||||
aggregation within the database, and then fetches the results via JDBC.
|
||||
|
||||
<sup>[3]</sup> Benchmark loads the data into DuckDB's native table format,
|
||||
then streaming it as-is over JDBC to Java for processing, with fetch size = 1000.
|
||||
This is a very common usage pattern in Enterprise Java applications, although
|
||||
usually you'd have an ORM in between the JDBC and the application code adding even
|
||||
more overhead. The numbers are extrapolated from a 100M benchmark, as I value my time.
|
||||
|
||||
## Example
|
||||
|
||||
With slop it's desirable to keep the schema information in the code. This is an example of how you might use slop to
|
||||
store a table of data with three columns: source, dest, and counts. The source and dest columns are strings, and the
|
||||
counts column is an integer that's stored with a varint-coding (i.e. like how utf-8 works).
|
||||
|
||||
The data is stored in a directory, and the data is written and read using the `MyData.Writer` and `MyData.Reader` classes.
|
||||
The `MyData` class is itself a record, and the schema is stored as static fields in the `MyData` class.
|
||||
|
||||
|
||||
```java
|
||||
record Population(String city, int population, double avgAge) {
|
||||
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> citiesColumn =
|
||||
new ColumnDesc<>("cities", ColumnType.STRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> populationColumn =
|
||||
new ColumnDesc<>("population", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<DoubleColumnReader, DoubleColumnWriter> averageAgeColumnn =
|
||||
new ColumnDesc<>("average-age", ColumnType.DOUBLE_LE, StorageType.PLAIN);
|
||||
|
||||
public static class Writer extends SlopTable {
|
||||
private final StringColumnWriter citiesWriter;
|
||||
private final IntColumnWriter populationWriter;
|
||||
private final DoubleColumnWriter avgAgeWriter;
|
||||
|
||||
public Writer(Path baseDir) throws IOException {
|
||||
citiesWriter = citiesColumn.create(this, baseDir);
|
||||
populationWriter = populationColumn.create(this, baseDir);
|
||||
avgAgeWriter = averageAgeColumnn.create(this, baseDir);
|
||||
}
|
||||
|
||||
public void write(Population data) throws IOException {
|
||||
citiesWriter.put(data.city);
|
||||
populationWriter.put(data.population);
|
||||
avgAgeWriter.put(data.avgAge);
|
||||
}
|
||||
}
|
||||
|
||||
public static class Reader extends SlopTable {
|
||||
private final StringColumnReader citiesReader;
|
||||
private final IntColumnReader populationReader;
|
||||
private final DoubleColumnReader avgAgeReader;
|
||||
|
||||
public Reader(Path baseDir) throws IOException {
|
||||
citiesReader = citiesColumn.open(this, baseDir);
|
||||
populationReader = populationColumn.open(this, baseDir);
|
||||
avgAgeReader = averageAgeColumnn.open(this, baseDir);
|
||||
}
|
||||
|
||||
public boolean hasRemaining() throws IOException {
|
||||
return citiesReader.hasRemaining();
|
||||
}
|
||||
|
||||
public Population read() throws IOException {
|
||||
return new Population(
|
||||
citiesReader.get(),
|
||||
populationReader.get(),
|
||||
avgAgeReader.get()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Nested Records
|
||||
|
||||
Nested records are not supported in slop, although array values are supported. If you need to store nested records,
|
||||
you've got the options of flattening them, representing them as arrays, or serializing them into a byte array and
|
||||
storing that.
|
||||
|
||||
## Column Types
|
||||
|
||||
TBW
|
||||
|
||||
## Storage Types
|
||||
|
||||
TBW
|
||||
|
||||
## Extension
|
||||
|
||||
TBW
|
@ -1,78 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.column.array.IntArrayColumn;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
|
||||
class ArrayColumnTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void test() throws IOException {
|
||||
var name = new ColumnDesc("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_ARRAY_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
|
||||
|
||||
try (var column = IntArrayColumn.create(tempDir, name)) {
|
||||
column.put(new int[] { 11, 22, 33});
|
||||
column.put(new int[] { 2 });
|
||||
column.put(new int[] { 444 });
|
||||
}
|
||||
try (var column = IntArrayColumn.open(tempDir, name)) {
|
||||
assertArrayEquals(new int[] { 11, 22, 33}, column.get());
|
||||
assertArrayEquals(new int[] { 2 }, column.get());
|
||||
assertArrayEquals(new int[] { 444 }, column.get());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
class CodedSequenceColumnTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
Path tempFile() {
|
||||
try {
|
||||
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,93 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.column.string.EnumColumn;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class EnumColumnTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
Path tempFile() {
|
||||
try {
|
||||
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void test() throws IOException {
|
||||
var name = new ColumnDesc("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.ENUM_BE,
|
||||
StorageType.PLAIN);
|
||||
|
||||
try (var column = EnumColumn.create(tempDir, name)) {
|
||||
column.put("Foo");
|
||||
column.put("Bar");
|
||||
column.put("Baz");
|
||||
column.put("Foo");
|
||||
column.put("Foo");
|
||||
column.put("Bar");
|
||||
column.put("Baz");
|
||||
}
|
||||
|
||||
try (var column = EnumColumn.open(tempDir, name)) {
|
||||
assertEquals("Foo", column.get());
|
||||
assertEquals("Bar", column.get());
|
||||
assertEquals("Baz", column.get());
|
||||
assertEquals("Foo", column.get());
|
||||
assertEquals("Foo", column.get());
|
||||
assertEquals("Bar", column.get());
|
||||
assertEquals("Baz", column.get());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,156 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.column.primitive.IntColumn;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class IntColumnTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void test() throws IOException {
|
||||
|
||||
var name = new ColumnDesc("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
|
||||
try (var column = IntColumn.create(tempDir, name)) {
|
||||
column.put(42);
|
||||
column.put(43);
|
||||
}
|
||||
try (var column = IntColumn.open(tempDir, name)) {
|
||||
assertEquals(42, column.get());
|
||||
assertEquals(43, column.get());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void testLarge() throws IOException {
|
||||
var name = new ColumnDesc("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
|
||||
try (var column = IntColumn.create(tempDir, name)) {
|
||||
for (int i = 0; i < 64; i++) {
|
||||
column.put(i);
|
||||
}
|
||||
}
|
||||
try (var column = IntColumn.open(tempDir, name)) {
|
||||
int i = 0;
|
||||
while (column.hasRemaining()) {
|
||||
assertEquals(i++, column.get());
|
||||
}
|
||||
assertEquals(64, i);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLargeBulk() throws IOException {
|
||||
var name = new ColumnDesc("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
|
||||
|
||||
int[] values = new int[24];
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = i;
|
||||
}
|
||||
try (var column = IntColumn.create(tempDir, name)) {
|
||||
column.put(values);
|
||||
column.put(values);
|
||||
}
|
||||
try (var column = IntColumn.open(tempDir, name)) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
for (int j = 0; j < values.length; j++) {
|
||||
assertEquals(j, column.get());
|
||||
}
|
||||
}
|
||||
assertFalse(column.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testSkip() throws IOException {
|
||||
var name = new ColumnDesc("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
|
||||
|
||||
int[] values = new int[24];
|
||||
for (int i = 0; i < values.length; i++) {
|
||||
values[i] = i;
|
||||
}
|
||||
try (var column = IntColumn.create(tempDir, name)) {
|
||||
column.put(values);
|
||||
column.put(values);
|
||||
}
|
||||
try (var column = IntColumn.open(tempDir, name)) {
|
||||
column.get();
|
||||
column.get();
|
||||
column.skip(34);
|
||||
assertEquals(12, column.get());
|
||||
|
||||
assertTrue(column.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,117 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.desc.*;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
|
||||
class StringColumnTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testArrayStr() throws IOException {
|
||||
var name = new ColumnDesc<>("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.STRING,
|
||||
StorageType.GZIP);
|
||||
|
||||
try (var table = new SlopTable()) {
|
||||
var column = name.create(table, tempDir);
|
||||
|
||||
column.put("Lorem");
|
||||
column.put("Ipsum");
|
||||
}
|
||||
try (var table = new SlopTable()) {
|
||||
var column = name.open(table, tempDir);
|
||||
|
||||
assertEquals("Lorem", column.get());
|
||||
assertEquals("Ipsum", column.get());
|
||||
assertFalse(column.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCStr() throws IOException {
|
||||
var name = new ColumnDesc<>("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.CSTRING,
|
||||
StorageType.GZIP);
|
||||
|
||||
try (var table = new SlopTable()) {
|
||||
var column = name.create(table, tempDir);
|
||||
column.put("Lorem");
|
||||
column.put("Ipsum");
|
||||
}
|
||||
try (var table = new SlopTable()) {
|
||||
var column = name.open(table, tempDir);
|
||||
assertEquals("Lorem", column.get());
|
||||
assertEquals("Ipsum", column.get());
|
||||
assertFalse(column.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testTxtStr() throws IOException {
|
||||
var name = new ColumnDesc<>("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.TXTSTRING,
|
||||
StorageType.GZIP);
|
||||
|
||||
try (var table = new SlopTable()) {
|
||||
var column = name.create(table, tempDir);
|
||||
column.put("Lorem");
|
||||
column.put("Ipsum");
|
||||
}
|
||||
try (var table = new SlopTable()) {
|
||||
var column = name.open(table, tempDir);
|
||||
assertEquals("Lorem", column.get());
|
||||
assertEquals("Ipsum", column.get());
|
||||
assertFalse(column.hasRemaining());
|
||||
}
|
||||
}
|
||||
}
|
@ -1,150 +0,0 @@
|
||||
package nu.marginalia.slop.column;
|
||||
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class VarintColumnTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void test() throws IOException {
|
||||
var name = new ColumnDesc("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.VARINT_LE,
|
||||
StorageType.PLAIN);
|
||||
|
||||
try (var column = VarintColumn.create(tempDir, name)) {
|
||||
column.put(42);
|
||||
column.put(43);
|
||||
column.put(65534);
|
||||
column.put(1);
|
||||
column.put(0);
|
||||
column.put(6000000000L);
|
||||
column.put(1);
|
||||
}
|
||||
try (var column = VarintColumn.open(tempDir, name)) {
|
||||
assertEquals(42, column.get());
|
||||
assertEquals(43, column.get());
|
||||
assertEquals(65534, column.get());
|
||||
assertEquals(1, column.get());
|
||||
assertEquals(0, column.get());
|
||||
assertEquals(6000000000L, column.getLong());
|
||||
assertEquals(1, column.get());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void test22() throws IOException {
|
||||
var name = new ColumnDesc("test",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.VARINT_LE,
|
||||
StorageType.PLAIN);
|
||||
|
||||
try (var column = VarintColumn.create(tempDir, name)) {
|
||||
column.put(2);
|
||||
column.put(2);
|
||||
}
|
||||
try (var column = VarintColumn.open(tempDir, name)) {
|
||||
assertEquals(2, column.get());
|
||||
assertEquals(2, column.get());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testFuzz() throws IOException {
|
||||
var name1 = new ColumnDesc("test1",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.VARINT_LE,
|
||||
StorageType.PLAIN);
|
||||
|
||||
var name2 = new ColumnDesc("test2",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.VARINT_BE,
|
||||
StorageType.PLAIN);
|
||||
|
||||
List<Long> values = new ArrayList<>();
|
||||
var rand = new Random();
|
||||
|
||||
for (int i = 0; i < 50_000; i++) {
|
||||
values.add(rand.nextLong(0, Short.MAX_VALUE));
|
||||
values.add(rand.nextLong(0, Byte.MAX_VALUE));
|
||||
values.add(rand.nextLong(0, Integer.MAX_VALUE));
|
||||
values.add(rand.nextLong(0, Long.MAX_VALUE));
|
||||
}
|
||||
|
||||
try (var column1 = VarintColumn.create(tempDir, name1);
|
||||
var column2 = VarintColumn.create(tempDir, name2)
|
||||
) {
|
||||
for (var value : values) {
|
||||
column1.put(value);
|
||||
column2.put(value);
|
||||
}
|
||||
}
|
||||
try (var column1 = VarintColumn.open(tempDir, name1);
|
||||
var column2 = VarintColumn.open(tempDir, name2)
|
||||
) {
|
||||
int idx = 0;
|
||||
for (var value : values) {
|
||||
idx++;
|
||||
assertEquals(value, column1.getLong(), " idx: " + idx);
|
||||
assertEquals(value, column2.getLong());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
package nu.marginalia.slop.desc;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.nio.ByteOrder;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
class ColumnDescTest {
|
||||
@Test
|
||||
void testParse() {
|
||||
ColumnDesc name = ColumnDesc.parse("foo.0.dat.s32le.bin");
|
||||
assertEquals("foo.0.dat.s32le.bin", name.toString());
|
||||
assertEquals("foo", name.name());
|
||||
assertEquals(0, name.page());
|
||||
assertEquals(ByteOrder.LITTLE_ENDIAN, name.byteOrder());
|
||||
assertEquals(ColumnFunction.DATA, name.function());
|
||||
assertEquals(ColumnType.INT_LE, name.type());
|
||||
assertEquals(StorageType.PLAIN, name.storageType());
|
||||
|
||||
name = ColumnDesc.parse("bar.1.dat-len.fp32be.gz");
|
||||
assertEquals("bar.1.dat-len.fp32be.gz", name.toString());
|
||||
assertEquals("bar", name.name());
|
||||
assertEquals(1, name.page());
|
||||
assertEquals(ByteOrder.BIG_ENDIAN, name.byteOrder());
|
||||
assertEquals(ColumnFunction.DATA_LEN, name.function());
|
||||
assertEquals(ColumnType.FLOAT_BE, name.type());
|
||||
assertEquals(StorageType.GZIP, name.storageType());
|
||||
|
||||
|
||||
}
|
||||
}
|
@ -1,215 +0,0 @@
|
||||
package nu.marginalia.slop.desc;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
public class SlopTableTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testEmpty() throws IOException {
|
||||
SlopTable slopTable = new SlopTable();
|
||||
slopTable.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPositionsGood() throws IOException {
|
||||
var name1 = new ColumnDesc<>("test1",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
var name2 = new ColumnDesc<>("test2",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
|
||||
try (SlopTable writerTable = new SlopTable()) {
|
||||
var column1 = name1.create(writerTable, tempDir);
|
||||
var column2 = name2.create(writerTable, tempDir);
|
||||
|
||||
column1.put(42);
|
||||
column2.put(43);
|
||||
}
|
||||
|
||||
|
||||
try (SlopTable readerTable = new SlopTable()) {
|
||||
var column1 = name1.open(readerTable, tempDir);
|
||||
var column2 = name2.open(readerTable, tempDir);
|
||||
|
||||
assertEquals(42, column1.get());
|
||||
assertEquals(43, column2.get());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testPositionsMisaligned() throws IOException {
|
||||
var name1 = new ColumnDesc<>("test1",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
var name2 = new ColumnDesc<>("test2",
|
||||
0,
|
||||
ColumnFunction.DATA,
|
||||
ColumnType.INT_LE,
|
||||
StorageType.PLAIN
|
||||
);
|
||||
|
||||
boolean sawException = false;
|
||||
try (SlopTable writerTable = new SlopTable()) {
|
||||
var column1 = name1.create(writerTable, tempDir);
|
||||
var column2 = name2.create(writerTable, tempDir);
|
||||
|
||||
column1.put(42);
|
||||
column2.put(43);
|
||||
column2.put(44);
|
||||
}
|
||||
catch (Exception ex) {
|
||||
ex.printStackTrace();
|
||||
sawException = true;
|
||||
}
|
||||
assertEquals(true, sawException);
|
||||
|
||||
}
|
||||
|
||||
|
||||
// Sanity check for the implementation of position() in the column classes
|
||||
@Test
|
||||
public void testPositionsMegatest() throws IOException {
|
||||
var byteCol = new ColumnDesc<>("byte", ColumnType.BYTE, StorageType.PLAIN);
|
||||
var charCol = new ColumnDesc<>("char", ColumnType.CHAR_LE, StorageType.PLAIN);
|
||||
var intCol = new ColumnDesc<>("int", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
var longCol = new ColumnDesc<>("long", ColumnType.LONG_LE, StorageType.PLAIN);
|
||||
var floatCol = new ColumnDesc<>("float", ColumnType.FLOAT_LE, StorageType.PLAIN);
|
||||
var doubleCol = new ColumnDesc<>("double", ColumnType.DOUBLE_LE, StorageType.PLAIN);
|
||||
var byteArrayCol = new ColumnDesc<>("byteArray", ColumnType.BYTE_ARRAY, StorageType.PLAIN);
|
||||
var intArrayCol = new ColumnDesc<>("intArray", ColumnType.INT_ARRAY_LE, StorageType.PLAIN);
|
||||
var longArrayCol = new ColumnDesc<>("longArray", ColumnType.LONG_ARRAY_LE, StorageType.PLAIN);
|
||||
var cstringCol = new ColumnDesc<>("cstring", ColumnType.CSTRING, StorageType.PLAIN);
|
||||
var txtStringCol = new ColumnDesc<>("txtString", ColumnType.TXTSTRING, StorageType.PLAIN);
|
||||
var arrayStringCol = new ColumnDesc<>("arrayString", ColumnType.STRING, StorageType.PLAIN);
|
||||
var varintCol = new ColumnDesc<>("varint", ColumnType.VARINT_LE, StorageType.PLAIN);
|
||||
var enumCol = new ColumnDesc<>("enum", ColumnType.ENUM_LE, StorageType.PLAIN);
|
||||
|
||||
try (SlopTable writerTable = new SlopTable()) {
|
||||
var byteColumn = byteCol.create(writerTable, tempDir);
|
||||
var charColumn = charCol.create(writerTable, tempDir);
|
||||
var intColumn = intCol.create(writerTable, tempDir);
|
||||
var longColumn = longCol.create(writerTable, tempDir);
|
||||
var floatColumn = floatCol.create(writerTable, tempDir);
|
||||
var doubleColumn = doubleCol.create(writerTable, tempDir);
|
||||
var byteArrayColumn = byteArrayCol.create(writerTable, tempDir);
|
||||
|
||||
var intArrayColumn = intArrayCol.create(writerTable, tempDir);
|
||||
var longArrayColumn = longArrayCol.create(writerTable, tempDir);
|
||||
var cstringColumn = cstringCol.create(writerTable, tempDir);
|
||||
var txtStringColumn = txtStringCol.create(writerTable, tempDir);
|
||||
var arrayStringColumn = arrayStringCol.create(writerTable, tempDir);
|
||||
var enumColumn = enumCol.create(writerTable, tempDir);
|
||||
var varintColumn = varintCol.create(writerTable, tempDir);
|
||||
|
||||
byteColumn.put((byte) 42);
|
||||
charColumn.put('a');
|
||||
intColumn.put(42);
|
||||
longColumn.put(42L);
|
||||
floatColumn.put(42.0f);
|
||||
doubleColumn.put(42.0);
|
||||
|
||||
byteArrayColumn.put(new byte[] { 42, 43, 44 });
|
||||
intArrayColumn.put(new int[] { 42, 43, 44 });
|
||||
longArrayColumn.put(new long[] { 42, 43, 44 });
|
||||
|
||||
cstringColumn.put("Hello");
|
||||
txtStringColumn.put("Hello");
|
||||
arrayStringColumn.put("Hello");
|
||||
enumColumn.put("Hello");
|
||||
|
||||
varintColumn.put(10000000);
|
||||
}
|
||||
|
||||
try (SlopTable readerTable = new SlopTable()) {
|
||||
var byteColumn = byteCol.open(readerTable, tempDir);
|
||||
var charColumn = charCol.open(readerTable, tempDir);
|
||||
var intColumn = intCol.open(readerTable, tempDir);
|
||||
var longColumn = longCol.open(readerTable, tempDir);
|
||||
var floatColumn = floatCol.open(readerTable, tempDir);
|
||||
var doubleColumn = doubleCol.open(readerTable, tempDir);
|
||||
var byteArrayColumn = byteArrayCol.open(readerTable, tempDir);
|
||||
var intArrayColumn = intArrayCol.open(readerTable, tempDir);
|
||||
var longArrayColumn = longArrayCol.open(readerTable, tempDir);
|
||||
var cstringColumn = cstringCol.open(readerTable, tempDir);
|
||||
var txtStringColumn = txtStringCol.open(readerTable, tempDir);
|
||||
var arrayStringColumn = arrayStringCol.open(readerTable, tempDir);
|
||||
var enumColumn = enumCol.open(readerTable, tempDir);
|
||||
var varintColumn = varintCol.open(readerTable, tempDir);
|
||||
|
||||
assertEquals(42, byteColumn.get());
|
||||
assertEquals('a', charColumn.get());
|
||||
assertEquals(42, intColumn.get());
|
||||
assertEquals(42L, longColumn.get());
|
||||
assertEquals(42.0f, floatColumn.get());
|
||||
assertEquals(42.0, doubleColumn.get());
|
||||
|
||||
assertArrayEquals(new byte[] {42, 43, 44}, byteArrayColumn.get());
|
||||
assertArrayEquals(new int[] {42, 43, 44}, intArrayColumn.get());
|
||||
assertArrayEquals(new long[] {42, 43, 44}, longArrayColumn.get());
|
||||
|
||||
assertEquals("Hello", cstringColumn.get());
|
||||
assertEquals("Hello", txtStringColumn.get());
|
||||
assertEquals("Hello", arrayStringColumn.get());
|
||||
assertEquals("Hello", enumColumn.get());
|
||||
|
||||
assertEquals(10000000, varintColumn.get());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -1,308 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class CompressingStorageWriterAndReaderTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
Path tempFile() {
|
||||
try {
|
||||
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
StorageWriter writer(Path path) {
|
||||
try {
|
||||
return new CompressingStorageWriter(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
StorageReader reader(Path path) {
|
||||
try {
|
||||
return new CompressingStorageReader(path, StorageType.GZIP, ByteOrder.LITTLE_ENDIAN, 63);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void putByte() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, writer.position());
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertTrue(reader.hasRemaining());
|
||||
assertEquals(i, reader.position());
|
||||
|
||||
assertEquals((byte) i, reader.getByte());
|
||||
}
|
||||
assertFalse(reader.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putByteSkipReader() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, writer.position());
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
assertEquals(0, reader.position());
|
||||
assertEquals((byte) 0, reader.getByte());
|
||||
assertEquals(1, reader.position());
|
||||
assertEquals((byte) 1, reader.getByte());
|
||||
reader.skip(64, 1);
|
||||
assertEquals(66, reader.position());
|
||||
assertEquals((byte) 66, reader.getByte());
|
||||
assertEquals(67, reader.position());
|
||||
reader.skip(2, 3);
|
||||
assertEquals(73, reader.position());
|
||||
assertEquals((byte) 73, reader.getByte());
|
||||
}
|
||||
}
|
||||
@Test
|
||||
void putShort() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals((byte) i, reader.getByte());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putChar() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putChar((char) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals((char) i, reader.getChar());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putInt() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putInt(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getInt());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putLong() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putLong(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getLong());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putFloat() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putFloat(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getFloat());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putDouble() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putDouble(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getDouble());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putBytes() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[2];
|
||||
data[0] = (byte) i;
|
||||
data[1] = (byte) (i + 1);
|
||||
writer.putBytes(data);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[2];
|
||||
reader.getBytes(data);
|
||||
assertEquals((byte) i, data[0]);
|
||||
assertEquals((byte) (i + 1), data[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPutBytes() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[4];
|
||||
data[1] = (byte) i;
|
||||
data[2] = (byte) (i + 1);
|
||||
writer.putBytes(data, 1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[4];
|
||||
reader.getBytes(data, 1, 2);
|
||||
assertEquals((byte) i, data[1]);
|
||||
assertEquals((byte) (i + 1), data[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPutBytesViaBuffer() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
ByteBuffer buffer = ByteBuffer.allocate(4);
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
buffer.clear();
|
||||
buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) });
|
||||
buffer.flip();
|
||||
writer.putBytes(buffer);
|
||||
|
||||
assertFalse(buffer.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
buffer.clear();
|
||||
reader.getBytes(buffer);
|
||||
buffer.flip();
|
||||
|
||||
assertEquals(4, buffer.remaining());
|
||||
|
||||
assertEquals((byte) i, buffer.get());
|
||||
assertEquals((byte) (i + 1), buffer.get());
|
||||
assertEquals((byte) (i + 2), buffer.get());
|
||||
assertEquals((byte) (i + 3), buffer.get());
|
||||
|
||||
assertFalse(buffer.hasRemaining());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,307 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class SimpleStorageWriterAndMmapReaderTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
Path tempFile() {
|
||||
try {
|
||||
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
StorageWriter writer(Path path) {
|
||||
try {
|
||||
return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
StorageReader reader(Path path) {
|
||||
try {
|
||||
return new MmapStorageReader(path);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putByte() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, writer.position());
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertTrue(reader.hasRemaining());
|
||||
assertEquals(i, reader.position());
|
||||
|
||||
assertEquals((byte) i, reader.getByte());
|
||||
}
|
||||
assertFalse(reader.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putByteSkipReader() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, writer.position());
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
assertEquals(0, reader.position());
|
||||
assertEquals((byte) 0, reader.getByte());
|
||||
assertEquals(1, reader.position());
|
||||
assertEquals((byte) 1, reader.getByte());
|
||||
reader.skip(64, 1);
|
||||
assertEquals(66, reader.position());
|
||||
assertEquals((byte) 66, reader.getByte());
|
||||
assertEquals(67, reader.position());
|
||||
reader.skip(2, 3);
|
||||
assertEquals(73, reader.position());
|
||||
assertEquals((byte) 73, reader.getByte());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putShort() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals((byte) i, reader.getByte());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putChar() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putChar((char) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals((char) i, reader.getChar());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putInt() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putInt(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getInt());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putLong() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putLong(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getLong());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putFloat() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putFloat(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getFloat());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putDouble() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putDouble(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getDouble());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putBytes() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[2];
|
||||
data[0] = (byte) i;
|
||||
data[1] = (byte) (i + 1);
|
||||
writer.putBytes(data);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[2];
|
||||
reader.getBytes(data);
|
||||
assertEquals((byte) i, data[0]);
|
||||
assertEquals((byte) (i + 1), data[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPutBytes() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[4];
|
||||
data[1] = (byte) i;
|
||||
data[2] = (byte) (i + 1);
|
||||
writer.putBytes(data, 1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[4];
|
||||
reader.getBytes(data, 1, 2);
|
||||
assertEquals((byte) i, data[1]);
|
||||
assertEquals((byte) (i + 1), data[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPutBytesViaBuffer() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
ByteBuffer buffer = ByteBuffer.allocate(4);
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
buffer.clear();
|
||||
buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) });
|
||||
buffer.flip();
|
||||
writer.putBytes(buffer);
|
||||
|
||||
assertFalse(buffer.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
buffer.clear();
|
||||
reader.getBytes(buffer);
|
||||
buffer.flip();
|
||||
|
||||
assertEquals(4, buffer.remaining());
|
||||
|
||||
assertEquals((byte) i, buffer.get());
|
||||
assertEquals((byte) (i + 1), buffer.get());
|
||||
assertEquals((byte) (i + 2), buffer.get());
|
||||
assertEquals((byte) (i + 3), buffer.get());
|
||||
|
||||
assertFalse(buffer.hasRemaining());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -1,307 +0,0 @@
|
||||
package nu.marginalia.slop.storage;
|
||||
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class SimpleStorageWriterAndReaderTest {
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setup() throws IOException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void cleanup() {
|
||||
try {
|
||||
Files.walk(tempDir)
|
||||
.sorted(this::deleteOrder)
|
||||
.forEach(p -> {
|
||||
try {
|
||||
if (Files.isRegularFile(p)) {
|
||||
System.out.println("Deleting " + p + " " + Files.size(p));
|
||||
}
|
||||
Files.delete(p);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
int deleteOrder(Path a, Path b) {
|
||||
if (Files.isDirectory(a) && !Files.isDirectory(b)) {
|
||||
return 1;
|
||||
} else if (!Files.isDirectory(a) && Files.isDirectory(b)) {
|
||||
return -1;
|
||||
} else {
|
||||
return a.getNameCount() - b.getNameCount();
|
||||
}
|
||||
}
|
||||
|
||||
Path tempFile() {
|
||||
try {
|
||||
return Files.createTempFile(tempDir, getClass().getSimpleName(), ".dat");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
StorageWriter writer(Path path) {
|
||||
try {
|
||||
return new SimpleStorageWriter(path, ByteOrder.LITTLE_ENDIAN, 63);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
StorageReader reader(Path path) {
|
||||
try {
|
||||
return new SimpleStorageReader(path, ByteOrder.LITTLE_ENDIAN, 63);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putByte() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, writer.position());
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertTrue(reader.hasRemaining());
|
||||
assertEquals(i, reader.position());
|
||||
|
||||
assertEquals((byte) i, reader.getByte());
|
||||
}
|
||||
assertFalse(reader.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putByteSkipReader() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, writer.position());
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
assertEquals(0, reader.position());
|
||||
assertEquals((byte) 0, reader.getByte());
|
||||
assertEquals(1, reader.position());
|
||||
assertEquals((byte) 1, reader.getByte());
|
||||
reader.skip(64, 1);
|
||||
assertEquals(66, reader.position());
|
||||
assertEquals((byte) 66, reader.getByte());
|
||||
assertEquals(67, reader.position());
|
||||
reader.skip(2, 3);
|
||||
assertEquals(73, reader.position());
|
||||
assertEquals((byte) 73, reader.getByte());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putShort() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals((byte) i, reader.getByte());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putChar() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putChar((char) i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals((char) i, reader.getChar());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putInt() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putInt(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getInt());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putLong() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putLong(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getLong());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putFloat() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putFloat(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getFloat());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putDouble() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
writer.putDouble(i);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
assertEquals(i, reader.getDouble());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void putBytes() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[2];
|
||||
data[0] = (byte) i;
|
||||
data[1] = (byte) (i + 1);
|
||||
writer.putBytes(data);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[2];
|
||||
reader.getBytes(data);
|
||||
assertEquals((byte) i, data[0]);
|
||||
assertEquals((byte) (i + 1), data[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPutBytes() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[4];
|
||||
data[1] = (byte) i;
|
||||
data[2] = (byte) (i + 1);
|
||||
writer.putBytes(data, 1, 2);
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
byte[] data = new byte[4];
|
||||
reader.getBytes(data, 1, 2);
|
||||
assertEquals((byte) i, data[1]);
|
||||
assertEquals((byte) (i + 1), data[2]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testPutBytesViaBuffer() throws IOException {
|
||||
Path p = tempFile();
|
||||
|
||||
ByteBuffer buffer = ByteBuffer.allocate(4);
|
||||
try (var writer = writer(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
buffer.clear();
|
||||
buffer.put(new byte[] { (byte) i, (byte) (i+1), (byte) (i + 2), (byte) (i+3) });
|
||||
buffer.flip();
|
||||
writer.putBytes(buffer);
|
||||
|
||||
assertFalse(buffer.hasRemaining());
|
||||
}
|
||||
}
|
||||
|
||||
try (var reader = reader(p)) {
|
||||
for (int i = 0; i < 127; i++) {
|
||||
buffer.clear();
|
||||
reader.getBytes(buffer);
|
||||
buffer.flip();
|
||||
|
||||
assertEquals(4, buffer.remaining());
|
||||
|
||||
assertEquals((byte) i, buffer.get());
|
||||
assertEquals((byte) (i + 1), buffer.get());
|
||||
assertEquals((byte) (i + 2), buffer.get());
|
||||
assertEquals((byte) (i + 3), buffer.get());
|
||||
|
||||
assertFalse(buffer.hasRemaining());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -36,7 +36,6 @@ dependencies {
|
||||
implementation project(':code:common:config')
|
||||
implementation project(':code:libraries:message-queue')
|
||||
implementation project(':code:libraries:blocking-thread-pool')
|
||||
implementation project(':code:libraries:slop')
|
||||
|
||||
implementation project(':code:libraries:guarded-regex')
|
||||
implementation project(':code:libraries:easy-lsh')
|
||||
@ -57,6 +56,7 @@ dependencies {
|
||||
testImplementation project(':code:libraries:term-frequency-dict')
|
||||
testImplementation project(':code:processes:crawling-process:model')
|
||||
|
||||
implementation libs.slop
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.notnull
|
||||
|
@ -17,10 +17,10 @@ jar.archiveBaseName = 'converting-process-model'
|
||||
dependencies {
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation project(':code:libraries:slop')
|
||||
implementation project(':third-party:parquet-floor')
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
|
||||
implementation libs.slop
|
||||
implementation libs.notnull
|
||||
implementation libs.roaringbitmap
|
||||
implementation libs.trove
|
||||
|
@ -5,6 +5,7 @@ import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayReader;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter;
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnReader;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumnReader;
|
||||
@ -16,7 +17,6 @@ import nu.marginalia.slop.column.string.EnumColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
@ -111,30 +111,30 @@ public record SlopDocumentRecord(
|
||||
}
|
||||
|
||||
// Basic information
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> urlsColumn = new ColumnDesc<>("url", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<VarintColumnReader, VarintColumnWriter> ordinalsColumn = new ColumnDesc<>("ordinal", ColumnType.VARINT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnType.ENUM_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> stateReasonsColumn = new ColumnDesc<>("stateReason", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> urlsColumn = new ColumnDesc<>("url", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<VarintColumnReader, VarintColumnWriter> ordinalsColumn = new ColumnDesc<>("ordinal", ColumnTypes.VARINT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnTypes.ENUM_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> stateReasonsColumn = new ColumnDesc<>("stateReason", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
|
||||
// Document metadata
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> titlesColumn = new ColumnDesc<>("title", ColumnType.STRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> descriptionsColumn = new ColumnDesc<>("description", ColumnType.STRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> htmlStandardsColumn = new ColumnDesc<>("htmlStandard", ColumnType.ENUM_LE, StorageType.GZIP);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> htmlFeaturesColumn = new ColumnDesc<>("htmlFeatures", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> lengthsColumn = new ColumnDesc<>("length", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> pubYearColumn = new ColumnDesc<>("pubYear", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<LongColumnReader, LongColumnWriter> hashesColumn = new ColumnDesc<>("hash", ColumnType.LONG_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<FloatColumnReader, FloatColumnWriter> qualitiesColumn = new ColumnDesc<>("quality", ColumnType.FLOAT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<LongColumnReader, LongColumnWriter> domainMetadata = new ColumnDesc<>("domainMetadata", ColumnType.LONG_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> titlesColumn = new ColumnDesc<>("title", ColumnTypes.STRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> descriptionsColumn = new ColumnDesc<>("description", ColumnTypes.STRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> htmlStandardsColumn = new ColumnDesc<>("htmlStandard", ColumnTypes.ENUM_LE, StorageType.GZIP);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> htmlFeaturesColumn = new ColumnDesc<>("htmlFeatures", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> lengthsColumn = new ColumnDesc<>("length", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> pubYearColumn = new ColumnDesc<>("pubYear", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<LongColumnReader, LongColumnWriter> hashesColumn = new ColumnDesc<>("hash", ColumnTypes.LONG_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<FloatColumnReader, FloatColumnWriter> qualitiesColumn = new ColumnDesc<>("quality", ColumnTypes.FLOAT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<LongColumnReader, LongColumnWriter> domainMetadata = new ColumnDesc<>("domainMetadata", ColumnTypes.LONG_LE, StorageType.PLAIN);
|
||||
|
||||
// Keyword-level columns, these are enumerated by the counts column
|
||||
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> keywordsColumn = new ColumnDesc<>("keywords", ColumnType.STRING_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMetaColumn = new ColumnDesc<>("termMetadata", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> keywordsColumn = new ColumnDesc<>("keywords", ColumnTypes.STRING_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMetaColumn = new ColumnDesc<>("termMetadata", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> termPositionsColumn = new ColumnDesc<>("termPositions", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
|
||||
|
||||
// Spans columns
|
||||
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnType.BYTE_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> spansColumn = new ColumnDesc<>("spans", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
|
||||
|
||||
public static class KeywordsProjectionReader extends SlopTable {
|
||||
@ -156,18 +156,19 @@ public record SlopDocumentRecord(
|
||||
}
|
||||
|
||||
public KeywordsProjectionReader(Path baseDir, int page) throws IOException {
|
||||
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||
ordinalsReader = ordinalsColumn.forPage(page).open(this, baseDir);
|
||||
htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(this, baseDir);
|
||||
domainMetadataReader = domainMetadata.forPage(page).open(this, baseDir);
|
||||
lengthsReader = lengthsColumn.forPage(page).open(this, baseDir);
|
||||
super(page);
|
||||
domainsReader = domainsColumn.open(this, baseDir);
|
||||
ordinalsReader = ordinalsColumn.open(this, baseDir);
|
||||
htmlFeaturesReader = htmlFeaturesColumn.open(this, baseDir);
|
||||
domainMetadataReader = domainMetadata.open(this, baseDir);
|
||||
lengthsReader = lengthsColumn.open(this, baseDir);
|
||||
|
||||
keywordsReader = keywordsColumn.forPage(page).open(this, baseDir);
|
||||
termMetaReader = termMetaColumn.forPage(page).open(this, baseDir);
|
||||
termPositionsReader = termPositionsColumn.forPage(page).open(this, baseDir);
|
||||
keywordsReader = keywordsColumn.open(this, baseDir);
|
||||
termMetaReader = termMetaColumn.open(this, baseDir);
|
||||
termPositionsReader = termPositionsColumn.open(this, baseDir);
|
||||
|
||||
spanCodesReader = spanCodesColumn.forPage(page).open(this, baseDir);
|
||||
spansReader = spansColumn.forPage(page).open(this, baseDir);
|
||||
spanCodesReader = spanCodesColumn.open(this, baseDir);
|
||||
spansReader = spansColumn.open(this, baseDir);
|
||||
}
|
||||
|
||||
public boolean hasMore() throws IOException {
|
||||
@ -223,17 +224,19 @@ public record SlopDocumentRecord(
|
||||
}
|
||||
|
||||
public MetadataReader(Path baseDir, int page) throws IOException {
|
||||
this.domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||
this.urlsReader = urlsColumn.forPage(page).open(this, baseDir);
|
||||
this.ordinalsReader = ordinalsColumn.forPage(page).open(this, baseDir);
|
||||
this.titlesReader = titlesColumn.forPage(page).open(this, baseDir);
|
||||
this.descriptionsReader = descriptionsColumn.forPage(page).open(this, baseDir);
|
||||
this.htmlFeaturesReader = htmlFeaturesColumn.forPage(page).open(this, baseDir);
|
||||
this.htmlStandardsReader = htmlStandardsColumn.forPage(page).open(this, baseDir);
|
||||
this.lengthsReader = lengthsColumn.forPage(page).open(this, baseDir);
|
||||
this.hashesReader = hashesColumn.forPage(page).open(this, baseDir);
|
||||
this.qualitiesReader = qualitiesColumn.forPage(page).open(this, baseDir);
|
||||
this.pubYearReader = pubYearColumn.forPage(page).open(this, baseDir);
|
||||
super(page);
|
||||
|
||||
this.domainsReader = domainsColumn.open(this, baseDir);
|
||||
this.urlsReader = urlsColumn.open(this, baseDir);
|
||||
this.ordinalsReader = ordinalsColumn.open(this, baseDir);
|
||||
this.titlesReader = titlesColumn.open(this, baseDir);
|
||||
this.descriptionsReader = descriptionsColumn.open(this, baseDir);
|
||||
this.htmlFeaturesReader = htmlFeaturesColumn.open(this, baseDir);
|
||||
this.htmlStandardsReader = htmlStandardsColumn.open(this, baseDir);
|
||||
this.lengthsReader = lengthsColumn.open(this, baseDir);
|
||||
this.hashesReader = hashesColumn.open(this, baseDir);
|
||||
this.qualitiesReader = qualitiesColumn.open(this, baseDir);
|
||||
this.pubYearReader = pubYearColumn.open(this, baseDir);
|
||||
}
|
||||
|
||||
public boolean hasMore() throws IOException {
|
||||
@ -281,27 +284,29 @@ public record SlopDocumentRecord(
|
||||
private final GammaCodedSequenceArrayWriter spansWriter;
|
||||
|
||||
public Writer(Path baseDir, int page) throws IOException {
|
||||
domainsWriter = domainsColumn.forPage(page).create(this, baseDir);
|
||||
urlsWriter = urlsColumn.forPage(page).create(this, baseDir);
|
||||
ordinalsWriter = ordinalsColumn.forPage(page).create(this, baseDir);
|
||||
statesWriter = statesColumn.forPage(page).create(this, baseDir);
|
||||
stateReasonsWriter = stateReasonsColumn.forPage(page).create(this, baseDir);
|
||||
titlesWriter = titlesColumn.forPage(page).create(this, baseDir);
|
||||
descriptionsWriter = descriptionsColumn.forPage(page).create(this, baseDir);
|
||||
htmlFeaturesWriter = htmlFeaturesColumn.forPage(page).create(this, baseDir);
|
||||
htmlStandardsWriter = htmlStandardsColumn.forPage(page).create(this, baseDir);
|
||||
lengthsWriter = lengthsColumn.forPage(page).create(this, baseDir);
|
||||
hashesWriter = hashesColumn.forPage(page).create(this, baseDir);
|
||||
qualitiesWriter = qualitiesColumn.forPage(page).create(this, baseDir);
|
||||
domainMetadataWriter = domainMetadata.forPage(page).create(this, baseDir);
|
||||
pubYearWriter = pubYearColumn.forPage(page).create(this, baseDir);
|
||||
super(page);
|
||||
|
||||
keywordsWriter = keywordsColumn.forPage(page).create(this, baseDir);
|
||||
termMetaWriter = termMetaColumn.forPage(page).create(this, baseDir);
|
||||
termPositionsWriter = termPositionsColumn.forPage(page).create(this, baseDir);
|
||||
domainsWriter = domainsColumn.create(this, baseDir);
|
||||
urlsWriter = urlsColumn.create(this, baseDir);
|
||||
ordinalsWriter = ordinalsColumn.create(this, baseDir);
|
||||
statesWriter = statesColumn.create(this, baseDir);
|
||||
stateReasonsWriter = stateReasonsColumn.create(this, baseDir);
|
||||
titlesWriter = titlesColumn.create(this, baseDir);
|
||||
descriptionsWriter = descriptionsColumn.create(this, baseDir);
|
||||
htmlFeaturesWriter = htmlFeaturesColumn.create(this, baseDir);
|
||||
htmlStandardsWriter = htmlStandardsColumn.create(this, baseDir);
|
||||
lengthsWriter = lengthsColumn.create(this, baseDir);
|
||||
hashesWriter = hashesColumn.create(this, baseDir);
|
||||
qualitiesWriter = qualitiesColumn.create(this, baseDir);
|
||||
domainMetadataWriter = domainMetadata.create(this, baseDir);
|
||||
pubYearWriter = pubYearColumn.create(this, baseDir);
|
||||
|
||||
spansCodesWriter = spanCodesColumn.forPage(page).create(this, baseDir);
|
||||
spansWriter = spansColumn.forPage(page).create(this, baseDir);
|
||||
keywordsWriter = keywordsColumn.create(this, baseDir);
|
||||
termMetaWriter = termMetaColumn.create(this, baseDir);
|
||||
termPositionsWriter = termPositionsColumn.create(this, baseDir);
|
||||
|
||||
spansCodesWriter = spanCodesColumn.create(this, baseDir);
|
||||
spansWriter = spansColumn.create(this, baseDir);
|
||||
}
|
||||
|
||||
public void write(SlopDocumentRecord record) throws IOException {
|
||||
|
@ -1,9 +1,9 @@
|
||||
package nu.marginalia.model.processed;
|
||||
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.string.StringColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
@ -15,8 +15,8 @@ public record SlopDomainLinkRecord(
|
||||
String source,
|
||||
String dest)
|
||||
{
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> sourcesColumn = new ColumnDesc<>("source", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> destsColumn = new ColumnDesc<>("dest", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> sourcesColumn = new ColumnDesc<>("source", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> destsColumn = new ColumnDesc<>("dest", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
|
||||
public static Reader reader(Path baseDir, int page) throws IOException {
|
||||
return new Reader(baseDir, page);
|
||||
@ -31,8 +31,10 @@ public record SlopDomainLinkRecord(
|
||||
}
|
||||
|
||||
public Reader(Path baseDir, int page) throws IOException {
|
||||
sourcesReader = sourcesColumn.forPage(page).open(this, baseDir);
|
||||
destsReader = destsColumn.forPage(page).open(this, baseDir);
|
||||
super(page);
|
||||
|
||||
sourcesReader = sourcesColumn.open(this, baseDir);
|
||||
destsReader = destsColumn.open(this, baseDir);
|
||||
}
|
||||
|
||||
public boolean hasMore() throws IOException {
|
||||
@ -59,8 +61,10 @@ public record SlopDomainLinkRecord(
|
||||
private final StringColumnWriter destsWriter;
|
||||
|
||||
public Writer(Path baseDir, int page) throws IOException {
|
||||
sourcesWriter = sourcesColumn.forPage(page).create(this, baseDir);
|
||||
destsWriter = destsColumn.forPage(page).create(this, baseDir);
|
||||
super(page);
|
||||
|
||||
sourcesWriter = sourcesColumn.create(this, baseDir);
|
||||
destsWriter = destsColumn.create(this, baseDir);
|
||||
}
|
||||
|
||||
public void write(SlopDomainLinkRecord record) throws IOException {
|
||||
|
@ -1,5 +1,6 @@
|
||||
package nu.marginalia.model.processed;
|
||||
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumnReader;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.IntColumnReader;
|
||||
@ -8,7 +9,6 @@ import nu.marginalia.slop.column.string.EnumColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
@ -33,16 +33,16 @@ public record SlopDomainRecord(
|
||||
String ip)
|
||||
{}
|
||||
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnType.ENUM_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> redirectDomainsColumn = new ColumnDesc<>("redirectDomain", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> ipColumn = new ColumnDesc<>("ip", ColumnType.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnTypes.ENUM_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> redirectDomainsColumn = new ColumnDesc<>("redirectDomain", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> ipColumn = new ColumnDesc<>("ip", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> knownUrlsColumn = new ColumnDesc<>("knownUrls", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> goodUrlsColumn = new ColumnDesc<>("goodUrls", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> visitedUrlsColumn = new ColumnDesc<>("visitedUrls", ColumnType.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> knownUrlsColumn = new ColumnDesc<>("knownUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> goodUrlsColumn = new ColumnDesc<>("goodUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> visitedUrlsColumn = new ColumnDesc<>("visitedUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
|
||||
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnType.TXTSTRING_ARRAY, StorageType.GZIP);
|
||||
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnTypes.TXTSTRING_ARRAY, StorageType.GZIP);
|
||||
|
||||
|
||||
public static class DomainNameReader extends SlopTable {
|
||||
@ -53,7 +53,9 @@ public record SlopDomainRecord(
|
||||
}
|
||||
|
||||
public DomainNameReader(Path baseDir, int page) throws IOException {
|
||||
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||
super(page);
|
||||
|
||||
domainsReader = domainsColumn.open(this, baseDir);
|
||||
}
|
||||
|
||||
public boolean hasMore() throws IOException {
|
||||
@ -74,8 +76,10 @@ public record SlopDomainRecord(
|
||||
}
|
||||
|
||||
public DomainWithIpReader(Path baseDir, int page) throws IOException {
|
||||
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||
ipReader = ipColumn.forPage(page).open(this, baseDir);
|
||||
super(page);
|
||||
|
||||
domainsReader = domainsColumn.open(this, baseDir);
|
||||
ipReader = ipColumn.open(this, baseDir);
|
||||
}
|
||||
|
||||
public boolean hasMore() throws IOException {
|
||||
@ -108,16 +112,18 @@ public record SlopDomainRecord(
|
||||
}
|
||||
|
||||
public Reader(Path baseDir, int page) throws IOException {
|
||||
domainsReader = domainsColumn.forPage(page).open(this, baseDir);
|
||||
statesReader = statesColumn.forPage(page).open(this, baseDir);
|
||||
redirectReader = redirectDomainsColumn.forPage(page).open(this, baseDir);
|
||||
ipReader = ipColumn.forPage(page).open(this, baseDir);
|
||||
super(page);
|
||||
|
||||
knownUrlsReader = knownUrlsColumn.forPage(page).open(this, baseDir);
|
||||
goodUrlsReader = goodUrlsColumn.forPage(page).open(this, baseDir);
|
||||
visitedUrlsReader = visitedUrlsColumn.forPage(page).open(this, baseDir);
|
||||
domainsReader = domainsColumn.open(this, baseDir);
|
||||
statesReader = statesColumn.open(this, baseDir);
|
||||
redirectReader = redirectDomainsColumn.open(this, baseDir);
|
||||
ipReader = ipColumn.open(this, baseDir);
|
||||
|
||||
rssFeedsReader = rssFeedsColumn.forPage(page).open(this, baseDir);
|
||||
knownUrlsReader = knownUrlsColumn.open(this, baseDir);
|
||||
goodUrlsReader = goodUrlsColumn.open(this, baseDir);
|
||||
visitedUrlsReader = visitedUrlsColumn.open(this, baseDir);
|
||||
|
||||
rssFeedsReader = rssFeedsColumn.open(this, baseDir);
|
||||
}
|
||||
|
||||
public boolean hasMore() throws IOException {
|
||||
@ -157,16 +163,18 @@ public record SlopDomainRecord(
|
||||
private final ObjectArrayColumnWriter<String> rssFeedsWriter;
|
||||
|
||||
public Writer(Path baseDir, int page) throws IOException {
|
||||
domainsWriter = domainsColumn.forPage(page).create(this, baseDir);
|
||||
statesWriter = statesColumn.forPage(page).create(this, baseDir);
|
||||
redirectWriter = redirectDomainsColumn.forPage(page).create(this, baseDir);
|
||||
ipWriter = ipColumn.forPage(page).create(this, baseDir);
|
||||
super(page);
|
||||
|
||||
knownUrlsWriter = knownUrlsColumn.forPage(page).create(this, baseDir);
|
||||
goodUrlsWriter = goodUrlsColumn.forPage(page).create(this, baseDir);
|
||||
visitedUrlsWriter = visitedUrlsColumn.forPage(page).create(this, baseDir);
|
||||
domainsWriter = domainsColumn.create(this, baseDir);
|
||||
statesWriter = statesColumn.create(this, baseDir);
|
||||
redirectWriter = redirectDomainsColumn.create(this, baseDir);
|
||||
ipWriter = ipColumn.create(this, baseDir);
|
||||
|
||||
rssFeedsWriter = rssFeedsColumn.forPage(page).create(this, baseDir);
|
||||
knownUrlsWriter = knownUrlsColumn.create(this, baseDir);
|
||||
goodUrlsWriter = goodUrlsColumn.create(this, baseDir);
|
||||
visitedUrlsWriter = visitedUrlsColumn.create(this, baseDir);
|
||||
|
||||
rssFeedsWriter = rssFeedsColumn.create(this, baseDir);
|
||||
}
|
||||
|
||||
public void write(SlopDomainRecord record) throws IOException {
|
||||
|
@ -32,7 +32,6 @@ dependencies {
|
||||
implementation project(':code:libraries:message-queue')
|
||||
implementation project(':code:libraries:language-processing')
|
||||
implementation project(':code:libraries:coded-sequence')
|
||||
implementation project(':code:libraries:slop')
|
||||
implementation project(':third-party:commons-codec')
|
||||
implementation project(':third-party:parquet-floor')
|
||||
testImplementation project(':code:services-application:search-service')
|
||||
@ -45,6 +44,7 @@ dependencies {
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.slop
|
||||
implementation libs.guava
|
||||
implementation dependencies.create(libs.guice.get()) {
|
||||
exclude group: 'com.google.guava'
|
||||
|
@ -40,7 +40,6 @@ include 'code:libraries:array:cpp'
|
||||
include 'code:libraries:coded-sequence'
|
||||
include 'code:libraries:geo-ip'
|
||||
include 'code:libraries:btree'
|
||||
include 'code:libraries:slop'
|
||||
include 'code:libraries:easy-lsh'
|
||||
include 'code:libraries:guarded-regex'
|
||||
include 'code:libraries:random-write-funnel'
|
||||
@ -107,6 +106,8 @@ dependencyResolutionManagement {
|
||||
maven { url "https://repo1.maven.org/maven2/" }
|
||||
maven { url "https://www2.ph.ed.ac.uk/maven2/" }
|
||||
maven { url "https://jitpack.io/" }
|
||||
maven { url "https://artifacts.marginalia.nu/snapshots" }
|
||||
|
||||
exclusiveContent {
|
||||
forRepository {
|
||||
maven {
|
||||
@ -118,6 +119,18 @@ dependencyResolutionManagement {
|
||||
includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory")
|
||||
}
|
||||
}
|
||||
|
||||
exclusiveContent {
|
||||
forRepository {
|
||||
maven {
|
||||
url = uri("https://artifacts.marginalia.nu/snapshots")
|
||||
}
|
||||
}
|
||||
filter {
|
||||
// Only use the Marginalia snapshot repository for the `slop` library
|
||||
includeModule("nu.marginalia", "slop")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
versionCatalogs {
|
||||
@ -213,6 +226,8 @@ dependencyResolutionManagement {
|
||||
library('jetty-util','org.eclipse.jetty','jetty-util').version('9.4.54.v20240208')
|
||||
library('jetty-servlet','org.eclipse.jetty','jetty-servlet').version('9.4.54.v20240208')
|
||||
|
||||
library('slop', 'nu.marginalia', 'slop').version('0.0.1-SNAPSHOT')
|
||||
|
||||
bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet'])
|
||||
|
||||
bundle('slf4j', ['slf4j.api', 'log4j.api', 'log4j.core', 'log4j.slf4j'])
|
||||
|
Loading…
Reference in New Issue
Block a user