mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 04:58:59 +00:00
(slop) Migrate to latest Slop version
This commit is contained in:
parent
2ad93ad41a
commit
75b0888032
@ -10,8 +10,8 @@ import nu.marginalia.index.journal.IndexJournal;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
import nu.marginalia.model.idx.DocumentMetadata;
|
||||
import nu.marginalia.process.control.ProcessHeartbeat;
|
||||
import nu.marginalia.slop.column.primitive.LongColumnReader;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
import nu.marginalia.slop.column.primitive.LongColumn;
|
||||
import org.roaringbitmap.longlong.LongConsumer;
|
||||
import org.roaringbitmap.longlong.Roaring64Bitmap;
|
||||
import org.slf4j.Logger;
|
||||
@ -153,7 +153,7 @@ public class ForwardIndexConverter {
|
||||
|
||||
for (var instance : journalReader.pages()) {
|
||||
try (var slopTable = new SlopTable(instance.page())) {
|
||||
LongColumnReader idReader = instance.openCombinedId(slopTable);
|
||||
LongColumn.Reader idReader = instance.openCombinedId(slopTable);
|
||||
|
||||
while (idReader.hasRemaining()) {
|
||||
rbm.add(idReader.get());
|
||||
|
@ -1,6 +1,6 @@
|
||||
package nu.marginalia.index.journal;
|
||||
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
@ -1,36 +1,28 @@
|
||||
package nu.marginalia.index.journal;
|
||||
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayReader;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter;
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnReader;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.array.LongArrayColumnReader;
|
||||
import nu.marginalia.slop.column.array.LongArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.IntColumnReader;
|
||||
import nu.marginalia.slop.column.primitive.IntColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.LongColumnReader;
|
||||
import nu.marginalia.slop.column.primitive.LongColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumn;
|
||||
import nu.marginalia.slop.column.array.LongArrayColumn;
|
||||
import nu.marginalia.slop.column.primitive.IntColumn;
|
||||
import nu.marginalia.slop.column.primitive.LongColumn;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
public record IndexJournalPage(Path baseDir, int page) {
|
||||
public static final ColumnDesc<IntColumnReader, IntColumnWriter> features = new ColumnDesc<>("features", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<IntColumnReader, IntColumnWriter> size = new ColumnDesc<>("size", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<LongColumnReader, LongColumnWriter> combinedId = new ColumnDesc<>("combinedId", ColumnTypes.LONG_LE, StorageType.PLAIN);
|
||||
public static final ColumnDesc<LongColumnReader, LongColumnWriter> documentMeta = new ColumnDesc<>("documentMeta", ColumnTypes.LONG_LE, StorageType.PLAIN);
|
||||
// Column definitions shared by all journal pages; each of these four uses StorageType.PLAIN.
// Declared final (as the ColumnDesc constants they replace were) so the shared
// definitions cannot be reassigned by other code.
public static final IntColumn features = new IntColumn("features", StorageType.PLAIN);
|
||||
public static final IntColumn size = new IntColumn("size", StorageType.PLAIN);
|
||||
public static final LongColumn combinedId = new LongColumn("combinedId", StorageType.PLAIN);
|
||||
public static final LongColumn documentMeta = new LongColumn("documentMeta", StorageType.PLAIN);
|
||||
|
||||
public static final ColumnDesc<LongArrayColumnReader, LongArrayColumnWriter> termIds = new ColumnDesc<>("termIds", ColumnTypes.LONG_ARRAY_LE, StorageType.ZSTD);
|
||||
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMeta = new ColumnDesc<>("termMetadata", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
|
||||
public static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> positions = new ColumnDesc<>("termPositions", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
|
||||
// Term columns; each of these three uses StorageType.ZSTD.
// Declared final (as the ColumnDesc constants they replace were) so the shared
// definitions cannot be reassigned by other code.
public static final LongArrayColumn termIds = new LongArrayColumn("termIds", StorageType.ZSTD);
|
||||
public static final ByteArrayColumn termMeta = new ByteArrayColumn("termMetadata", StorageType.ZSTD);
|
||||
public static final GammaCodedSequenceArrayColumn positions = new GammaCodedSequenceArrayColumn("termPositions", StorageType.ZSTD);
|
||||
|
||||
public static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodes = new ColumnDesc<>("spanCodes", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
|
||||
public static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> spans = new ColumnDesc<>("spans", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
|
||||
// Span columns; both use StorageType.ZSTD.
// Declared final (as the ColumnDesc constants they replace were) so the shared
// definitions cannot be reassigned by other code.
public static final ByteArrayColumn spanCodes = new ByteArrayColumn("spanCodes", StorageType.ZSTD);
|
||||
public static final GammaCodedSequenceArrayColumn spans = new GammaCodedSequenceArrayColumn("spans", StorageType.ZSTD);
|
||||
|
||||
public IndexJournalPage {
|
||||
if (!baseDir.toFile().isDirectory()) {
|
||||
@ -38,40 +30,40 @@ public record IndexJournalPage(Path baseDir, int page) {
|
||||
}
|
||||
}
|
||||
|
||||
public LongColumnReader openCombinedId(SlopTable table) throws IOException {
|
||||
public LongColumn.Reader openCombinedId(SlopTable table) throws IOException {
|
||||
return combinedId.open(table, baseDir);
|
||||
}
|
||||
|
||||
public LongColumnReader openDocumentMeta(SlopTable table) throws IOException {
|
||||
public LongColumn.Reader openDocumentMeta(SlopTable table) throws IOException {
|
||||
return documentMeta.open(table, baseDir);
|
||||
}
|
||||
|
||||
public IntColumnReader openFeatures(SlopTable table) throws IOException {
|
||||
public IntColumn.Reader openFeatures(SlopTable table) throws IOException {
|
||||
return features.open(table, baseDir);
|
||||
}
|
||||
|
||||
public IntColumnReader openSize(SlopTable table) throws IOException {
|
||||
public IntColumn.Reader openSize(SlopTable table) throws IOException {
|
||||
return size.open(table, baseDir);
|
||||
}
|
||||
|
||||
|
||||
public LongArrayColumnReader openTermIds(SlopTable table) throws IOException {
|
||||
public LongArrayColumn.Reader openTermIds(SlopTable table) throws IOException {
|
||||
return termIds.open(table, baseDir);
|
||||
}
|
||||
|
||||
public ByteArrayColumnReader openTermMetadata(SlopTable table) throws IOException {
|
||||
public ByteArrayColumn.Reader openTermMetadata(SlopTable table) throws IOException {
|
||||
return termMeta.open(table, baseDir);
|
||||
}
|
||||
|
||||
public GammaCodedSequenceArrayReader openTermPositions(SlopTable table) throws IOException {
|
||||
public GammaCodedSequenceArrayColumn.Reader openTermPositions(SlopTable table) throws IOException {
|
||||
return positions.open(table, baseDir);
|
||||
}
|
||||
|
||||
public GammaCodedSequenceArrayReader openSpans(SlopTable table) throws IOException {
|
||||
public GammaCodedSequenceArrayColumn.Reader openSpans(SlopTable table) throws IOException {
|
||||
return spans.open(table, baseDir);
|
||||
}
|
||||
|
||||
public ByteArrayColumnReader openSpanCodes(SlopTable table) throws IOException {
|
||||
public ByteArrayColumn.Reader openSpanCodes(SlopTable table) throws IOException {
|
||||
return spanCodes.open(table, baseDir);
|
||||
}
|
||||
}
|
||||
|
@ -3,12 +3,12 @@ package nu.marginalia.index.journal;
|
||||
import lombok.SneakyThrows;
|
||||
import nu.marginalia.hash.MurmurHash3_128;
|
||||
import nu.marginalia.model.processed.SlopDocumentRecord;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.array.LongArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.IntColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.LongColumnWriter;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumn;
|
||||
import nu.marginalia.slop.column.array.LongArrayColumn;
|
||||
import nu.marginalia.slop.column.primitive.IntColumn;
|
||||
import nu.marginalia.slop.column.primitive.LongColumn;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
@ -17,17 +17,17 @@ import java.util.List;
|
||||
|
||||
public class IndexJournalSlopWriter extends SlopTable {
|
||||
|
||||
private final IntColumnWriter featuresWriter;
|
||||
private final IntColumnWriter sizeWriter;
|
||||
private final LongColumnWriter combinedIdWriter;
|
||||
private final LongColumnWriter documentMetaWriter;
|
||||
private final IntColumn.Writer featuresWriter;
|
||||
private final IntColumn.Writer sizeWriter;
|
||||
private final LongColumn.Writer combinedIdWriter;
|
||||
private final LongColumn.Writer documentMetaWriter;
|
||||
|
||||
private final LongArrayColumnWriter termIdsWriter;
|
||||
private final ByteArrayColumnWriter termMetadataWriter;
|
||||
private final GammaCodedSequenceArrayWriter termPositionsWriter;
|
||||
private final LongArrayColumn.Writer termIdsWriter;
|
||||
private final ByteArrayColumn.Writer termMetadataWriter;
|
||||
private final GammaCodedSequenceArrayColumn.Writer termPositionsWriter;
|
||||
|
||||
private final GammaCodedSequenceArrayWriter spansWriter;
|
||||
private final ByteArrayColumnWriter spanCodesWriter;
|
||||
private final GammaCodedSequenceArrayColumn.Writer spansWriter;
|
||||
private final ByteArrayColumn.Writer spanCodesWriter;
|
||||
|
||||
private static final MurmurHash3_128 hash = new MurmurHash3_128();
|
||||
|
||||
|
@ -7,7 +7,7 @@ import nu.marginalia.index.construction.DocIdRewriter;
|
||||
import nu.marginalia.index.construction.PositionsFileConstructor;
|
||||
import nu.marginalia.index.journal.IndexJournalPage;
|
||||
import nu.marginalia.rwf.RandomFileAssembler;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -6,7 +6,7 @@ import it.unimi.dsi.fastutil.longs.LongIterator;
|
||||
import nu.marginalia.array.LongArray;
|
||||
import nu.marginalia.array.LongArrayFactory;
|
||||
import nu.marginalia.index.journal.IndexJournalPage;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
|
@ -6,7 +6,7 @@ import nu.marginalia.array.LongArrayFactory;
|
||||
import nu.marginalia.index.construction.DocIdRewriter;
|
||||
import nu.marginalia.index.journal.IndexJournalPage;
|
||||
import nu.marginalia.rwf.RandomFileAssembler;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -6,7 +6,7 @@ import it.unimi.dsi.fastutil.longs.LongIterator;
|
||||
import nu.marginalia.array.LongArray;
|
||||
import nu.marginalia.array.LongArrayFactory;
|
||||
import nu.marginalia.index.journal.IndexJournalPage;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
|
@ -1,13 +1,12 @@
|
||||
package nu.marginalia.sequence.slop;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.AbstractColumn;
|
||||
import nu.marginalia.slop.column.AbstractObjectColumn;
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
import nu.marginalia.slop.column.ObjectColumnWriter;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
import java.io.IOException;
|
||||
@ -18,45 +17,54 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/** Slop column extension for storing lists of GammaCodedSequence objects. */
|
||||
public class GammaCodedSequenceArrayColumn {
|
||||
public class GammaCodedSequenceArrayColumn extends AbstractObjectColumn<List<GammaCodedSequence>, GammaCodedSequenceArrayColumn.Reader, GammaCodedSequenceArrayColumn.Writer> {
|
||||
|
||||
public static ColumnType<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> TYPE = ColumnTypes.register("s8[]+gcs[]", ByteOrder.nativeOrder(), GammaCodedSequenceArrayColumn::open, GammaCodedSequenceArrayColumn::create);
|
||||
private final VarintColumn groupsColumn;
|
||||
private final GammaCodedSequenceColumn dataColumn;
|
||||
|
||||
public static GammaCodedSequenceArrayReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc,
|
||||
GammaCodedSequenceColumn.open(path, columnDesc),
|
||||
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH,
|
||||
ColumnTypes.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
/** Creates a column with the given name, delegating to the two-argument constructor with StorageType.PLAIN. */
public GammaCodedSequenceArrayColumn(String name) {
|
||||
this(name, StorageType.PLAIN);
|
||||
}
|
||||
|
||||
/**
 * Creates a column with the given name and storage type.
 * The superclass is initialised with the type mnemonic "gcs[]", the native byte
 * order and ColumnFunction.DATA; two backing columns sharing the same name are
 * then created for the group lengths and for the sequence data.
 *
 * @param name        column name, also used for both backing columns
 * @param storageType storage type passed to the superclass and to the group-length column
 */
public GammaCodedSequenceArrayColumn(String name, StorageType storageType) {
|
||||
super(name,
|
||||
"gcs[]",
|
||||
ByteOrder.nativeOrder(),
|
||||
ColumnFunction.DATA,
|
||||
storageType);
|
||||
|
||||
// NOTE(review): storageType is applied to the GROUP_LENGTH column, while the data
// column is built with the single-argument GammaCodedSequenceColumn constructor,
// which defaults to StorageType.PLAIN. The static open()/create() this replaces did
// the opposite: GROUP_LENGTH was StorageType.PLAIN and the data column was opened
// from the column descriptor (presumably with its storage type). Confirm that this
// swap is intended.
groupsColumn = new VarintColumn(name, ColumnFunction.GROUP_LENGTH, storageType);
|
||||
dataColumn = new GammaCodedSequenceColumn(name);
|
||||
}
|
||||
|
||||
/**
 * Creates a Writer for the given path and page, backed by writers obtained from
 * the data column and the group-length column via their own createUnregistered().
 *
 * @param path passed through unchanged to both backing columns
 * @param page passed through unchanged to both backing columns
 * @return a new Writer wrapping the data writer and the groups writer
 * @throws IOException declared by this method; presumably propagated from the backing columns
 */
public Writer createUnregistered(Path path, int page) throws IOException {
|
||||
return new Writer(
|
||||
dataColumn.createUnregistered(path, page),
|
||||
groupsColumn.createUnregistered(path, page)
|
||||
);
|
||||
}
|
||||
|
||||
public static GammaCodedSequenceArrayWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc,
|
||||
GammaCodedSequenceColumn.create(path, columnDesc),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.GROUP_LENGTH,
|
||||
ColumnTypes.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
/**
 * Opens a Reader for the given path and page, backed by readers obtained from
 * the data column and the group-length column via their own openUnregistered().
 *
 * @param path passed through unchanged to both backing columns
 * @param page passed through unchanged to both backing columns
 * @return a new Reader wrapping the data reader and the groups reader
 * @throws IOException declared by this method; presumably propagated from the backing columns
 */
public Reader openUnregistered(Path path, int page) throws IOException {
|
||||
return new Reader(
|
||||
dataColumn.openUnregistered(path, page),
|
||||
groupsColumn.openUnregistered(path, page)
|
||||
);
|
||||
}
|
||||
|
||||
private static class Writer implements GammaCodedSequenceArrayWriter {
|
||||
private final VarintColumnWriter groupsWriter;
|
||||
private final GammaCodedSequenceWriter dataWriter;
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc, GammaCodedSequenceWriter dataWriter, VarintColumnWriter groupsWriter)
|
||||
public class Writer implements ObjectColumnWriter<List<GammaCodedSequence>> {
|
||||
private final VarintColumn.Writer groupsWriter;
|
||||
private final GammaCodedSequenceColumn.Writer dataWriter;
|
||||
|
||||
Writer(GammaCodedSequenceColumn.Writer dataWriter, VarintColumn.Writer groupsWriter)
|
||||
{
|
||||
this.groupsWriter = groupsWriter;
|
||||
this.dataWriter = dataWriter;
|
||||
this.columnDesc = columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
public AbstractColumn<?, ?> columnDesc() {
|
||||
return GammaCodedSequenceArrayColumn.this;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -77,20 +85,18 @@ public class GammaCodedSequenceArrayColumn {
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements GammaCodedSequenceArrayReader {
|
||||
private final GammaCodedSequenceReader dataReader;
|
||||
private final VarintColumnReader groupsReader;
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
public class Reader implements ObjectColumnReader<List<GammaCodedSequence>> {
|
||||
private final GammaCodedSequenceColumn.Reader dataReader;
|
||||
private final VarintColumn.Reader groupsReader;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, GammaCodedSequenceReader dataReader, VarintColumnReader groupsReader) throws IOException {
|
||||
public Reader(GammaCodedSequenceColumn.Reader dataReader, VarintColumn.Reader groupsReader) {
|
||||
this.dataReader = dataReader;
|
||||
this.groupsReader = groupsReader;
|
||||
this.columnDesc = columnDesc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
public AbstractColumn<?, ?> columnDesc() {
|
||||
return GammaCodedSequenceArrayColumn.this;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -123,7 +129,6 @@ public class GammaCodedSequenceArrayColumn {
|
||||
return ret;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ByteBuffer> getData(ByteBuffer workArea) throws IOException {
|
||||
int count = groupsReader.get();
|
||||
var ret = new ArrayList<ByteBuffer>(count);
|
||||
|
@ -1,32 +0,0 @@
|
||||
package nu.marginalia.sequence.slop;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.List;
|
||||
|
||||
/** Reader for a column whose entries are lists of gamma-coded sequences. */
public interface GammaCodedSequenceArrayReader extends AutoCloseable, ColumnReader {
|
||||
/** Read the next list of gamma-coded sequences from the column.
|
||||
* NOTE(review): this comment previously said that the method requires an
|
||||
* intermediate buffer supplied and reused by the caller, but get() takes no
|
||||
* arguments. That description appears to belong to getData(ByteBuffer)
|
||||
* instead; confirm against the implementation whether get() relies on an
|
||||
* internal buffer.
|
||||
*
|
||||
* @return The next list of gamma-coded sequences.
|
||||
*/
|
||||
List<GammaCodedSequence> get() throws IOException;
|
||||
|
||||
/** Read just the data portion of the next list of gamma-coded sequences from the column.
|
||||
* This method is useful when the caller is only interested in the data portion
|
||||
* of the sequences and does not want to decode the values.
|
||||
*
|
||||
* @param workArea A buffer to use for reading the data.
|
||||
* @return slices of the work buffer containing the data.
|
||||
*/
|
||||
List<ByteBuffer> getData(ByteBuffer workArea) throws IOException;
|
||||
|
||||
/** Closes the reader; narrows AutoCloseable.close() so that only IOException is declared. */
void close() throws IOException;
|
||||
}
|
@ -1,12 +0,0 @@
|
||||
package nu.marginalia.sequence.slop;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
/** Writer for a column whose entries are lists of gamma-coded sequences. */
public interface GammaCodedSequenceArrayWriter extends AutoCloseable, ColumnWriter {
|
||||
/** Puts a list of gamma-coded sequences into the column (presumably as one entry; confirm against the implementation). */
void put(List<GammaCodedSequence> sequence) throws IOException;
|
||||
/** Closes the writer; narrows AutoCloseable.close() so that only IOException is declared. */
void close() throws IOException;
|
||||
}
|
@ -1,13 +1,12 @@
|
||||
package nu.marginalia.sequence.slop;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.AbstractColumn;
|
||||
import nu.marginalia.slop.column.AbstractObjectColumn;
|
||||
import nu.marginalia.slop.column.ObjectColumnReader;
|
||||
import nu.marginalia.slop.column.ObjectColumnWriter;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.ColumnFunction;
|
||||
import nu.marginalia.slop.desc.ColumnType;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import nu.marginalia.slop.storage.Storage;
|
||||
import nu.marginalia.slop.storage.StorageReader;
|
||||
@ -19,48 +18,53 @@ import java.nio.ByteOrder;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/** Slop column extension for storing GammaCodedSequence objects. */
|
||||
public class GammaCodedSequenceColumn {
|
||||
public class GammaCodedSequenceColumn extends AbstractObjectColumn<GammaCodedSequence, GammaCodedSequenceColumn.Reader, GammaCodedSequenceColumn.Writer> {
|
||||
|
||||
public static ColumnType<GammaCodedSequenceReader, GammaCodedSequenceWriter> TYPE = ColumnTypes.register("s8[]+gcs", ByteOrder.nativeOrder(), GammaCodedSequenceColumn::open, GammaCodedSequenceColumn::create);
|
||||
private final VarintColumn indexColumn;
|
||||
|
||||
public static GammaCodedSequenceReader open(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Reader(columnDesc,
|
||||
Storage.reader(path, columnDesc, false), // note we must never pass aligned=true here, as the data is not guaranteed alignment
|
||||
VarintColumn.open(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||
ColumnTypes.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
/** Creates a column with the given name, delegating to the two-argument constructor with StorageType.PLAIN. */
public GammaCodedSequenceColumn(String name) {
|
||||
this(name, StorageType.PLAIN);
|
||||
}
|
||||
|
||||
/**
 * Creates a column with the given name and storage type.
 * The superclass is initialised with the type mnemonic "gamma", the native byte
 * order and ColumnFunction.DATA.
 *
 * @param name        column name, also used for the DATA_LEN index column
 * @param storageType storage type passed to the superclass
 */
public GammaCodedSequenceColumn(String name, StorageType storageType) {
|
||||
super(name,
|
||||
"gamma",
|
||||
ByteOrder.nativeOrder(),
|
||||
ColumnFunction.DATA,
|
||||
storageType);
|
||||
|
||||
// The DATA_LEN index column always uses StorageType.PLAIN, regardless of storageType.
indexColumn = new VarintColumn(name, ColumnFunction.DATA_LEN, StorageType.PLAIN);
|
||||
}
|
||||
|
||||
/**
 * Creates a Writer for the given path and page, combining a storage writer for
 * this column with a writer obtained from the DATA_LEN index column.
 *
 * @param path passed through to Storage.writer() and to the index column
 * @param page passed through to Storage.writer() and to the index column
 * @return a new Writer wrapping the storage writer and the index writer
 * @throws IOException declared by this method; presumably propagated from the calls below
 */
public Writer createUnregistered(Path path, int page) throws IOException {
|
||||
return new Writer(
|
||||
Storage.writer(path, this, page),
|
||||
indexColumn.createUnregistered(path, page)
|
||||
);
|
||||
}
|
||||
|
||||
public static GammaCodedSequenceWriter create(Path path, ColumnDesc columnDesc) throws IOException {
|
||||
return new Writer(columnDesc,
|
||||
Storage.writer(path, columnDesc),
|
||||
VarintColumn.create(path, columnDesc.createSupplementaryColumn(ColumnFunction.DATA_LEN,
|
||||
ColumnTypes.VARINT_LE,
|
||||
StorageType.PLAIN)
|
||||
)
|
||||
/**
 * Opens a Reader for the given path and page, combining a storage reader for
 * this column with a reader obtained from the DATA_LEN index column.
 *
 * @param path passed through to Storage.reader() and to the index column
 * @param page passed through to Storage.reader() and to the index column
 * @return a new Reader wrapping the storage reader and the index reader
 * @throws IOException declared by this method; presumably propagated from the calls below
 */
public Reader openUnregistered(Path path, int page) throws IOException {
|
||||
return new Reader(
|
||||
// NOTE(review): the trailing false is presumably the "aligned" flag. The static
// open() this replaces warned that aligned=true must never be passed here because
// the data is not guaranteed to be aligned; confirm the parameter meaning.
Storage.reader(path, this, page, false),
|
||||
indexColumn.openUnregistered(path, page)
|
||||
);
|
||||
}
|
||||
|
||||
private static class Writer implements GammaCodedSequenceWriter {
|
||||
private final VarintColumnWriter indexWriter;
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
public class Writer implements ObjectColumnWriter<GammaCodedSequence> {
|
||||
private final VarintColumn.Writer indexWriter;
|
||||
private final StorageWriter storage;
|
||||
|
||||
public Writer(ColumnDesc<?, ?> columnDesc,
|
||||
StorageWriter storage,
|
||||
VarintColumnWriter indexWriter)
|
||||
public Writer(StorageWriter storage,
|
||||
VarintColumn.Writer indexWriter)
|
||||
{
|
||||
this.columnDesc = columnDesc;
|
||||
this.storage = storage;
|
||||
|
||||
this.indexWriter = indexWriter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
public AbstractColumn<?, ?> columnDesc() {
|
||||
return GammaCodedSequenceColumn.this;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -82,20 +86,18 @@ public class GammaCodedSequenceColumn {
|
||||
}
|
||||
}
|
||||
|
||||
private static class Reader implements GammaCodedSequenceReader {
|
||||
private final VarintColumnReader indexReader;
|
||||
private final ColumnDesc<?, ?> columnDesc;
|
||||
public class Reader implements ObjectColumnReader<GammaCodedSequence> {
|
||||
private final VarintColumn.Reader indexReader;
|
||||
private final StorageReader storage;
|
||||
|
||||
public Reader(ColumnDesc<?, ?> columnDesc, StorageReader reader, VarintColumnReader indexReader) throws IOException {
|
||||
this.columnDesc = columnDesc;
|
||||
Reader(StorageReader reader, VarintColumn.Reader indexReader) throws IOException {
|
||||
this.storage = reader;
|
||||
this.indexReader = indexReader;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ColumnDesc<?, ?> columnDesc() {
|
||||
return columnDesc;
|
||||
public AbstractColumn<?, ?> columnDesc() {
|
||||
return GammaCodedSequenceColumn.this;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -126,7 +128,6 @@ public class GammaCodedSequenceColumn {
|
||||
return new GammaCodedSequence(dest);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void getData(ByteBuffer workArea) throws IOException {
|
||||
int size = indexReader.get();
|
||||
|
||||
|
@ -1,33 +0,0 @@
|
||||
package nu.marginalia.sequence.slop;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.slop.column.ColumnReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
/** Reader for a column whose entries are single gamma-coded sequences. */
public interface GammaCodedSequenceReader extends AutoCloseable, ColumnReader {
|
||||
/** Read the next gamma-coded sequence from the column.
|
||||
* NOTE(review): this comment previously said that the method requires an
|
||||
* intermediate buffer supplied and reused by the caller, but get() takes no
|
||||
* arguments. That description appears to belong to getData(ByteBuffer)
|
||||
* instead; confirm against the implementation whether get() relies on an
|
||||
* internal buffer.
|
||||
*
|
||||
* @return The next gamma-coded sequence.
|
||||
*/
|
||||
GammaCodedSequence get() throws IOException;
|
||||
|
||||
/** Read just the data portion of the next gamma-coded sequence from the column.
|
||||
* This method is useful when the caller is only interested in the data portion
|
||||
* of the sequence and does not want to decode the values.
|
||||
*
|
||||
* The position of the buffer is advanced to the end of the data that has just been read,
|
||||
* and the limit remains the same.
|
||||
*
|
||||
* @param workArea A buffer that receives the data.
|
||||
*/
|
||||
void getData(ByteBuffer workArea) throws IOException;
|
||||
|
||||
/** Closes the reader; narrows AutoCloseable.close() so that only IOException is declared. */
void close() throws IOException;
|
||||
}
|
@ -1,11 +0,0 @@
|
||||
package nu.marginalia.sequence.slop;
|
||||
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.slop.column.ColumnWriter;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/** Writer for a column whose entries are single gamma-coded sequences. */
public interface GammaCodedSequenceWriter extends AutoCloseable, ColumnWriter {
|
||||
/** Puts a single gamma-coded sequence into the column. */
void put(GammaCodedSequence sequence) throws IOException;
|
||||
/** Closes the writer; narrows AutoCloseable.close() so that only IOException is declared. */
void close() throws IOException;
|
||||
}
|
@ -3,21 +3,16 @@ package nu.marginalia.model.processed;
|
||||
import lombok.Builder;
|
||||
import nu.marginalia.sequence.GammaCodedSequence;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayColumn;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayReader;
|
||||
import nu.marginalia.sequence.slop.GammaCodedSequenceArrayWriter;
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnReader;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumnReader;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnReader;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.*;
|
||||
import nu.marginalia.slop.column.string.EnumColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
import nu.marginalia.slop.column.array.ByteArrayColumn;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumn;
|
||||
import nu.marginalia.slop.column.dynamic.VarintColumn;
|
||||
import nu.marginalia.slop.column.primitive.FloatColumn;
|
||||
import nu.marginalia.slop.column.primitive.IntColumn;
|
||||
import nu.marginalia.slop.column.primitive.LongColumn;
|
||||
import nu.marginalia.slop.column.string.EnumColumn;
|
||||
import nu.marginalia.slop.column.string.StringColumn;
|
||||
import nu.marginalia.slop.column.string.TxtStringColumn;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
@ -111,45 +106,47 @@ public record SlopDocumentRecord(
|
||||
}
|
||||
|
||||
// Basic information
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> urlsColumn = new ColumnDesc<>("url", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<VarintColumnReader, VarintColumnWriter> ordinalsColumn = new ColumnDesc<>("ordinal", ColumnTypes.VARINT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnTypes.ENUM_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> stateReasonsColumn = new ColumnDesc<>("stateReason", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
// NOTE(review): "domain" and "url" are declared as TxtStringColumn, while "stateReason"
// below is a plain StringColumn. The ColumnDesc declarations these replace used
// ColumnTypes.TXTSTRING for all three, so confirm the type change for "stateReason"
// is intended.
private static final TxtStringColumn domainsColumn = new TxtStringColumn("domain", StorageType.GZIP);
|
||||
private static final TxtStringColumn urlsColumn = new TxtStringColumn("url", StorageType.GZIP);
|
||||
private static final VarintColumn ordinalsColumn = new VarintColumn("ordinal", StorageType.PLAIN);
|
||||
private static final EnumColumn statesColumn = new EnumColumn("state", StorageType.PLAIN);
|
||||
private static final StringColumn stateReasonsColumn = new StringColumn("stateReason", StorageType.GZIP);
|
||||
|
||||
// Document metadata
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> titlesColumn = new ColumnDesc<>("title", ColumnTypes.STRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> descriptionsColumn = new ColumnDesc<>("description", ColumnTypes.STRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> htmlStandardsColumn = new ColumnDesc<>("htmlStandard", ColumnTypes.ENUM_LE, StorageType.GZIP);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> htmlFeaturesColumn = new ColumnDesc<>("htmlFeatures", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> lengthsColumn = new ColumnDesc<>("length", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> pubYearColumn = new ColumnDesc<>("pubYear", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<LongColumnReader, LongColumnWriter> hashesColumn = new ColumnDesc<>("hash", ColumnTypes.LONG_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<FloatColumnReader, FloatColumnWriter> qualitiesColumn = new ColumnDesc<>("quality", ColumnTypes.FLOAT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<LongColumnReader, LongColumnWriter> domainMetadata = new ColumnDesc<>("domainMetadata", ColumnTypes.LONG_LE, StorageType.PLAIN);
|
||||
// String columns use StorageType.GZIP; the enum and numeric columns use StorageType.PLAIN.
private static final StringColumn titlesColumn = new StringColumn("title", StorageType.GZIP);
|
||||
private static final StringColumn descriptionsColumn = new StringColumn("description", StorageType.GZIP);
|
||||
// NOTE(review): the ColumnDesc declaration this replaces used StorageType.GZIP for
// "htmlStandard"; it is StorageType.PLAIN here. Confirm the change is intended.
private static final EnumColumn htmlStandardsColumn = new EnumColumn("htmlStandard", StorageType.PLAIN);
|
||||
private static final IntColumn htmlFeaturesColumn = new IntColumn("htmlFeatures", StorageType.PLAIN);
|
||||
private static final IntColumn lengthsColumn = new IntColumn("length", StorageType.PLAIN);
|
||||
private static final IntColumn pubYearColumn = new IntColumn("pubYear", StorageType.PLAIN);
|
||||
private static final LongColumn hashesColumn = new LongColumn("hash", StorageType.PLAIN);
|
||||
private static final FloatColumn qualitiesColumn = new FloatColumn("quality", StorageType.PLAIN);
|
||||
private static final LongColumn domainMetadata = new LongColumn("domainMetadata", StorageType.PLAIN);
|
||||
|
||||
// Keyword-level columns, these are enumerated by the counts column
|
||||
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> keywordsColumn = new ColumnDesc<>("keywords", ColumnTypes.STRING_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> termMetaColumn = new ColumnDesc<>("termMetadata", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> termPositionsColumn = new ColumnDesc<>("termPositions", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
|
||||
|
||||
private static final ObjectArrayColumn<String> keywordsColumn = new StringColumn("keywords", StorageType.ZSTD).asArray();
|
||||
private static final ByteArrayColumn termMetaColumn = new ByteArrayColumn("termMetadata", StorageType.ZSTD);
|
||||
private static final GammaCodedSequenceArrayColumn termPositionsColumn = new GammaCodedSequenceArrayColumn("termPositions", StorageType.ZSTD);
|
||||
|
||||
// Spans columns
|
||||
private static final ColumnDesc<ByteArrayColumnReader, ByteArrayColumnWriter> spanCodesColumn = new ColumnDesc<>("spanCodes", ColumnTypes.BYTE_ARRAY, StorageType.ZSTD);
|
||||
private static final ColumnDesc<GammaCodedSequenceArrayReader, GammaCodedSequenceArrayWriter> spansColumn = new ColumnDesc<>("spans", GammaCodedSequenceArrayColumn.TYPE, StorageType.ZSTD);
|
||||
|
||||
private static final ByteArrayColumn spanCodesColumn = new ByteArrayColumn("spanCodes", StorageType.ZSTD);
|
||||
private static final GammaCodedSequenceArrayColumn spansColumn = new GammaCodedSequenceArrayColumn("spans", StorageType.ZSTD);
|
||||
|
||||
public static class KeywordsProjectionReader extends SlopTable {
|
||||
private final StringColumnReader domainsReader;
|
||||
private final VarintColumnReader ordinalsReader;
|
||||
private final IntColumnReader htmlFeaturesReader;
|
||||
private final LongColumnReader domainMetadataReader;
|
||||
private final IntColumnReader lengthsReader;
|
||||
private final TxtStringColumn.Reader domainsReader;
|
||||
private final VarintColumn.Reader ordinalsReader;
|
||||
private final IntColumn.Reader htmlFeaturesReader;
|
||||
private final LongColumn.Reader domainMetadataReader;
|
||||
private final IntColumn.Reader lengthsReader;
|
||||
|
||||
private final ObjectArrayColumnReader<String> keywordsReader;
|
||||
private final ByteArrayColumnReader termMetaReader;
|
||||
private final GammaCodedSequenceArrayReader termPositionsReader;
|
||||
private final ObjectArrayColumn<String>.Reader keywordsReader;
|
||||
private final ByteArrayColumn.Reader termMetaReader;
|
||||
private final GammaCodedSequenceArrayColumn.Reader termPositionsReader;
|
||||
|
||||
private final ByteArrayColumnReader spanCodesReader;
|
||||
private final GammaCodedSequenceArrayReader spansReader;
|
||||
private final ByteArrayColumn.Reader spanCodesReader;
|
||||
private final GammaCodedSequenceArrayColumn.Reader spansReader;
|
||||
|
||||
public KeywordsProjectionReader(SlopPageRef<SlopDocumentRecord> pageRef) throws IOException {
|
||||
this(pageRef.baseDir(), pageRef.page());
|
||||
@ -206,18 +203,18 @@ public record SlopDocumentRecord(
|
||||
}
|
||||
|
||||
public static class MetadataReader extends SlopTable {
|
||||
private final StringColumnReader domainsReader;
|
||||
private final StringColumnReader urlsReader;
|
||||
private final VarintColumnReader ordinalsReader;
|
||||
private final StringColumnReader titlesReader;
|
||||
private final StringColumnReader descriptionsReader;
|
||||
private final TxtStringColumn.Reader domainsReader;
|
||||
private final TxtStringColumn.Reader urlsReader;
|
||||
private final VarintColumn.Reader ordinalsReader;
|
||||
private final StringColumn.Reader titlesReader;
|
||||
private final StringColumn.Reader descriptionsReader;
|
||||
|
||||
private final IntColumnReader htmlFeaturesReader;
|
||||
private final StringColumnReader htmlStandardsReader;
|
||||
private final IntColumnReader lengthsReader;
|
||||
private final LongColumnReader hashesReader;
|
||||
private final FloatColumnReader qualitiesReader;
|
||||
private final IntColumnReader pubYearReader;
|
||||
private final IntColumn.Reader htmlFeaturesReader;
|
||||
private final EnumColumn.Reader htmlStandardsReader;
|
||||
private final IntColumn.Reader lengthsReader;
|
||||
private final LongColumn.Reader hashesReader;
|
||||
private final FloatColumn.Reader qualitiesReader;
|
||||
private final IntColumn.Reader pubYearReader;
|
||||
|
||||
public MetadataReader(SlopPageRef<SlopDocumentRecord> pageRef) throws IOException{
|
||||
this(pageRef.baseDir(), pageRef.page());
|
||||
@ -263,25 +260,25 @@ public record SlopDocumentRecord(
|
||||
}
|
||||
|
||||
public static class Writer extends SlopTable {
|
||||
private final StringColumnWriter domainsWriter;
|
||||
private final StringColumnWriter urlsWriter;
|
||||
private final VarintColumnWriter ordinalsWriter;
|
||||
private final StringColumnWriter statesWriter;
|
||||
private final StringColumnWriter stateReasonsWriter;
|
||||
private final StringColumnWriter titlesWriter;
|
||||
private final StringColumnWriter descriptionsWriter;
|
||||
private final IntColumnWriter htmlFeaturesWriter;
|
||||
private final StringColumnWriter htmlStandardsWriter;
|
||||
private final IntColumnWriter lengthsWriter;
|
||||
private final LongColumnWriter hashesWriter;
|
||||
private final FloatColumnWriter qualitiesWriter;
|
||||
private final LongColumnWriter domainMetadataWriter;
|
||||
private final IntColumnWriter pubYearWriter;
|
||||
private final ObjectArrayColumnWriter<String> keywordsWriter;
|
||||
private final ByteArrayColumnWriter termMetaWriter;
|
||||
private final GammaCodedSequenceArrayWriter termPositionsWriter;
|
||||
private final ByteArrayColumnWriter spansCodesWriter;
|
||||
private final GammaCodedSequenceArrayWriter spansWriter;
|
||||
private final TxtStringColumn.Writer domainsWriter;
|
||||
private final TxtStringColumn.Writer urlsWriter;
|
||||
private final VarintColumn.Writer ordinalsWriter;
|
||||
private final EnumColumn.Writer statesWriter;
|
||||
private final StringColumn.Writer stateReasonsWriter;
|
||||
private final StringColumn.Writer titlesWriter;
|
||||
private final StringColumn.Writer descriptionsWriter;
|
||||
private final IntColumn.Writer htmlFeaturesWriter;
|
||||
private final EnumColumn.Writer htmlStandardsWriter;
|
||||
private final IntColumn.Writer lengthsWriter;
|
||||
private final LongColumn.Writer hashesWriter;
|
||||
private final FloatColumn.Writer qualitiesWriter;
|
||||
private final LongColumn.Writer domainMetadataWriter;
|
||||
private final IntColumn.Writer pubYearWriter;
|
||||
private final ObjectArrayColumn<String>.Writer keywordsWriter;
|
||||
private final ByteArrayColumn.Writer termMetaWriter;
|
||||
private final GammaCodedSequenceArrayColumn.Writer termPositionsWriter;
|
||||
private final ByteArrayColumn.Writer spansCodesWriter;
|
||||
private final GammaCodedSequenceArrayColumn.Writer spansWriter;
|
||||
|
||||
public Writer(Path baseDir, int page) throws IOException {
|
||||
super(page);
|
||||
|
@ -1,10 +1,7 @@
|
||||
package nu.marginalia.model.processed;
|
||||
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.string.StringColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
import nu.marginalia.slop.column.string.TxtStringColumn;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
import java.io.IOException;
|
||||
@ -15,16 +12,16 @@ public record SlopDomainLinkRecord(
|
||||
String source,
|
||||
String dest)
|
||||
{
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> sourcesColumn = new ColumnDesc<>("source", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> destsColumn = new ColumnDesc<>("dest", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final TxtStringColumn sourcesColumn = new TxtStringColumn("source", StorageType.GZIP);
|
||||
private static final TxtStringColumn destsColumn = new TxtStringColumn("dest", StorageType.GZIP);
|
||||
|
||||
public static Reader reader(Path baseDir, int page) throws IOException {
|
||||
return new Reader(baseDir, page);
|
||||
}
|
||||
|
||||
public static class Reader extends SlopTable {
|
||||
private final StringColumnReader sourcesReader;
|
||||
private final StringColumnReader destsReader;
|
||||
private final TxtStringColumn.Reader sourcesReader;
|
||||
private final TxtStringColumn.Reader destsReader;
|
||||
|
||||
public Reader(SlopPageRef<SlopDomainLinkRecord> page) throws IOException {
|
||||
this(page.baseDir(), page.page());
|
||||
@ -57,8 +54,8 @@ public record SlopDomainLinkRecord(
|
||||
}
|
||||
|
||||
public static class Writer extends SlopTable {
|
||||
private final StringColumnWriter sourcesWriter;
|
||||
private final StringColumnWriter destsWriter;
|
||||
private final TxtStringColumn.Writer sourcesWriter;
|
||||
private final TxtStringColumn.Writer destsWriter;
|
||||
|
||||
public Writer(Path baseDir, int page) throws IOException {
|
||||
super(page);
|
||||
|
@ -1,15 +1,10 @@
|
||||
package nu.marginalia.model.processed;
|
||||
|
||||
import nu.marginalia.slop.ColumnTypes;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumnReader;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumnWriter;
|
||||
import nu.marginalia.slop.column.primitive.IntColumnReader;
|
||||
import nu.marginalia.slop.column.primitive.IntColumnWriter;
|
||||
import nu.marginalia.slop.column.string.EnumColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnReader;
|
||||
import nu.marginalia.slop.column.string.StringColumnWriter;
|
||||
import nu.marginalia.slop.desc.ColumnDesc;
|
||||
import nu.marginalia.slop.desc.SlopTable;
|
||||
import nu.marginalia.slop.SlopTable;
|
||||
import nu.marginalia.slop.column.array.ObjectArrayColumn;
|
||||
import nu.marginalia.slop.column.primitive.IntColumn;
|
||||
import nu.marginalia.slop.column.string.EnumColumn;
|
||||
import nu.marginalia.slop.column.string.TxtStringColumn;
|
||||
import nu.marginalia.slop.desc.StorageType;
|
||||
|
||||
import java.io.IOException;
|
||||
@ -33,20 +28,20 @@ public record SlopDomainRecord(
|
||||
String ip)
|
||||
{}
|
||||
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> domainsColumn = new ColumnDesc<>("domain", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<EnumColumnReader, StringColumnWriter> statesColumn = new ColumnDesc<>("state", ColumnTypes.ENUM_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> redirectDomainsColumn = new ColumnDesc<>("redirectDomain", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final ColumnDesc<StringColumnReader, StringColumnWriter> ipColumn = new ColumnDesc<>("ip", ColumnTypes.TXTSTRING, StorageType.GZIP);
|
||||
private static final TxtStringColumn domainsColumn = new TxtStringColumn("domain", StorageType.GZIP);
|
||||
private static final EnumColumn statesColumn = new EnumColumn("state", StorageType.PLAIN);
|
||||
private static final TxtStringColumn redirectDomainsColumn = new TxtStringColumn("redirectDomain", StorageType.GZIP);
|
||||
private static final TxtStringColumn ipColumn = new TxtStringColumn("ip", StorageType.GZIP);
|
||||
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> knownUrlsColumn = new ColumnDesc<>("knownUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> goodUrlsColumn = new ColumnDesc<>("goodUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final ColumnDesc<IntColumnReader, IntColumnWriter> visitedUrlsColumn = new ColumnDesc<>("visitedUrls", ColumnTypes.INT_LE, StorageType.PLAIN);
|
||||
private static final IntColumn knownUrlsColumn = new IntColumn("knownUrls", StorageType.PLAIN);
|
||||
private static final IntColumn goodUrlsColumn = new IntColumn("goodUrls", StorageType.PLAIN);
|
||||
private static final IntColumn visitedUrlsColumn = new IntColumn("visitedUrls", StorageType.PLAIN);
|
||||
|
||||
private static final ColumnDesc<ObjectArrayColumnReader<String>, ObjectArrayColumnWriter<String>> rssFeedsColumn = new ColumnDesc<>("rssFeeds", ColumnTypes.TXTSTRING_ARRAY, StorageType.GZIP);
|
||||
private static final ObjectArrayColumn<String> rssFeedsColumn = new TxtStringColumn("rssFeeds", StorageType.GZIP).asArray();
|
||||
|
||||
|
||||
public static class DomainNameReader extends SlopTable {
|
||||
private final StringColumnReader domainsReader;
|
||||
private final TxtStringColumn.Reader domainsReader;
|
||||
|
||||
public DomainNameReader(SlopPageRef<SlopDomainRecord> page) throws IOException {
|
||||
this(page.baseDir(), page.page());
|
||||
@ -68,8 +63,8 @@ public record SlopDomainRecord(
|
||||
}
|
||||
|
||||
public static class DomainWithIpReader extends SlopTable {
|
||||
private final StringColumnReader domainsReader;
|
||||
private final StringColumnReader ipReader;
|
||||
private final TxtStringColumn.Reader domainsReader;
|
||||
private final TxtStringColumn.Reader ipReader;
|
||||
|
||||
public DomainWithIpReader(SlopPageRef<SlopDomainRecord> page) throws IOException {
|
||||
this(page.baseDir(), page.page());
|
||||
@ -96,16 +91,16 @@ public record SlopDomainRecord(
|
||||
}
|
||||
|
||||
public static class Reader extends SlopTable {
|
||||
private final StringColumnReader domainsReader;
|
||||
private final StringColumnReader statesReader;
|
||||
private final StringColumnReader redirectReader;
|
||||
private final StringColumnReader ipReader;
|
||||
private final TxtStringColumn.Reader domainsReader;
|
||||
private final EnumColumn.Reader statesReader;
|
||||
private final TxtStringColumn.Reader redirectReader;
|
||||
private final TxtStringColumn.Reader ipReader;
|
||||
|
||||
private final IntColumnReader knownUrlsReader;
|
||||
private final IntColumnReader goodUrlsReader;
|
||||
private final IntColumnReader visitedUrlsReader;
|
||||
private final IntColumn.Reader knownUrlsReader;
|
||||
private final IntColumn.Reader goodUrlsReader;
|
||||
private final IntColumn.Reader visitedUrlsReader;
|
||||
|
||||
private final ObjectArrayColumnReader<String> rssFeedsReader;
|
||||
private final ObjectArrayColumn<String>.Reader rssFeedsReader;
|
||||
|
||||
public Reader(SlopPageRef<SlopDomainRecord> page) throws IOException {
|
||||
this(page.baseDir(), page.page());
|
||||
@ -151,16 +146,16 @@ public record SlopDomainRecord(
|
||||
}
|
||||
|
||||
public static class Writer extends SlopTable {
|
||||
private final StringColumnWriter domainsWriter;
|
||||
private final StringColumnWriter statesWriter;
|
||||
private final StringColumnWriter redirectWriter;
|
||||
private final StringColumnWriter ipWriter;
|
||||
private final TxtStringColumn.Writer domainsWriter;
|
||||
private final EnumColumn.Writer statesWriter;
|
||||
private final TxtStringColumn.Writer redirectWriter;
|
||||
private final TxtStringColumn.Writer ipWriter;
|
||||
|
||||
private final IntColumnWriter knownUrlsWriter;
|
||||
private final IntColumnWriter goodUrlsWriter;
|
||||
private final IntColumnWriter visitedUrlsWriter;
|
||||
private final IntColumn.Writer knownUrlsWriter;
|
||||
private final IntColumn.Writer goodUrlsWriter;
|
||||
private final IntColumn.Writer visitedUrlsWriter;
|
||||
|
||||
private final ObjectArrayColumnWriter<String> rssFeedsWriter;
|
||||
private final ObjectArrayColumn<String>.Writer rssFeedsWriter;
|
||||
|
||||
public Writer(Path baseDir, int page) throws IOException {
|
||||
super(page);
|
||||
|
@ -226,7 +226,7 @@ dependencyResolutionManagement {
|
||||
library('jetty-util','org.eclipse.jetty','jetty-util').version('9.4.54.v20240208')
|
||||
library('jetty-servlet','org.eclipse.jetty','jetty-servlet').version('9.4.54.v20240208')
|
||||
|
||||
library('slop', 'nu.marginalia', 'slop').version('0.0.1-SNAPSHOT')
|
||||
library('slop', 'nu.marginalia', 'slop').version('0.0.3-SNAPSHOT')
|
||||
|
||||
bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet'])
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user