(*) Upgrade slop library -> 0.0.5

This commit is contained in:
Viktor Lofgren 2024-08-18 11:05:27 +02:00
parent bca40de107
commit b0a874a842
11 changed files with 112 additions and 112 deletions

View File

@@ -86,7 +86,7 @@ public class ForwardIndexConverter {
ByteBuffer workArea = ByteBuffer.allocate(65536); ByteBuffer workArea = ByteBuffer.allocate(65536);
for (var instance : journal.pages()) { for (var instance : journal.pages()) {
try (var slopTable = new SlopTable(instance.page())) try (var slopTable = new SlopTable(instance.baseDir(), instance.page()))
{ {
var docIdReader = instance.openCombinedId(slopTable); var docIdReader = instance.openCombinedId(slopTable);
var metaReader = instance.openDocumentMeta(slopTable); var metaReader = instance.openDocumentMeta(slopTable);
@@ -152,7 +152,7 @@ public class ForwardIndexConverter {
Roaring64Bitmap rbm = new Roaring64Bitmap(); Roaring64Bitmap rbm = new Roaring64Bitmap();
for (var instance : journalReader.pages()) { for (var instance : journalReader.pages()) {
try (var slopTable = new SlopTable(instance.page())) { try (var slopTable = new SlopTable(instance.baseDir(), instance.page())) {
LongColumn.Reader idReader = instance.openCombinedId(slopTable); LongColumn.Reader idReader = instance.openCombinedId(slopTable);
while (idReader.hasRemaining()) { while (idReader.hasRemaining()) {

View File

@@ -31,39 +31,39 @@ public record IndexJournalPage(Path baseDir, int page) {
} }
public LongColumn.Reader openCombinedId(SlopTable table) throws IOException { public LongColumn.Reader openCombinedId(SlopTable table) throws IOException {
return combinedId.open(table, baseDir); return combinedId.open(table);
} }
public LongColumn.Reader openDocumentMeta(SlopTable table) throws IOException { public LongColumn.Reader openDocumentMeta(SlopTable table) throws IOException {
return documentMeta.open(table, baseDir); return documentMeta.open(table);
} }
public IntColumn.Reader openFeatures(SlopTable table) throws IOException { public IntColumn.Reader openFeatures(SlopTable table) throws IOException {
return features.open(table, baseDir); return features.open(table);
} }
public IntColumn.Reader openSize(SlopTable table) throws IOException { public IntColumn.Reader openSize(SlopTable table) throws IOException {
return size.open(table, baseDir); return size.open(table);
} }
public LongArrayColumn.Reader openTermIds(SlopTable table) throws IOException { public LongArrayColumn.Reader openTermIds(SlopTable table) throws IOException {
return termIds.open(table, baseDir); return termIds.open(table);
} }
public ByteArrayColumn.Reader openTermMetadata(SlopTable table) throws IOException { public ByteArrayColumn.Reader openTermMetadata(SlopTable table) throws IOException {
return termMeta.open(table, baseDir); return termMeta.open(table);
} }
public GammaCodedSequenceArrayColumn.Reader openTermPositions(SlopTable table) throws IOException { public GammaCodedSequenceArrayColumn.Reader openTermPositions(SlopTable table) throws IOException {
return positions.open(table, baseDir); return positions.open(table);
} }
public GammaCodedSequenceArrayColumn.Reader openSpans(SlopTable table) throws IOException { public GammaCodedSequenceArrayColumn.Reader openSpans(SlopTable table) throws IOException {
return spans.open(table, baseDir); return spans.open(table);
} }
public ByteArrayColumn.Reader openSpanCodes(SlopTable table) throws IOException { public ByteArrayColumn.Reader openSpanCodes(SlopTable table) throws IOException {
return spanCodes.open(table, baseDir); return spanCodes.open(table);
} }
} }

View File

@@ -33,24 +33,24 @@ public class IndexJournalSlopWriter extends SlopTable {
public IndexJournalSlopWriter(Path dir, int page) throws IOException { public IndexJournalSlopWriter(Path dir, int page) throws IOException {
super(page); super(dir, page);
if (!Files.exists(dir)) { if (!Files.exists(dir)) {
Files.createDirectory(dir); Files.createDirectory(dir);
} }
featuresWriter = IndexJournalPage.features.create(this, dir); featuresWriter = IndexJournalPage.features.create(this);
sizeWriter = IndexJournalPage.size.create(this, dir); sizeWriter = IndexJournalPage.size.create(this);
combinedIdWriter = IndexJournalPage.combinedId.create(this, dir); combinedIdWriter = IndexJournalPage.combinedId.create(this);
documentMetaWriter = IndexJournalPage.documentMeta.create(this, dir); documentMetaWriter = IndexJournalPage.documentMeta.create(this);
termIdsWriter = IndexJournalPage.termIds.create(this, dir); termIdsWriter = IndexJournalPage.termIds.create(this);
termMetadataWriter = IndexJournalPage.termMeta.create(this, dir); termMetadataWriter = IndexJournalPage.termMeta.create(this);
termPositionsWriter = IndexJournalPage.positions.create(this, dir); termPositionsWriter = IndexJournalPage.positions.create(this);
spanCodesWriter = IndexJournalPage.spanCodes.create(this, dir); spanCodesWriter = IndexJournalPage.spanCodes.create(this);
spansWriter = IndexJournalPage.spans.create(this, dir); spansWriter = IndexJournalPage.spans.create(this);
} }
@SneakyThrows @SneakyThrows

View File

@@ -68,7 +68,7 @@ public class FullPreindexDocuments {
private static void createUnsortedDocsFile(Path docsFile, private static void createUnsortedDocsFile(Path docsFile,
Path workDir, Path workDir,
IndexJournalPage journalInstance, IndexJournalPage instance,
FullPreindexWordSegments segments, FullPreindexWordSegments segments,
DocIdRewriter docIdRewriter) throws IOException { DocIdRewriter docIdRewriter) throws IOException {
@@ -77,12 +77,12 @@ public class FullPreindexDocuments {
final ByteBuffer tempBuffer = ByteBuffer.allocate(1024*1024*100); final ByteBuffer tempBuffer = ByteBuffer.allocate(1024*1024*100);
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs); try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
var slopTable = new SlopTable(journalInstance.page())) var slopTable = new SlopTable(instance.baseDir(), instance.page()))
{ {
var docIds = journalInstance.openCombinedId(slopTable); var docIds = instance.openCombinedId(slopTable);
var termIds = journalInstance.openTermIds(slopTable); var termIds = instance.openTermIds(slopTable);
var termMeta = journalInstance.openTermMetadata(slopTable); var termMeta = instance.openTermMetadata(slopTable);
var positions = journalInstance.openTermPositions(slopTable); var positions = instance.openTermPositions(slopTable);
var offsetMap = segments.asMap(RECORD_SIZE_LONGS); var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
offsetMap.defaultReturnValue(0); offsetMap.defaultReturnValue(0);

View File

@@ -52,7 +52,7 @@ public class FullPreindexWordSegments {
return ret; return ret;
} }
public static FullPreindexWordSegments construct(IndexJournalPage journalInstance, public static FullPreindexWordSegments construct(IndexJournalPage instance,
Path wordIdsFile, Path wordIdsFile,
Path countsFile) Path countsFile)
throws IOException throws IOException
@@ -60,8 +60,8 @@ public class FullPreindexWordSegments {
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f); Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
countsMap.defaultReturnValue(0); countsMap.defaultReturnValue(0);
try (var slopTable = new SlopTable(journalInstance.page())) { try (var slopTable = new SlopTable(instance.baseDir(), instance.page())) {
var termIds = journalInstance.openTermIds(slopTable); var termIds = instance.openTermIds(slopTable);
while (termIds.hasRemaining()) { while (termIds.hasRemaining()) {
long[] tids = termIds.get(); long[] tids = termIds.get();
for (long termId : tids) { for (long termId : tids) {

View File

@@ -58,18 +58,18 @@ public class PrioPreindexDocuments {
private static void createUnsortedDocsFile(Path docsFile, private static void createUnsortedDocsFile(Path docsFile,
Path workDir, Path workDir,
IndexJournalPage journalInstance, IndexJournalPage instance,
PrioPreindexWordSegments segments, PrioPreindexWordSegments segments,
DocIdRewriter docIdRewriter) throws IOException { DocIdRewriter docIdRewriter) throws IOException {
long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize(); long fileSizeLongs = RECORD_SIZE_LONGS * segments.totalSize();
try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs); try (var assembly = RandomFileAssembler.create(workDir, fileSizeLongs);
var slopTable = new SlopTable(journalInstance.page())) var slopTable = new SlopTable(instance.baseDir(), instance.page()))
{ {
var docIds = journalInstance.openCombinedId(slopTable); var docIds = instance.openCombinedId(slopTable);
var termIds = journalInstance.openTermIds(slopTable); var termIds = instance.openTermIds(slopTable);
var termMeta = journalInstance.openTermMetadata(slopTable); var termMeta = instance.openTermMetadata(slopTable);
var offsetMap = segments.asMap(RECORD_SIZE_LONGS); var offsetMap = segments.asMap(RECORD_SIZE_LONGS);
offsetMap.defaultReturnValue(0); offsetMap.defaultReturnValue(0);

View File

@@ -52,7 +52,7 @@ public class PrioPreindexWordSegments {
return ret; return ret;
} }
public static PrioPreindexWordSegments construct(IndexJournalPage journalInstance, public static PrioPreindexWordSegments construct(IndexJournalPage instance,
Path wordIdsFile, Path wordIdsFile,
Path countsFile) Path countsFile)
throws IOException throws IOException
@@ -60,9 +60,9 @@ public class PrioPreindexWordSegments {
Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f); Long2IntOpenHashMap countsMap = new Long2IntOpenHashMap(100_000, 0.75f);
countsMap.defaultReturnValue(0); countsMap.defaultReturnValue(0);
try (var slopTable = new SlopTable(journalInstance.page())) { try (var slopTable = new SlopTable(instance.baseDir(), instance.page())) {
var termIds = journalInstance.openTermIds(slopTable); var termIds = instance.openTermIds(slopTable);
var termMetas = journalInstance.openTermMetadata(slopTable); var termMetas = instance.openTermMetadata(slopTable);
while (termIds.hasRemaining()) { while (termIds.hasRemaining()) {
long[] data = termIds.get(); long[] data = termIds.get();

View File

@@ -153,19 +153,19 @@ public record SlopDocumentRecord(
} }
public KeywordsProjectionReader(Path baseDir, int page) throws IOException { public KeywordsProjectionReader(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
domainsReader = domainsColumn.open(this, baseDir); domainsReader = domainsColumn.open(this);
ordinalsReader = ordinalsColumn.open(this, baseDir); ordinalsReader = ordinalsColumn.open(this);
htmlFeaturesReader = htmlFeaturesColumn.open(this, baseDir); htmlFeaturesReader = htmlFeaturesColumn.open(this);
domainMetadataReader = domainMetadata.open(this, baseDir); domainMetadataReader = domainMetadata.open(this);
lengthsReader = lengthsColumn.open(this, baseDir); lengthsReader = lengthsColumn.open(this);
keywordsReader = keywordsColumn.open(this, baseDir); keywordsReader = keywordsColumn.open(this);
termMetaReader = termMetaColumn.open(this, baseDir); termMetaReader = termMetaColumn.open(this);
termPositionsReader = termPositionsColumn.open(this, baseDir); termPositionsReader = termPositionsColumn.open(this);
spanCodesReader = spanCodesColumn.open(this, baseDir); spanCodesReader = spanCodesColumn.open(this);
spansReader = spansColumn.open(this, baseDir); spansReader = spansColumn.open(this);
} }
public boolean hasMore() throws IOException { public boolean hasMore() throws IOException {
@@ -221,19 +221,19 @@ public record SlopDocumentRecord(
} }
public MetadataReader(Path baseDir, int page) throws IOException { public MetadataReader(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
this.domainsReader = domainsColumn.open(this, baseDir); this.domainsReader = domainsColumn.open(this);
this.urlsReader = urlsColumn.open(this, baseDir); this.urlsReader = urlsColumn.open(this);
this.ordinalsReader = ordinalsColumn.open(this, baseDir); this.ordinalsReader = ordinalsColumn.open(this);
this.titlesReader = titlesColumn.open(this, baseDir); this.titlesReader = titlesColumn.open(this);
this.descriptionsReader = descriptionsColumn.open(this, baseDir); this.descriptionsReader = descriptionsColumn.open(this);
this.htmlFeaturesReader = htmlFeaturesColumn.open(this, baseDir); this.htmlFeaturesReader = htmlFeaturesColumn.open(this);
this.htmlStandardsReader = htmlStandardsColumn.open(this, baseDir); this.htmlStandardsReader = htmlStandardsColumn.open(this);
this.lengthsReader = lengthsColumn.open(this, baseDir); this.lengthsReader = lengthsColumn.open(this);
this.hashesReader = hashesColumn.open(this, baseDir); this.hashesReader = hashesColumn.open(this);
this.qualitiesReader = qualitiesColumn.open(this, baseDir); this.qualitiesReader = qualitiesColumn.open(this);
this.pubYearReader = pubYearColumn.open(this, baseDir); this.pubYearReader = pubYearColumn.open(this);
} }
public boolean hasMore() throws IOException { public boolean hasMore() throws IOException {
@@ -281,29 +281,29 @@ public record SlopDocumentRecord(
private final GammaCodedSequenceArrayColumn.Writer spansWriter; private final GammaCodedSequenceArrayColumn.Writer spansWriter;
public Writer(Path baseDir, int page) throws IOException { public Writer(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
domainsWriter = domainsColumn.create(this, baseDir); domainsWriter = domainsColumn.create(this);
urlsWriter = urlsColumn.create(this, baseDir); urlsWriter = urlsColumn.create(this);
ordinalsWriter = ordinalsColumn.create(this, baseDir); ordinalsWriter = ordinalsColumn.create(this);
statesWriter = statesColumn.create(this, baseDir); statesWriter = statesColumn.create(this);
stateReasonsWriter = stateReasonsColumn.create(this, baseDir); stateReasonsWriter = stateReasonsColumn.create(this);
titlesWriter = titlesColumn.create(this, baseDir); titlesWriter = titlesColumn.create(this);
descriptionsWriter = descriptionsColumn.create(this, baseDir); descriptionsWriter = descriptionsColumn.create(this);
htmlFeaturesWriter = htmlFeaturesColumn.create(this, baseDir); htmlFeaturesWriter = htmlFeaturesColumn.create(this);
htmlStandardsWriter = htmlStandardsColumn.create(this, baseDir); htmlStandardsWriter = htmlStandardsColumn.create(this);
lengthsWriter = lengthsColumn.create(this, baseDir); lengthsWriter = lengthsColumn.create(this);
hashesWriter = hashesColumn.create(this, baseDir); hashesWriter = hashesColumn.create(this);
qualitiesWriter = qualitiesColumn.create(this, baseDir); qualitiesWriter = qualitiesColumn.create(this);
domainMetadataWriter = domainMetadata.create(this, baseDir); domainMetadataWriter = domainMetadata.create(this);
pubYearWriter = pubYearColumn.create(this, baseDir); pubYearWriter = pubYearColumn.create(this);
keywordsWriter = keywordsColumn.create(this, baseDir); keywordsWriter = keywordsColumn.create(this);
termMetaWriter = termMetaColumn.create(this, baseDir); termMetaWriter = termMetaColumn.create(this);
termPositionsWriter = termPositionsColumn.create(this, baseDir); termPositionsWriter = termPositionsColumn.create(this);
spansCodesWriter = spanCodesColumn.create(this, baseDir); spansCodesWriter = spanCodesColumn.create(this);
spansWriter = spansColumn.create(this, baseDir); spansWriter = spansColumn.create(this);
} }
public void write(SlopDocumentRecord record) throws IOException { public void write(SlopDocumentRecord record) throws IOException {

View File

@@ -28,10 +28,10 @@ public record SlopDomainLinkRecord(
} }
public Reader(Path baseDir, int page) throws IOException { public Reader(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
sourcesReader = sourcesColumn.open(this, baseDir); sourcesReader = sourcesColumn.open(this);
destsReader = destsColumn.open(this, baseDir); destsReader = destsColumn.open(this);
} }
public boolean hasMore() throws IOException { public boolean hasMore() throws IOException {
@@ -58,10 +58,10 @@ public record SlopDomainLinkRecord(
private final TxtStringColumn.Writer destsWriter; private final TxtStringColumn.Writer destsWriter;
public Writer(Path baseDir, int page) throws IOException { public Writer(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
sourcesWriter = sourcesColumn.create(this, baseDir); sourcesWriter = sourcesColumn.create(this);
destsWriter = destsColumn.create(this, baseDir); destsWriter = destsColumn.create(this);
} }
public void write(SlopDomainLinkRecord record) throws IOException { public void write(SlopDomainLinkRecord record) throws IOException {

View File

@@ -48,9 +48,9 @@ public record SlopDomainRecord(
} }
public DomainNameReader(Path baseDir, int page) throws IOException { public DomainNameReader(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
domainsReader = domainsColumn.open(this, baseDir); domainsReader = domainsColumn.open(this);
} }
public boolean hasMore() throws IOException { public boolean hasMore() throws IOException {
@@ -71,10 +71,10 @@ public record SlopDomainRecord(
} }
public DomainWithIpReader(Path baseDir, int page) throws IOException { public DomainWithIpReader(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
domainsReader = domainsColumn.open(this, baseDir); domainsReader = domainsColumn.open(this);
ipReader = ipColumn.open(this, baseDir); ipReader = ipColumn.open(this);
} }
public boolean hasMore() throws IOException { public boolean hasMore() throws IOException {
@@ -107,18 +107,18 @@ public record SlopDomainRecord(
} }
public Reader(Path baseDir, int page) throws IOException { public Reader(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
domainsReader = domainsColumn.open(this, baseDir); domainsReader = domainsColumn.open(this);
statesReader = statesColumn.open(this, baseDir); statesReader = statesColumn.open(this);
redirectReader = redirectDomainsColumn.open(this, baseDir); redirectReader = redirectDomainsColumn.open(this);
ipReader = ipColumn.open(this, baseDir); ipReader = ipColumn.open(this);
knownUrlsReader = knownUrlsColumn.open(this, baseDir); knownUrlsReader = knownUrlsColumn.open(this);
goodUrlsReader = goodUrlsColumn.open(this, baseDir); goodUrlsReader = goodUrlsColumn.open(this);
visitedUrlsReader = visitedUrlsColumn.open(this, baseDir); visitedUrlsReader = visitedUrlsColumn.open(this);
rssFeedsReader = rssFeedsColumn.open(this, baseDir); rssFeedsReader = rssFeedsColumn.open(this);
} }
public boolean hasMore() throws IOException { public boolean hasMore() throws IOException {
@@ -158,18 +158,18 @@ public record SlopDomainRecord(
private final ObjectArrayColumn<String>.Writer rssFeedsWriter; private final ObjectArrayColumn<String>.Writer rssFeedsWriter;
public Writer(Path baseDir, int page) throws IOException { public Writer(Path baseDir, int page) throws IOException {
super(page); super(baseDir, page);
domainsWriter = domainsColumn.create(this, baseDir); domainsWriter = domainsColumn.create(this);
statesWriter = statesColumn.create(this, baseDir); statesWriter = statesColumn.create(this);
redirectWriter = redirectDomainsColumn.create(this, baseDir); redirectWriter = redirectDomainsColumn.create(this);
ipWriter = ipColumn.create(this, baseDir); ipWriter = ipColumn.create(this);
knownUrlsWriter = knownUrlsColumn.create(this, baseDir); knownUrlsWriter = knownUrlsColumn.create(this);
goodUrlsWriter = goodUrlsColumn.create(this, baseDir); goodUrlsWriter = goodUrlsColumn.create(this);
visitedUrlsWriter = visitedUrlsColumn.create(this, baseDir); visitedUrlsWriter = visitedUrlsColumn.create(this);
rssFeedsWriter = rssFeedsColumn.create(this, baseDir); rssFeedsWriter = rssFeedsColumn.create(this);
} }
public void write(SlopDomainRecord record) throws IOException { public void write(SlopDomainRecord record) throws IOException {

View File

@@ -226,7 +226,7 @@ dependencyResolutionManagement {
library('jetty-util','org.eclipse.jetty','jetty-util').version('9.4.54.v20240208') library('jetty-util','org.eclipse.jetty','jetty-util').version('9.4.54.v20240208')
library('jetty-servlet','org.eclipse.jetty','jetty-servlet').version('9.4.54.v20240208') library('jetty-servlet','org.eclipse.jetty','jetty-servlet').version('9.4.54.v20240208')
library('slop', 'nu.marginalia', 'slop').version('0.0.4-SNAPSHOT') library('slop', 'nu.marginalia', 'slop').version('0.0.5-SNAPSHOT')
bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet']) bundle('jetty', ['jetty-server', 'jetty-util', 'jetty-servlet'])