mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 13:19:02 +00:00
(reverse-index) Fix over-allocation of the count array in merging
This commit is contained in:
parent
3101b74580
commit
00c4686ef0
@ -56,10 +56,11 @@ public class ReverseIndexConstructor {
|
|||||||
List<ReversePreindex> merged = new ArrayList<>();
|
List<ReversePreindex> merged = new ArrayList<>();
|
||||||
|
|
||||||
while (toMerge.size() != 1) {
|
while (toMerge.size() != 1) {
|
||||||
for (int i = 0; i < toMerge.size(); i+=2) {
|
for (int i = 0; i + 1< toMerge.size(); i+=2) {
|
||||||
var left = toMerge.get(i);
|
var left = toMerge.get(i);
|
||||||
var right = toMerge.get(i+1);
|
var right = toMerge.get(i+1);
|
||||||
|
|
||||||
|
logger.info("Merge {}, {}", i, i+1);
|
||||||
merged.add(ReversePreindex.merge(workDir, left, right));
|
merged.add(ReversePreindex.merge(workDir, left, right));
|
||||||
|
|
||||||
left.delete();
|
left.delete();
|
||||||
|
@ -38,7 +38,6 @@ public class ReversePreindex {
|
|||||||
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2);
|
IndexSizeEstimator sizeEstimator = new IndexSizeEstimator(ReverseIndexParameters.docsBTreeContext, 2);
|
||||||
offsets.fold(0, 0, offsets.size(), sizeEstimator);
|
offsets.fold(0, 0, offsets.size(), sizeEstimator);
|
||||||
|
|
||||||
System.out.println("size estimate = " + sizeEstimator.size);
|
|
||||||
// Write the docs file
|
// Write the docs file
|
||||||
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
|
LongArray finalDocs = LongArray.mmapForWriting(outputFileDocs, sizeEstimator.size);
|
||||||
try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
|
try (var intermediateDocChannel = documents.createDocumentsFileChannel()) {
|
||||||
@ -48,6 +47,8 @@ public class ReversePreindex {
|
|||||||
|
|
||||||
LongArray wordIds = segments.wordIds;
|
LongArray wordIds = segments.wordIds;
|
||||||
|
|
||||||
|
assert offsets.size() == wordIds.size() : "Offsets and word-ids of different size";
|
||||||
|
|
||||||
// Estimate the size of the words index data
|
// Estimate the size of the words index data
|
||||||
long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size());
|
long wordsSize = ReverseIndexParameters.wordsBTreeContext.calculateSize((int) offsets.size());
|
||||||
|
|
||||||
@ -108,7 +109,7 @@ public class ReversePreindex {
|
|||||||
0, left.wordIds.size(),
|
0, left.wordIds.size(),
|
||||||
0, right.wordIds.size());
|
0, right.wordIds.size());
|
||||||
|
|
||||||
LongArray counts = LongArray.mmapForWriting(segmentCountsFile, 8*segmentsSize);
|
LongArray counts = LongArray.mmapForWriting(segmentCountsFile, segmentsSize);
|
||||||
|
|
||||||
return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
|
return new ReversePreindexWordSegments(wordIdsFile, counts, segmentWordsFile, segmentCountsFile);
|
||||||
}
|
}
|
||||||
@ -177,6 +178,8 @@ public class ReversePreindex {
|
|||||||
|
|
||||||
mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize());
|
mergedDocuments = shrinkMergedDocuments(mergedDocuments, docsFile, 2 * mergingSegment.totalSize());
|
||||||
|
|
||||||
|
mergingSegment.force();
|
||||||
|
|
||||||
return new ReversePreindex(
|
return new ReversePreindex(
|
||||||
mergingSegment,
|
mergingSegment,
|
||||||
new ReversePreindexDocuments(mergedDocuments, docsFile)
|
new ReversePreindexDocuments(mergedDocuments, docsFile)
|
||||||
|
@ -28,6 +28,8 @@ public class ReversePreindexWordSegments {
|
|||||||
Path wordsFile,
|
Path wordsFile,
|
||||||
Path countsFile)
|
Path countsFile)
|
||||||
{
|
{
|
||||||
|
assert wordIds.size() == counts.size();
|
||||||
|
|
||||||
this.wordIds = wordIds;
|
this.wordIds = wordIds;
|
||||||
this.counts = counts;
|
this.counts = counts;
|
||||||
this.wordsFile = wordsFile;
|
this.wordsFile = wordsFile;
|
||||||
@ -97,6 +99,11 @@ public class ReversePreindexWordSegments {
|
|||||||
Files.delete(wordsFile);
|
Files.delete(wordsFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void force() {
|
||||||
|
counts.force();
|
||||||
|
wordIds.force();
|
||||||
|
}
|
||||||
|
|
||||||
public class SegmentIterator {
|
public class SegmentIterator {
|
||||||
private final int recordSize;
|
private final int recordSize;
|
||||||
private final long fileSize;
|
private final long fileSize;
|
||||||
|
Loading…
Reference in New Issue
Block a user