Mirror of https://github.com/MarginaliaSearch/MarginaliaSearch.git
(loader) Fix bug where the loader would omit some meta and words.
commit a6f1335375
parent f321fa5ad3
@@ -28,6 +28,7 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
     private final FileChannel fileChannel;
 
     private int numEntries = 0;
+    private boolean closed = false;
 
     private final Logger logger = LoggerFactory.getLogger(getClass());
 
@@ -99,6 +100,11 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
     }
 
     public void close() throws IOException {
+        if (closed)
+            return;
+        else
+            closed = true;
+
         dataBuffer.flip();
         compressingStream.compress(dataBuffer);
         dataBuffer.clear();
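
The guard above makes close() idempotent. The test added later in this commit calls writer.close() inside testBreakup and again in tearDown (the LoaderIndexJournalWriter presumably delegates to this writer), and without the closed flag the second call would flip and compress the buffer a second time. A minimal sketch of the same pattern on a hypothetical Closeable; the names below are illustrative and not from the repository:

import java.io.Closeable;
import java.io.IOException;

// Illustrative only: an idempotent close() in the style of the change above.
class IdempotentWriterSketch implements Closeable {
    private boolean closed = false;

    @Override
    public void close() throws IOException {
        if (closed)
            return;
        else
            closed = true;

        flush();  // the flip/compress/write shutdown work runs exactly once
    }

    private void flush() throws IOException {
        // stands in for the dataBuffer.flip()/compress()/clear() sequence in the real writer
    }
}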
@@ -70,7 +70,7 @@ public class LoaderIndexJournalWriter {
             buffer[2*i + 1] = meta[start+i];
         }
 
-        var entry = new IndexJournalEntryData(end-start, buffer);
+        var entry = new IndexJournalEntryData(2 * (end-start), buffer);
         var header = new IndexJournalEntryHeader(combinedId, features, metadata.encode());
 
         indexWriter.put(header, entry);
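
This hunk is the substantive fix. The context line shows that odd slots hold metadata (buffer[2*i + 1] = meta[start+i]), with the even slots presumably holding the corresponding word ids, so an entry covering end-start keywords spans 2 * (end-start) longs. Sizing the entry with the keyword count instead of the long count would truncate each entry to roughly half its payload, which matches the commit message about omitted meta and words. A small sketch of the interleaved packing; the method and argument names are hypothetical, only the 2*i / 2*i+1 layout comes from the diff:

// Illustrative sketch of the interleaved (word id, metadata) packing used above.
final class InterleavedPackingSketch {
    static long[] packInterleaved(long[] wordIds, long[] meta, int start, int end) {
        long[] buffer = new long[2 * (end - start)];
        for (int i = 0; i < end - start; i++) {
            buffer[2 * i] = wordIds[start + i];   // even slots: word ids
            buffer[2 * i + 1] = meta[start + i];  // odd slots: word metadata
        }
        // The journal entry must be sized to the number of longs,
        // i.e. 2 * (end - start), not the number of keywords (end - start).
        return buffer;
    }
}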
@@ -0,0 +1,83 @@
+package nu.marginalia.loading.loader;
+
+import nu.marginalia.db.storage.FileStorageService;
+import nu.marginalia.db.storage.model.FileStorage;
+import nu.marginalia.db.storage.model.FileStorageType;
+import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
+import nu.marginalia.keyword.model.DocumentKeywords;
+import nu.marginalia.model.idx.DocumentMetadata;
+import nu.marginalia.index.journal.IndexJournalFileNames;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+import org.mockito.Mockito;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.LongStream;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+class LoaderIndexJournalWriterTest {
+
+    Path tempDir;
+    LoaderIndexJournalWriter writer;
+    @BeforeEach
+    public void setUp() throws IOException, SQLException {
+        tempDir = Files.createTempDirectory(getClass().getSimpleName());
+        FileStorageService storageService = Mockito.mock(FileStorageService.class);
+        Mockito.when(storageService.getStorageByType(FileStorageType.INDEX_STAGING)).
+                thenReturn(new FileStorage(null, null, null, tempDir.toString(),
+                        "test"));
+        writer = new LoaderIndexJournalWriter(storageService);
+    }
+
+    @AfterEach
+    public void tearDown() throws Exception {
+        writer.close();
+        List<Path> junk = Files.list(tempDir).toList();
+        for (var item : junk)
+            Files.delete(item);
+        Files.delete(tempDir);
+    }
+
+    @Test
+    public void testBreakup() throws Exception {
+        String[] keywords = new String[2000];
+        long[] metadata = new long[2000];
+        for (int i = 0; i < 2000; i++) {
+            keywords[i] = Integer.toString(i);
+            metadata[i] = i+1;
+        }
+        DocumentKeywords words = new DocumentKeywords(keywords, metadata);
+        writer.putWords(1, 0, new DocumentMetadata(0),
+                words);
+
+        writer.close();
+
+        List<Path> journalFiles = IndexJournalFileNames.findJournalFiles(tempDir);
+        assertEquals(1, journalFiles.size());
+
+        var reader = new IndexJournalReaderSingleCompressedFile(journalFiles.get(0));
+        List<Long> docIds = new ArrayList<>();
+        reader.forEachDocId(docIds::add);
+        assertEquals(List.of(1L, 1L), docIds);
+
+        List<Long> metas = new ArrayList<Long>();
+        reader.forEach(r -> {
+            var entry = r.readEntry();
+            for (int i = 0; i + 1 < entry.size(); i+=2) {
+                entry.get(i);
+                metas.add(entry.get(i+1));
+            }
+        });
+
+        assertEquals(LongStream.of(metadata).boxed().toList(), metas);
+    }
+}
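
The new test writes a 2000-keyword document through the loader and checks the round trip: the document id appears twice among the journal entries (the writer breaks a large document into more than one entry) and every metadata value 1..2000 comes back in order. As a usage note, the read-back portion can stand on its own; the sketch below reuses only the reader calls that appear in the test and dumps word/metadata pairs for every journal file in a directory. Treat it as an assumption-laden sketch, not a documented API example:

import java.nio.file.Path;
import java.util.List;

import nu.marginalia.index.journal.IndexJournalFileNames;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;

// Sketch: print the (word id, metadata) pairs stored in each journal file.
// Only reader calls exercised by the test above are used.
class JournalDumpSketch {
    public static void main(String[] args) throws Exception {
        Path dir = Path.of(args[0]);
        List<Path> journalFiles = IndexJournalFileNames.findJournalFiles(dir);
        for (Path file : journalFiles) {
            var reader = new IndexJournalReaderSingleCompressedFile(file);
            reader.forEach(r -> {
                var entry = r.readEntry();
                for (int i = 0; i + 1 < entry.size(); i += 2) {
                    System.out.println(entry.get(i) + " -> " + entry.get(i + 1));
                }
            });
        }
    }
}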