mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 21:18:58 +00:00
(loader) Fix bug where the loader would omit some meta and words.
This commit is contained in:
parent
f321fa5ad3
commit
a6f1335375
@ -28,6 +28,7 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
|
||||
private final FileChannel fileChannel;
|
||||
|
||||
private int numEntries = 0;
|
||||
private boolean closed = false;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
@ -99,6 +100,11 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
if (closed)
|
||||
return;
|
||||
else
|
||||
closed = true;
|
||||
|
||||
dataBuffer.flip();
|
||||
compressingStream.compress(dataBuffer);
|
||||
dataBuffer.clear();
|
||||
|
@ -70,7 +70,7 @@ public class LoaderIndexJournalWriter {
|
||||
buffer[2*i + 1] = meta[start+i];
|
||||
}
|
||||
|
||||
var entry = new IndexJournalEntryData(end-start, buffer);
|
||||
var entry = new IndexJournalEntryData(2 * (end-start), buffer);
|
||||
var header = new IndexJournalEntryHeader(combinedId, features, metadata.encode());
|
||||
|
||||
indexWriter.put(header, entry);
|
||||
|
@ -0,0 +1,83 @@
|
||||
package nu.marginalia.loading.loader;
|
||||
|
||||
import nu.marginalia.db.storage.FileStorageService;
|
||||
import nu.marginalia.db.storage.model.FileStorage;
|
||||
import nu.marginalia.db.storage.model.FileStorageType;
|
||||
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
|
||||
import nu.marginalia.keyword.model.DocumentKeywords;
|
||||
import nu.marginalia.model.idx.DocumentMetadata;
|
||||
import nu.marginallia.index.journal.IndexJournalFileNames;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
import org.mockito.Mockito;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.stream.LongStream;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class LoaderIndexJournalWriterTest {
|
||||
|
||||
Path tempDir;
|
||||
LoaderIndexJournalWriter writer;
|
||||
@BeforeEach
|
||||
public void setUp() throws IOException, SQLException {
|
||||
tempDir = Files.createTempDirectory(getClass().getSimpleName());
|
||||
FileStorageService storageService = Mockito.mock(FileStorageService.class);
|
||||
Mockito.when(storageService.getStorageByType(FileStorageType.INDEX_STAGING)).
|
||||
thenReturn(new FileStorage(null, null, null, tempDir.toString(),
|
||||
"test"));
|
||||
writer = new LoaderIndexJournalWriter(storageService);
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void tearDown() throws Exception {
|
||||
writer.close();
|
||||
List<Path> junk = Files.list(tempDir).toList();
|
||||
for (var item : junk)
|
||||
Files.delete(item);
|
||||
Files.delete(tempDir);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBreakup() throws Exception {
|
||||
String[] keywords = new String[2000];
|
||||
long[] metadata = new long[2000];
|
||||
for (int i = 0; i < 2000; i++) {
|
||||
keywords[i] = Integer.toString(i);
|
||||
metadata[i] = i+1;
|
||||
}
|
||||
DocumentKeywords words = new DocumentKeywords(keywords, metadata);
|
||||
writer.putWords(1, 0, new DocumentMetadata(0),
|
||||
words);
|
||||
|
||||
writer.close();
|
||||
|
||||
List<Path> journalFiles =IndexJournalFileNames.findJournalFiles(tempDir);
|
||||
assertEquals(1, journalFiles.size());
|
||||
|
||||
var reader = new IndexJournalReaderSingleCompressedFile(journalFiles.get(0));
|
||||
List<Long> docIds = new ArrayList<>();
|
||||
reader.forEachDocId(docIds::add);
|
||||
assertEquals(List.of(1L, 1L), docIds);
|
||||
|
||||
List<Long> metas = new ArrayList<Long>();
|
||||
reader.forEach(r -> {
|
||||
var entry = r.readEntry();
|
||||
for (int i = 0; i + 1 < entry.size(); i+=2) {
|
||||
entry.get(i);
|
||||
metas.add(entry.get(i+1));
|
||||
}
|
||||
});
|
||||
|
||||
assertEquals(LongStream.of(metadata).boxed().toList(), metas);
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user