(loader) Fix bug where the loader would omit some metadata and words.

This commit is contained in:
Viktor Lofgren 2023-08-31 17:48:08 +02:00
parent f321fa5ad3
commit a6f1335375
3 changed files with 90 additions and 1 deletions

View File

@ -28,6 +28,7 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
private final FileChannel fileChannel;
private int numEntries = 0;
private boolean closed = false;
private final Logger logger = LoggerFactory.getLogger(getClass());
@ -99,6 +100,11 @@ public class IndexJournalWriterSingleFileImpl implements IndexJournalWriter{
}
public void close() throws IOException {
if (closed)
return;
else
closed = true;
dataBuffer.flip();
compressingStream.compress(dataBuffer);
dataBuffer.clear();

View File

@ -70,7 +70,7 @@ public class LoaderIndexJournalWriter {
buffer[2*i + 1] = meta[start+i];
}
var entry = new IndexJournalEntryData(end-start, buffer);
var entry = new IndexJournalEntryData(2 * (end-start), buffer);
var header = new IndexJournalEntryHeader(combinedId, features, metadata.encode());
indexWriter.put(header, entry);

View File

@ -0,0 +1,83 @@
package nu.marginalia.loading.loader;
import nu.marginalia.db.storage.FileStorageService;
import nu.marginalia.db.storage.model.FileStorage;
import nu.marginalia.db.storage.model.FileStorageType;
import nu.marginalia.index.journal.reader.IndexJournalReaderSingleCompressedFile;
import nu.marginalia.keyword.model.DocumentKeywords;
import nu.marginalia.model.idx.DocumentMetadata;
import nu.marginallia.index.journal.IndexJournalFileNames;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import org.mockito.Mockito;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.LongStream;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Round-trip test for {@link LoaderIndexJournalWriter}: keywords are written
 * through the loader, the resulting journal file is read back, and the
 * metadata values are compared against what was written.
 */
class LoaderIndexJournalWriterTest {
    /** Scratch directory handed to the writer via the mocked storage service. */
    Path tempDir;
    LoaderIndexJournalWriter writer;

    /**
     * Creates a fresh temp directory and a writer whose INDEX_STAGING storage
     * resolves to that directory.
     */
    @BeforeEach
    public void setUp() throws IOException, SQLException {
        tempDir = Files.createTempDirectory(getClass().getSimpleName());

        FileStorageService storageService = Mockito.mock(FileStorageService.class);
        Mockito.when(storageService.getStorageByType(FileStorageType.INDEX_STAGING))
                .thenReturn(new FileStorage(null, null, null, tempDir.toString(), "test"));

        writer = new LoaderIndexJournalWriter(storageService);
    }

    /**
     * Closes the writer and removes everything the test left in the temp
     * directory, then the directory itself.
     */
    @AfterEach
    public void tearDown() throws Exception {
        // The test body may already have closed the writer; this call is
        // repeated here so that the writer is closed even if the test failed early.
        writer.close();

        // Files.list holds an open directory handle until the stream is closed,
        // so collect the entries inside try-with-resources before deleting.
        List<Path> junk;
        try (var listing = Files.list(tempDir)) {
            junk = listing.toList();
        }

        for (var item : junk) {
            Files.delete(item);
        }
        Files.delete(tempDir);
    }

    /**
     * Writes 2000 keywords for a single document and verifies that every
     * metadata value can be read back from the journal, in order. The
     * assertions expect the data to come back as two journal entries that
     * both carry document id 1.
     */
    @Test
    public void testBreakup() throws Exception {
        String[] keywords = new String[2000];
        long[] metadata = new long[2000];
        for (int i = 0; i < 2000; i++) {
            keywords[i] = Integer.toString(i);
            metadata[i] = i + 1;
        }
        DocumentKeywords words = new DocumentKeywords(keywords, metadata);

        writer.putWords(1, 0, new DocumentMetadata(0), words);
        writer.close();

        List<Path> journalFiles = IndexJournalFileNames.findJournalFiles(tempDir);
        assertEquals(1, journalFiles.size());

        var reader = new IndexJournalReaderSingleCompressedFile(journalFiles.get(0));

        List<Long> docIds = new ArrayList<>();
        reader.forEachDocId(docIds::add);
        assertEquals(List.of(1L, 1L), docIds);

        List<Long> metas = new ArrayList<>();
        reader.forEach(r -> {
            var entry = r.readEntry();
            // Entries are read as pairs: the value at the even index is read
            // but not asserted on, the value at the odd index is the metadata.
            for (int i = 0; i + 1 < entry.size(); i += 2) {
                entry.get(i);
                metas.add(entry.get(i + 1));
            }
        });

        assertEquals(LongStream.of(metadata).boxed().toList(), metas);
    }
}