Fixes to the sorting logic, and optimized the domain-statistics update so it no longer takes 4+ hours.

This commit is contained in:
vlofgren 2022-10-20 21:55:51 +02:00
parent 05762fe200
commit 5393167bf8
10 changed files with 115 additions and 481 deletions

View File

@ -74,7 +74,7 @@ public enum UnicodeRanges {
for (int i = 0; i < Math.min(2000, text.length()); i++) {
char c = text.charAt(i);
if (c >= min && c <= max) {
if (c >= min && c <= this.max) {
if (count++ > max) {
return true;
}

View File

@ -216,6 +216,9 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
@Override
public long get(long idx) {
if (idx < 0)
throw new IllegalArgumentException("get("+idx+")");
if (idx >= mappedSize)
grow(idx);
@ -650,7 +653,7 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
if (start + n + wordSize - 1 >= mappedSize)
grow(start + n + wordSize - 1);
if (n == 1) {
if (n <= 1) {
return;
}
@ -659,33 +662,29 @@ public class MultimapFileLong implements AutoCloseable, MultimapFileLongSlice {
int off = (int) (start % bufferSize);
for (int i = 1; i < n; i++) {
for (int j = i; j > 0; j--) {
int a = off + wordSize*(j-1);
int b = off + wordSize*j;
long key = buffer.get(off + i * wordSize);
if (buffer.get(a) > buffer.get(b)) {
for (int w = 0; w < wordSize; w++) {
long tmp = buffer.get(a+w);
buffer.put(a+w, buffer.get(b+w));
buffer.put(b+w, tmp);
}
int j = i - 1;
while (j >= 0 && buffer.get(off + wordSize*j) > key) {
for (int w = 0; w < wordSize; w++) {
long tmp = buffer.get(off+wordSize*j+w);
buffer.put(off+wordSize*j+w, buffer.get(off+wordSize*(j+1)+w));
buffer.put(off+wordSize*(j+1)+w, tmp);
}
else break;
j--;
}
buffer.put(off + (j+1) * wordSize, key);
}
}
else for (int i = 1; i < n; i++) {
for (int j = i; j > 0; j--) {
long a = start + (long)wordSize*(j-1);
long b = start + (long)wordSize*j;
long key = get(start + (long) i * wordSize);
if (get(a) > get(b)) {
swap(a, b);
}
else {
break;
}
int j = i - 1;
while (j >= 0 && get(start + (long)wordSize*j) > key) {
swapn(wordSize, start + (long)wordSize*j, start + (long)wordSize*(j+1));
j--;
}
put(start + (long) (j+1) * wordSize, key);
}
}

View File

@ -135,7 +135,7 @@ public class MultimapSorter {
if (low >= 0 && highInclusive >= 0 && low < highInclusive) {
if (highInclusive - low < 32) {
multimapFileLong.insertionSort(wordSize, low, (int) (1 + (highInclusive - low) / wordSize));
multimapFileLong.insertionSort(wordSize, low, (int) ((wordSize + highInclusive - low) / wordSize));
}
else {
long p = multimapFileLong.quickSortPartition(wordSize, low, highInclusive);

View File

@ -28,32 +28,8 @@ public class ReindexTriggerMain {
.followRedirects(true)
.build();
try (var ds = db.provideConnection(); var conn = ds.getConnection(); var stmt = conn.createStatement()) {
var rs = stmt.executeQuery("SELECT ID, DOMAIN_NAME, STATE, INDEXED FROM EC_DOMAIN LIMIT 100");
while (rs.next()) {
System.out.printf("%d %s %s %d\n",
rs.getInt(1),
rs.getString(2),
rs.getString(3),
rs.getInt(4));
}
rs = stmt.executeQuery("SELECT ID, DOMAIN_ID, PATH, VISITED, STATE FROM EC_URL LIMIT 100");
while (rs.next()) {
System.out.printf("%d %d %s %d %s\n",
rs.getInt(1),
rs.getInt(2),
rs.getString(3),
rs.getInt(4),
rs.getString(5));
}
stmt.executeUpdate("INSERT IGNORE INTO DOMAIN_METADATA(ID,GOOD_URLS,KNOWN_URLS,VISITED_URLS) SELECT ID,0,0,0 FROM EC_DOMAIN WHERE INDEXED>0");
stmt.executeUpdate("UPDATE DOMAIN_METADATA INNER JOIN (SELECT DOMAIN_ID,COUNT(*) CNT FROM EC_URL WHERE VISITED AND STATE='ok' GROUP BY DOMAIN_ID) T ON T.DOMAIN_ID=ID SET GOOD_URLS=CNT");
stmt.executeUpdate("UPDATE DOMAIN_METADATA INNER JOIN (SELECT DOMAIN_ID,COUNT(*) CNT FROM EC_URL GROUP BY DOMAIN_ID) T ON T.DOMAIN_ID=ID SET KNOWN_URLS=CNT");
stmt.executeUpdate("UPDATE DOMAIN_METADATA INNER JOIN (SELECT DOMAIN_ID,COUNT(*) CNT FROM EC_URL WHERE VISITED GROUP BY DOMAIN_ID) T ON T.DOMAIN_ID=ID SET VISITED_URLS=CNT");
}
var updateStatistics = new UpdateDomainStatistics(db.provideConnection());
updateStatistics.run();
var rb = new RequestBody() {

View File

@ -0,0 +1,66 @@
package nu.marginalia.wmsa.edge.converting;
import com.zaxxer.hikari.HikariDataSource;
import gnu.trove.map.hash.TIntIntHashMap;
import java.sql.SQLException;
/**
 * Recomputes the per-domain URL statistics in DOMAIN_METADATA
 * (KNOWN_URLS, GOOD_URLS, VISITED_URLS) by streaming EC_URL once
 * and aggregating counts in memory, then bulk-inserting the result.
 * <p>
 * Replaces the previous pure-SQL aggregation, which took 4+ hours.
 */
public class UpdateDomainStatistics {
    private final HikariDataSource dataSource;

    public UpdateDomainStatistics(HikariDataSource dataSource) {
        this.dataSource = dataSource;
    }

    /**
     * Rebuilds DOMAIN_METADATA from scratch.
     *
     * @throws SQLException on any database error
     */
    public void run() throws SQLException {

        // This looks weird, but it's actually much faster than doing the computations with SQL queries
        //
        // ... in part because we can assume the data is immutable and don't mind consuming egregious
        // resources

        try (var conn = dataSource.getConnection();
             var stmt = conn.createStatement();
             var domainInfoQuery = conn.prepareStatement("SELECT DOMAIN_ID, VISITED, STATE='ok' FROM EC_URL");
             var insertDomainInfo = conn.prepareStatement("INSERT INTO DOMAIN_METADATA(ID,KNOWN_URLS,GOOD_URLS,VISITED_URLS) VALUES (?, ?, ?, ?)")
        ) {
            stmt.executeUpdate("DELETE FROM DOMAIN_METADATA");

            // Trove primitive maps: no-entry value 0, so absent domains read as count 0
            TIntIntHashMap knownUrls = new TIntIntHashMap(1_000_000, 0.75f, 0, 0);
            TIntIntHashMap visitedUrls = new TIntIntHashMap(1_000_000, 0.75f, 0, 0);
            TIntIntHashMap goodUrls = new TIntIntHashMap(1_000_000, 0.75f, 0, 0);

            // Stream the (potentially huge) result set rather than loading it all at once
            domainInfoQuery.setFetchSize(10_000);
            var rsp = domainInfoQuery.executeQuery();
            while (rsp.next()) {
                int domainId = rsp.getInt(1);
                boolean visited = rsp.getBoolean(2);
                boolean stateOk = rsp.getBoolean(3);

                knownUrls.adjustOrPutValue(domainId, 1, 1);
                if (visited) {
                    visitedUrls.adjustOrPutValue(domainId, 1, 1);
                    if (stateOk) {
                        goodUrls.adjustOrPutValue(domainId, 1, 1);
                    }
                }
            }

            int i = 0;
            for (int domainId : knownUrls.keys()) {
                insertDomainInfo.setInt(1, domainId);
                insertDomainInfo.setInt(2, knownUrls.get(domainId));
                // BUGFIX: parameters 3 and 4 were previously swapped relative to the
                // column list -- GOOD_URLS received the visited count and VISITED_URLS
                // the good count. Column order is (ID,KNOWN_URLS,GOOD_URLS,VISITED_URLS).
                insertDomainInfo.setInt(3, goodUrls.get(domainId));
                insertDomainInfo.setInt(4, visitedUrls.get(domainId));
                insertDomainInfo.addBatch();

                // Flush in batches of 1000 to bound memory and round-trip count
                if ((++i % 1000) == 0) {
                    insertDomainInfo.executeBatch();
                }
            }
            // Flush the final partial batch (if the total wasn't a multiple of 1000)
            if ((i % 1000) != 0) {
                insertDomainInfo.executeBatch();
            }
        }
    }
}

View File

@ -153,7 +153,7 @@ class BTreeWriterTest {
for (int i = 0; i < 500; i++) {
long val = (long)(Long.MAX_VALUE * Math.random());
while (toPut.contains((int)val)) val = (long)(Long.MAX_VALUE * Math.random());
assertEquals(-1, reader.findEntry( val));
assertTrue(reader.findEntry( val) < 0);
}
}
} catch (Exception e) {
@ -203,7 +203,7 @@ class BTreeWriterTest {
for (int i = 0; i < 500; i++) {
long val = (long) (Long.MAX_VALUE * Math.random());
while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
assertEquals(-1, reader.findEntry(val));
assertTrue(reader.findEntry( val) < 0);
}
}
} catch (Exception e) {
@ -255,7 +255,7 @@ class BTreeWriterTest {
for (int i = 0; i < 500; i++) {
long val = (long) (Long.MAX_VALUE * Math.random());
while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
assertEquals(-1, reader.findEntry(val & mask));
assertTrue(reader.findEntry(val & mask) < 0);
}
}
} catch (Exception e) {
@ -309,7 +309,7 @@ class BTreeWriterTest {
for (int i = 0; i < 500; i++) {
long val = (long) (Long.MAX_VALUE * Math.random());
while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
assertEquals(-1, reader.findEntry(val & mask));
assertTrue(reader.findEntry(val & mask) < 0);
}
}
} catch (Exception e) {

View File

@ -1,382 +0,0 @@
package nu.marginalia.util.btree;
import nu.marginalia.util.btree.model.BTreeContext;
import nu.marginalia.util.btree.model.BTreeHeader;
import nu.marginalia.util.multimap.MultimapFileLong;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.channels.FileChannel;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.StringJoiner;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Tests for BTreeWriter/BTreeReader round-trips over a memory-mapped
 * MultimapFileLong: header layout, index layer counts, positive and
 * negative key lookups, with and without key masking.
 * (This file is removed by the commit this listing belongs to.)
 */
class BTreeWriterTestCachedReader {

    // NOTE(review): assumed ctor args are (blockSizeBits, entrySize, equalityMask, maxLayers)
    // -- confirm against BTreeContext; entry size 2 means (key, value) long pairs.
    final BTreeContext ctx = new BTreeContext(4, 2, 0xFFFF_FFFF_FFFF_FFFFL, 8);
    final BTreeWriter writer = new BTreeWriter(null, ctx);
    Logger logger = LoggerFactory.getLogger(getClass());

    @Test
    void testSmallDataBlock() {
        // A data set of half a block needs no index layer: the data region starts
        // right after the header, and the index region is empty (same offset).
        var header = writer.makeHeader(1024, ctx.BLOCK_SIZE_WORDS()/2);
        assertEquals(1024 + BTreeHeader.BTreeHeaderSizeLongs, header.dataOffsetLongs());
        assertEquals(header.dataOffsetLongs(), header.indexOffsetLongs());
    }

    @Test
    void testLayerCount() {
        // Layer count should step up exactly when the entry count crosses a power
        // of the block size: <= BLOCK^2 entries -> 2 layers, <= BLOCK^3 -> 3, etc.
        int wsq = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();
        int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();

        assertEquals(2, writer.makeHeader(1024, wsq-1).layers());
        assertEquals(2, writer.makeHeader(1024, wsq).layers());
        assertEquals(3, writer.makeHeader(1024, wsq+1).layers());
        assertEquals(3, writer.makeHeader(1024, wcub-1).layers());
        assertEquals(3, writer.makeHeader(1024, wcub).layers());
        assertEquals(4, writer.makeHeader(1024, wcub+1).layers());
    }

    @Test
    void testLayerOffset() {
        int wcub = ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS()*ctx.BLOCK_SIZE_WORDS();
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 0));
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 1));
        System.out.println(writer.makeHeader(1025, wcub).relativeIndexLayerOffset(ctx, 2));

        for (int i = 0; i < 1024; i++) {
            var header = writer.makeHeader(0, i);

            printTreeLayout(i, header, ctx);

            // The top index layer must always be exactly one block wide
            if (header.layers() >= 1) {
                assertEquals(1, ctx.indexLayerSize(i, header.layers() - 1) / ctx.BLOCK_SIZE_WORDS());
            }
        }
    }

    // Debug helper: prints the per-layer block counts for a tree of the given size.
    private void printTreeLayout(int numEntries, BTreeHeader header, BTreeContext ctx) {
        StringJoiner sj = new StringJoiner(",");
        for (int l = 0; l < header.layers(); l++) {
            sj.add(""+ctx.indexLayerSize(numEntries, l)/ctx.BLOCK_SIZE_WORDS());
        }
        System.out.println(numEntries + ":" + sj);
    }

    @Test
    public void testWriteEntrySize2() throws IOException {
        // Round-trip 500 random (key, ordinal) pairs and verify every key is found
        // and maps back to its ordinal.
        var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");

        Set<Integer> toPut = new HashSet<>();
        for (int i = 0; i < 500; i++) {
            // add() returns false on duplicates; loop until a fresh key is generated
            while (!toPut.add((int)(Integer.MAX_VALUE * Math.random())));
        }
        int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray();

        try {
            RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
            MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);
            {
                var writer = new BTreeWriter(mmf, ctx);
                writer.write(0, toPut.size(), (slice) -> {
                    for (int i = 0; i < data.length; i++) {
                        slice.put(2L*i, data[i]);
                        slice.put( 2L*i + 1, i);
                    }
                });
                mmf.force();
            }

            {
                var reader = new BTreeReader(mmf, ctx, 0);
                for (int i = 0; i < data.length; i++) {
                    long offset = reader.findEntry(data[i]);
                    assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                    assertEquals(i, mmf.get(offset+1));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            Files.delete(tempFile);
        }
    }

    @Test
    public void testQB() throws IOException {
        // Exercises retainEntries(): filters a query buffer against a tree of
        // multiples of three and prints the surviving values.
        var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");

        Set<Integer> toPut = new HashSet<>();
        for (int i = 0; i < 144646; i++) {
            while (!toPut.add(3*i));
        }
        int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray();

        try {
            RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
            MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);
            {
                var writer = new BTreeWriter(mmf, ctx);
                writer.write(0, toPut.size(), (slice) -> {
                    for (int i = 0; i < data.length; i++) {
                        slice.put(2L*i, data[i]);
                        slice.put( 2L*i + 1, i);
                    }
                });
                mmf.force();
            }

            {
                var reader = new BTreeReader(mmf, ctx, 0);
                // NOTE(review): offset is computed but never asserted in this loop;
                // it only warms/exercises findEntry over the whole data set.
                for (int i = 0; i < data.length; i++) {
                    long offset = reader.findEntry(data[i]);
                }

                long[] d = new long[] { -1, 1, 5000, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 30, 300, 303, 306, 312, 330,3000,30000,300000};
                BTreeQueryBuffer buffer = new BTreeQueryBuffer(d, d.length);
                Arrays.sort(buffer.data);
                System.out.println("layers = " + reader.getHeader().layers());
                reader.retainEntries(buffer);
                buffer.finalizeFiltering();

                for (int i = 0; i < buffer.end; i++) {
                    System.out.println(buffer.data[i]);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            Files.delete(tempFile);
        }
    }

    @Test
    public void testWriteEntrySize2Small() throws IOException {
        // Same round-trip as testWriteEntrySize2 but with only 5 entries (single
        // data block, no index layers), plus negative lookups for absent keys.
        var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");

        Set<Integer> toPut = new HashSet<>();
        for (int i = 0; i < 5; i++) {
            while (!toPut.add((int)(Integer.MAX_VALUE * Math.random())));
        }
        int[] data = toPut.stream().mapToInt(Integer::valueOf).sorted().toArray();

        try {
            RandomAccessFile raf = new RandomAccessFile(tempFile.toFile(), "rw");
            MultimapFileLong mmf = new MultimapFileLong(raf, FileChannel.MapMode.READ_WRITE, 10000, 1000);
            {
                var writer = new BTreeWriter(mmf, ctx);
                writer.write( 0, toPut.size(), (slice) -> {
                    for (int i = 0; i < data.length; i++) {
                        slice.put(2L*i, data[i]);
                        slice.put(2L*i + 1, i);
                    }
                });
                mmf.force();
            }

            {
                var reader = new BTreeReader(mmf, ctx, 0);
                for (int i = 0; i < data.length; i++) {
                    long offset = reader.findEntry(data[i]);
                    assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                    assertEquals(i, mmf.get(offset+1));
                }

                // Keys not in the tree must not be found
                for (int i = 0; i < 500; i++) {
                    long val = (long)(Long.MAX_VALUE * Math.random());
                    while (toPut.contains((int)val)) val = (long)(Long.MAX_VALUE * Math.random());
                    assertEquals(-1, reader.findEntry(val));
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            Files.delete(tempFile);
        }
    }

    @Test
    public void testWriteEqualityNotMasked() throws IOException {
        // Entry size 1 (bare keys), full-width equality mask (~0), varied block sizes.
        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();

            var ctx = new BTreeContext(5, 1, ~0, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }

            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i, data[i]);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new BTreeReader(mmf, ctx, 0);

                    printTreeLayout(toPut.size(), reader.getHeader(), ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(data[i]);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(val));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }

    @Test
    public void testWriteEqualityMasked() throws IOException {
        // Equality restricted to the upper 32 bits: lookups use (key & mask) but
        // the stored full-width value must still come back intact.
        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();
            long mask = 0xFFFF_FFFF_0000_0000L;
            var ctx = new BTreeContext(5, 1, mask, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }

            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i, data[i]);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new BTreeReader(mmf, ctx, 0);

                    printTreeLayout(toPut.size(), reader.getHeader(), ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(data[i] & mask);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(val & mask));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }

    @Test
    public void testWriteTwoEqualityMasked() throws IOException {
        // Like testWriteEqualityMasked, but entry size 2: each entry is a
        // (masked key, ordinal) pair and both words are verified on lookup.
        for (int bs = 2; bs <= 4; bs++) {
            var tempFile = Files.createTempFile(Path.of("/tmp"), "tst", "dat");
            Set<Long> toPut = new HashSet<>();
            long mask = 0xFFFF_FFFF_0000_0000L;
            var ctx = new BTreeContext(5, 2, mask, bs);

            for (int i = 0; i < 500; i++) {
                while (!toPut.add((long) (Long.MAX_VALUE * Math.random()))) ;
            }

            long[] data = toPut.stream().mapToLong(Long::valueOf).sorted().toArray();

            try (MultimapFileLong mmf = MultimapFileLong.forOutput(tempFile, 1000)) {
                {
                    var writer = new BTreeWriter(mmf, ctx);
                    writer.write(0, toPut.size(), (slice) -> {
                        for (int i = 0; i < data.length; i++) {
                            slice.put(i*2L, data[i]);
                            slice.put(i*2L+1, i);
                        }
                    });
                    mmf.force();
                }

                {
                    var reader = new BTreeReader(mmf, ctx, 0);

                    printTreeLayout(toPut.size(), reader.getHeader(), ctx);

                    for (int i = 0; i < data.length; i++) {
                        long offset = reader.findEntry(data[i] & mask);
                        assertTrue(offset >= 0, "Negative offset for " + i + " -> " + offset);
                        assertEquals(data[i], mmf.get(offset));
                        assertEquals(i, mmf.get(offset+1));
                    }

                    for (int i = 0; i < 500; i++) {
                        long val = (long) (Long.MAX_VALUE * Math.random());
                        while (toPut.contains(val)) val = (long) (Long.MAX_VALUE * Math.random());
                        assertEquals(-1, reader.findEntry(val & mask));
                    }
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                Files.delete(tempFile);
            }
        }
    }
}

View File

@ -18,7 +18,7 @@ class LanguageFilterTest {
}
@Test
public void isStringChinsese() {
public void isStringChinese() {
var languageFilter = new LanguageFilter();
assertTrue(languageFilter.isBlockedUnicodeRange("溶岩ドームの手前に広がる斜面木が生えているところは普賢岳の山体です今回の噴火にともないこのあたりの山体がマグマに押されて変形し写真では左にむかって100mほどせりだしました\n"));
}

View File

@ -103,6 +103,7 @@ class MultimapFileTest {
var sorter = file.createSorter(Path.of("/tmp"), 16, 2);
for (int start = 0; start < 8; start+=2) {
System.out.println("~");
for (int end = start; end < 128; end+=2) {
for (int i = 0; i < 128; i+=2) {
file.put(i, -i/2);
@ -110,9 +111,17 @@ class MultimapFileTest {
}
sorter.quickSortLH(start, end);
for (int i = start+2; i < end; i+=2) {
System.out.println("**" + i);
System.out.println(file.get(i-2));
System.out.println(file.get(i-1));
System.out.println(file.get(i));
System.out.println(file.get(i+1));
assertTrue(file.get(i-2) <= file.get(i));
assertEquals(file.get(i+1), -file.get(i));
}
System.out.println("~");
}
}
@ -158,13 +167,17 @@ class MultimapFileTest {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 128, 8);
var sorter = file.createSorter(Path.of("/tmp"), 16, 2);
for (int start = 0; start < 8; start+=2) {
for (int start = 2; start < 8; start+=2) {
for (int end = start+2; end < 126; end+=2) {
for (int i = 0; i < 128; i+=2) {
file.put(i, -(128-i/2));
file.put(i+1, (128-i)/2);
file.put(i+1, (128-i/2));
}
sorter.insertionSort(start, (end - start));
file.put(0, 0xFFFF_FFFFL);
file.put(end, 0x7FFF_FFFFL);
sorter.insertionSort(start, (end - start)/2);
assertEquals(0xFFFF_FFFFL, file.get(0));
assertEquals(file.get(end), 0x7FFF_FFFFL);
for (int i = start+2; i < end; i+=2) {
assertTrue(file.get(i-2) <= file.get(i));
assertEquals(file.get(i+1), -file.get(i));
@ -178,14 +191,14 @@ class MultimapFileTest {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 128, 8);
var sorter = file.createSorter(Path.of("/tmp"), 16, 2);
for (int start = 0; start < 512; start+=2) {
for (int start = 0; start < 512; start+=18) {
System.out.println(start);
for (int end = start+2; end < 8192; end+=2) {
for (int end = start+2; end < 8192; end+=68) {
for (int i = 0; i < 8192; i+=2) {
file.put(i, -i/2);
file.put(i+1, i/2);
}
sorter.quickSortLH(start, end);
sorter.mergeSort(start, end-start);
assertEquals(file.get(start+1), -file.get(start));
for (int i = start+2; i < end; i+=2) {
@ -216,24 +229,6 @@ class MultimapFileTest {
}
}
@Test
void sortInternalSS2() throws IOException {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8);
var sorter = file.createSorter(Path.of("/tmp"), 16, 2);
for (int i = 0; i < 32; i+=2) {
file.put(i, 32-i/2);
file.put(i+1, ~(32-i/2));
}
sorter.sortRange( 2, 14);
for (int i = 2+2; i < 16; i+=2) {
System.out.println(file.get(i) + "-" + ~file.get(i+1));
assertTrue(file.get(i) > file.get(i-2));
}
}
@Test
void sortExternal() throws IOException {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8);
@ -252,25 +247,7 @@ class MultimapFileTest {
assertTrue(searcher.binarySearchTest(file.get(i), 2, 16));
}
}
@Test
void sortExternalSS2() throws IOException {
var file = new MultimapFileLong(new RandomAccessFile(tmp, "rw"), FileChannel.MapMode.READ_WRITE, 32, 8);
var sorter = file.createSorter(Path.of("/tmp"), 2, 2);
var searcher = file.createSearcher();
for (int i = 0; i < 32; i+=2) {
file.put(i, 32-i/2);
file.put(i+1, ~(32-i/2));
}
sorter.sortRange( 2, 14);
file.force();
for (int i = 2+2; i < 16; i+=2) {
System.out.println(file.get(i) + "-" + ~file.get(i+1));
assertTrue(file.get(i) > file.get(i-2));
}
}
@Test
void close() {

View File

@ -136,14 +136,12 @@ class IndexQueryTest {
IndexQuery query = new IndexQuery(List.of(threesRange().asPrefixSource(102, 200)));
/** Read from 17s range */
// 17s range is shorter and should read fully in one go
/** Read from 3s range */
query.getMoreResults(buffer);
System.out.println(Arrays.toString(buffer.copyData()));
assertFalse(buffer.isEmpty());
assertArrayEquals(LongStream.range(0, 100).map(l -> l*17).toArray(), buffer.copyData());
assertArrayEquals(LongStream.range(100, 200).filter(v -> (v % 3) == 0).toArray(), buffer.copyData());
}