mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-24 05:18:58 +00:00
Merge branch 'master' into live-search
This commit is contained in:
commit
14519294d2
@ -242,4 +242,8 @@ public class EdgeUrl implements Serializable {
|
|||||||
return this.domain;
|
return this.domain;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getProto() {
|
||||||
|
return this.proto;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@ import java.net.URISyntaxException;
|
|||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
import java.util.stream.Stream;
|
||||||
|
|
||||||
public class ParquetSerializableCrawlDataStream implements AutoCloseable, SerializableCrawlDataStream {
|
public class ParquetSerializableCrawlDataStream implements AutoCloseable, SerializableCrawlDataStream {
|
||||||
private static final Logger logger = LoggerFactory.getLogger(ParquetSerializableCrawlDataStream.class);
|
private static final Logger logger = LoggerFactory.getLogger(ParquetSerializableCrawlDataStream.class);
|
||||||
@ -26,9 +27,12 @@ public class ParquetSerializableCrawlDataStream implements AutoCloseable, Serial
|
|||||||
private boolean wroteDomainRecord = false;
|
private boolean wroteDomainRecord = false;
|
||||||
private final Path path;
|
private final Path path;
|
||||||
|
|
||||||
|
// Reference to the underlying stream that needs to be closed when this object is closed
|
||||||
|
private final Stream<CrawledDocumentParquetRecord> streamForClosing;
|
||||||
|
|
||||||
public ParquetSerializableCrawlDataStream(Path file) throws IOException {
|
public ParquetSerializableCrawlDataStream(Path file) throws IOException {
|
||||||
path = file;
|
path = file;
|
||||||
backingIterator = CrawledDocumentParquetRecordFileReader.stream(file).iterator();
|
backingIterator = (streamForClosing = CrawledDocumentParquetRecordFileReader.stream(file)).iterator();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -150,7 +154,9 @@ public class ParquetSerializableCrawlDataStream implements AutoCloseable, Serial
|
|||||||
etag));
|
etag));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() throws IOException {}
|
public void close() throws IOException {
|
||||||
|
streamForClosing.close();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public SerializableCrawlData next() throws IOException {
|
public SerializableCrawlData next() throws IOException {
|
||||||
|
Loading…
Reference in New Issue
Block a user