mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-02-23 13:09:00 +00:00
(sample-exporter) Add some limits on sizes and lengths
Tar files will reject entries with filenames over 100 bytes, so we need a limit there. Also added a maximum size limit to keep the file sizes reasonable.
This commit is contained in:
parent
0846606b12
commit
1b8b97b8ec
@ -37,7 +37,14 @@ public class SampleDataExporter {
|
|||||||
List<WorkLogEntry> entriesAll = new ArrayList<>(100_000);
|
List<WorkLogEntry> entriesAll = new ArrayList<>(100_000);
|
||||||
|
|
||||||
for (var item : WorkLog.iterable(crawlerLogFile)) {
|
for (var item : WorkLog.iterable(crawlerLogFile)) {
|
||||||
if (item.cnt() < 2) continue;
|
if (item.cnt() < 2) // this one's too small
|
||||||
|
continue;
|
||||||
|
if (item.cnt() > 5000) // this one's too big
|
||||||
|
continue;
|
||||||
|
if (item.relPath().length() > 90) // this one's too long
|
||||||
|
continue; // TAR file name limit is 100, but we stay below it to leave some margin for good measure
|
||||||
|
|
||||||
|
// this one's just right
|
||||||
entriesAll.add(item);
|
entriesAll.add(item);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user