feat: deprecate uploads.cacheFileIdentifiers conf

maintaining it adds unnecessary complexity,
and it's a feature that doesn't scale well anyway

also renamed "queryDbForFileCollisions" to
"queryDatabaseForIdentifierMatch"
and updated the config description accordingly

this should also now properly free the internal onHold Set
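
migration is a one-line rename in the config file. a minimal
sketch of before/after (plain objects standing in for the real
config.js; values are just the old sample defaults):

```js
// before (now deprecated; logs an error at startup)
const oldConfig = {
  uploads: {
    cacheFileIdentifiers: false,
    queryDbForFileCollisions: true
  }
}

// after (the old name is still honored as a fallback)
const newConfig = {
  uploads: {
    queryDatabaseForIdentifierMatch: true
  }
}
```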
Bobby Wibowo 2022-07-29 09:14:55 +07:00
parent 03eff45e8c
commit fae28f9aa2
4 changed files with 58 additions and 77 deletions

config.sample.js

@@ -567,42 +567,21 @@ module.exports = {
   },
   /*
-    Cache file identifiers.
-    They will be used for stricter collision checks, such that a single identifier
-    may not be used by more than a single file (e.g. if "abcd.jpg" already exists, a new PNG
-    file may not be named as "abcd.png").
-    If this is enabled, the safe will query files from the database during first launch,
-    parse their names, then cache the identifiers into memory.
-    Its downside is that it will use a bit more memory.
-    If this is disabled, collision check will become less strict.
-    As in, the same identifier may be used by multiple different extensions (e.g. if "abcd.jpg"
-    already exists, new files can be possibly be named as "abcd.png", "abcd.mp4", etc).
-    Its downside will be, in the rare chance that multiple image/video files are sharing the same
-    identifier, they will end up with the same thumbnail in dashboard, since thumbnails will
+    The service will query database on every new uploads,
+    to make sure newly generated random identifier will not match any existing uploads.
+    Otherwise, the same identifier may be used by multiple different extensions
+    (e.g. if "abcd.jpg" already exists, new files can be named as "abcd.png", "abcd.mp4", etc).
+    In the rare chance that multiple image/video files are sharing the same identifier,
+    they will end up with the same thumbnail in dashboard, since thumbnails will
     only be saved as PNG in storage (e.g. "abcd.jpg" and "abcd.png" will share a single thumbnail
     named "abcd.png" in thumbs directory, in which case, the file that's uploaded the earliest will
     be the source for the thumbnail).
     Unless you do not use thumbnails, it is highly recommended to enable this feature.
   */
-  cacheFileIdentifiers: false,
-  /*
-    An alternative to caching file identifiers.
-    Basically the service will instead query the database for stricter collision checks.
-    Right off the bat this has the disadvantage of adding one or more SQL queries on every
-    new uploads, but it has the advantage of not having to pre-cache anything.
-    Essentially this reduces the service's startup time and memory usage, but slows down new uploads.
-    As this is an alternative, you need to disable cacheFileIdentifiers to use this.
-    You'll have to figure out which method suits your use case best.
-  */
-  queryDbForFileCollisions: true,
+  queryDatabaseForIdentifierMatch: true,
   /*
     The length of the randomly generated identifier for albums.
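
To illustrate the thumbnail caveat described in the comment above: thumbnails are keyed by the identifier only, so two uploads differing only by extension resolve to the same thumbnail file. A minimal sketch, with the thumbs directory name assumed:

```js
const path = require('path')

// Thumbnails are saved as "<identifier>.png", ignoring the upload's own
// extension, so identifiers shared across extensions share one thumbnail.
const thumbNameFor = filename => {
  const identifier = filename.split('.')[0]
  return path.join('thumbs', `${identifier}.png`)
}

console.log(thumbNameFor('abcd.jpg')) // thumbs/abcd.png
console.log(thumbNameFor('abcd.png')) // thumbs/abcd.png -- same file
```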

controllers/uploadController.js

@@ -12,8 +12,15 @@ const ServerError = require('./utils/ServerError')
 const config = require('./../config')
 const logger = require('./../logger')
 
+/** Deprecated config options */
+if (config.uploads.cacheFileIdentifiers) {
+  logger.error('Config option "uploads.cacheFileIdentifiers" is DEPRECATED.')
+  logger.error('There is now only "uploads.queryDatabaseForIdentifierMatch" for a similar behavior.')
+}
+
 const self = {
-  onHold: new Set(),
+  onHold: new Set(), // temporarily held random upload identifiers
   scanHelpers: {}
 }
@@ -51,6 +58,9 @@ const enableHashing = config.uploads.hash === undefined
   ? true
   : Boolean(config.uploads.hash)
 
+const queryDatabaseForIdentifierMatch = config.uploads.queryDatabaseForIdentifierMatch ||
+  config.uploads.queryDbForFileCollisions // old config name for identical behavior
+
 /** Chunks helper class & function **/
 
 class ChunksData {
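
The fallback above keeps the legacy option working. A small sketch of what the resolved flag evaluates to for each combination (plain objects standing in for the real config):

```js
// Resolution mirrors the expression in the diff above.
const resolve = uploads => Boolean(
  uploads.queryDatabaseForIdentifierMatch || uploads.queryDbForFileCollisions
)

console.log(resolve({ queryDatabaseForIdentifierMatch: true })) // true
console.log(resolve({ queryDbForFileCollisions: true }))        // true (legacy name)
console.log(resolve({}))                                        // false -> filesystem checks only
```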
@@ -139,19 +149,16 @@ self.parseFileIdentifierLength = fileLength => {
   }
 }
 
-self.getUniqueRandomName = async (length, extension) => {
+self.getUniqueUploadIdentifier = async (length, extension = '', res) => {
   for (let i = 0; i < utils.idMaxTries; i++) {
     const identifier = randomstring.generate(length)
-    const name = identifier + extension
-    if (config.uploads.cacheFileIdentifiers) {
-      if (utils.idSet.has(identifier)) {
-        logger.log(`Identifier ${identifier} is already in use (${i + 1}/${utils.idMaxTries}).`)
+
+    if (queryDatabaseForIdentifierMatch) {
+      // If must query database for identifiers matches
+      if (self.onHold.has(identifier)) {
+        logger.log(`Identifier ${identifier} is currently held by another upload (${i + 1}/${utils.idMaxTries}).`)
         continue
       }
-      utils.idSet.add(identifier)
-      logger.debug(`Added ${identifier} to identifiers cache`)
-    } else if (config.uploads.queryDbForFileCollisions) {
-      if (self.onHold.has(identifier)) continue
 
       // Put token on-hold (wait for it to be inserted to DB)
       self.onHold.add(identifier)
@@ -165,8 +172,19 @@ self.getUniqueRandomName = async (length, extension) => {
         logger.log(`Identifier ${identifier} is already in use (${i + 1}/${utils.idMaxTries}).`)
         continue
       }
+
+      // Unhold identifier once the Response has been sent
+      if (res) {
+        if (!res.locals.identifiers) {
+          res.locals.identifiers = []
+          res.once('finish', () => { self.unholdUploadIdentifiers(res) })
+        }
+        res.locals.identifiers.push(identifier)
+      }
     } else {
+      // Otherwise, check for physical files' full name matches
       try {
+        const name = identifier + extension
         await paths.access(path.join(paths.uploads, name))
         logger.log(`${name} is already in use (${i + 1}/${utils.idMaxTries}).`)
         continue
@@ -175,12 +193,25 @@ self.getUniqueRandomName = async (length, extension) => {
         if (error && error.code !== 'ENOENT') throw error
       }
     }
-    return name
+
+    // Return the random identifier only
+    return identifier
   }
 
   throw new ServerError('Failed to allocate a unique name for the upload. Try again?')
 }
 
+self.unholdUploadIdentifiers = res => {
+  if (!res.locals.identifiers) return
+
+  for (const identifier of res.locals.identifiers) {
+    self.onHold.delete(identifier)
+    logger.debug(`Unhold identifier ${identifier}.`)
+  }
+
+  delete res.locals.identifiers
+}
+
 self.assertRetentionPeriod = (user, age) => {
   if (!utils.retentions.enabled) return null
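
The hunks above implement an in-memory reservation: a candidate identifier is parked in the onHold Set between the uniqueness check and the database insert, then released when the HTTP response finishes. A standalone, runnable sketch of the same pattern (all names here are assumed for illustration, not the controller's actual API):

```js
const crypto = require('crypto')

const onHold = new Set() // identifiers reserved by in-flight uploads

function reserveIdentifier (length, maxTries = 3) {
  for (let i = 0; i < maxTries; i++) {
    const identifier = crypto.randomBytes(length).toString('hex').slice(0, length)
    if (onHold.has(identifier)) continue // held by a concurrent upload
    onHold.add(identifier) // reserve until the caller persists or gives up
    return identifier
  }
  throw new Error('Failed to allocate a unique identifier')
}

function releaseIdentifier (identifier) {
  onHold.delete(identifier) // free the reservation
}

// Usage
const id = reserveIdentifier(8)
console.log(`reserved ${id}, held: ${onHold.size}`)
releaseIdentifier(id)
console.log(`released ${id}, held: ${onHold.size}`)
```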
@@ -327,7 +358,8 @@ self.actuallyUpload = async (req, res, user, data = {}) => {
     file.path = file.chunksData.path
   } else {
     const length = self.parseFileIdentifierLength(req.headers.filelength)
-    file.filename = await self.getUniqueRandomName(length, file.extname)
+    const identifier = await self.getUniqueUploadIdentifier(length, file.extname, res)
+    file.filename = identifier + file.extname
     file.path = path.join(paths.uploads, file.filename)
   }
@@ -466,6 +498,7 @@ self.actuallyUpload = async (req, res, user, data = {}) => {
       unlinkFiles(req.files)
       // If req.multipart() did not error out, but some file field did,
       // then Request connection was likely dropped
+      self.unholdUploadIdentifiers(res)
       return
     }
@@ -580,7 +613,8 @@ self.actuallyUploadUrls = async (req, res, user, data = {}) => {
     }
 
     const length = self.parseFileIdentifierLength(req.headers.filelength)
-    file.filename = await self.getUniqueRandomName(length, file.extname)
+    const identifier = await self.getUniqueUploadIdentifier(length, file.extname, res)
+    file.filename = identifier + file.extname
     file.path = path.join(paths.uploads, file.filename)
 
     let writeStream
@@ -758,7 +792,8 @@ self.actuallyFinishChunks = async (req, res, user, files) => {
     // Generate name
     const length = self.parseFileIdentifierLength(file.filelength)
-    const name = await self.getUniqueRandomName(length, file.extname)
+    const identifier = await self.getUniqueUploadIdentifier(length, file.extname, res)
+    const name = identifier + file.extname
 
     // Move tmp file to final destination
     // For fs.copyFile(), tmpfile will eventually be unlinked by self.cleanUpChunks()
@@ -1049,14 +1084,6 @@ self.storeFilesToDb = async (req, res, user, filesData) => {
   await utils.db.table('files').insert(files)
   utils.invalidateStatsCache('uploads')
 
-  if (config.uploads.queryDbForFileCollisions) {
-    for (const file of files) {
-      const extname = utils.extname(file.name)
-      const identifier = file.name.slice(0, -(extname.length))
-      self.onHold.delete(identifier)
-    }
-  }
-
   // Update albums' timestamp
   if (authorizedIds.length) {
     await utils.db.table('albums')
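
The manual cleanup removed above became redundant: getUniqueUploadIdentifier now registers a res.once('finish') listener, and Node's HTTP response emits 'finish' once the response has been handed off, so held identifiers are released on success paths automatically (the earlier hunk still calls unholdUploadIdentifiers explicitly where the connection was likely dropped). A minimal sketch of that hook, with the server and identifier as placeholders:

```js
const http = require('http')

const onHold = new Set()

const server = http.createServer((req, res) => {
  const identifier = 'abcd' // placeholder; normally a random identifier
  onHold.add(identifier)

  // Runs once the response has been flushed, so the reservation
  // can no longer leak in the Set after the request completes.
  res.once('finish', () => {
    onHold.delete(identifier)
    console.log(`released ${identifier}; still held: ${onHold.size}`)
  })

  res.end('done')
})

server.listen(0, () => console.log('listening'))
```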

controllers/utilsController.js

@@ -45,7 +45,6 @@ const self = {
     defaultRenderers: {}
   },
   gitHash: null,
-  idSet: null,
 
   idMaxTries: config.uploads.maxTries || 1,
@@ -592,14 +591,8 @@ self.unlinkFile = async (filename, predb) => {
   }
 
   const identifier = filename.split('.')[0]
-
-  // Do not remove from identifiers cache on pre-db-deletion
-  if (!predb && self.idSet) {
-    self.idSet.delete(identifier)
-    logger.debug(`Removed ${identifier} from identifiers cache (deleteFile)`)
-  }
-
   const extname = self.extname(filename, true)
   if (self.imageExts.includes(extname) || self.videoExts.includes(extname)) {
     try {
       await paths.unlink(path.join(paths.thumbs, `${identifier}.png`))
@@ -662,14 +655,6 @@ self.bulkDeleteFromDb = async (field, values, user) => {
     .del()
   self.invalidateStatsCache('uploads')
 
-  if (self.idSet) {
-    unlinked.forEach(file => {
-      const identifier = file.name.split('.')[0]
-      self.idSet.delete(identifier)
-      logger.debug(`Removed ${identifier} from identifiers cache (bulkDeleteFromDb)`)
-    })
-  }
-
   unlinked.forEach(file => {
     // Push album ids
     if (file.albumid && !albumids.includes(file.albumid)) {

lolisafe.js

@@ -76,7 +76,7 @@ if (Array.isArray(config.rateLimiters)) {
     }
   }
 } else if (config.rateLimits) {
-  logger.error('Config option "rateLimits" is deprecated.')
+  logger.error('Config option "rateLimits" is DEPRECATED.')
   logger.error('Please consult the provided sample file for the new option "rateLimiters".')
 }
@@ -303,16 +303,6 @@ safe.use('/api', api)
     logger.log(`Connection established with ${utils.scan.version}`)
   }
 
-  // Cache file identifiers
-  if (config.uploads.cacheFileIdentifiers) {
-    utils.idSet = await utils.db.table('files')
-      .select('name')
-      .then(rows => {
-        return new Set(rows.map(row => row.name.split('.')[0]))
-      })
-    logger.log(`Cached ${utils.idSet.size} file identifiers`)
-  }
-
   // Binds Express to port
   await safe.listen(utils.conf.port)
   logger.log(`lolisafe started on port ${utils.conf.port}`)