feat: deprecate uploads.cacheFileIdentifiers conf

maintaining it adds unnecessary complexity,
and it's a feature that doesn't scale well anyway

also renamed "queryDbForFileCollisions" to
"queryDatabaseForIdentifierMatch"
and updated config description accordingly

this should also now properly free the internal onHold Set
Bobby Wibowo 2022-07-29 09:14:55 +07:00
parent 03eff45e8c
commit fae28f9aa2
4 changed files with 58 additions and 77 deletions
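
For reference, migrating an existing config amounts to replacing the old keys with the new one (a minimal sketch; as the diff below shows, the old "queryDbForFileCollisions" key is still honored as a fallback with identical behavior):

// config.js (sketch): "cacheFileIdentifiers" and "queryDbForFileCollisions"
// are deprecated; only the renamed option remains.
module.exports = {
  uploads: {
    // Query the database on every new upload to make sure the newly
    // generated random identifier does not match any existing upload.
    queryDatabaseForIdentifierMatch: true
  }
}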

config.sample.js

@@ -567,42 +567,21 @@ module.exports = {
},
/*
Cache file identifiers.
The service will query the database on every new upload,
to make sure a newly generated random identifier will not match any existing upload.
They will be used for stricter collision checks, such that a single identifier
may not be used by more than a single file (e.g. if "abcd.jpg" already exists, a new PNG
file may not be named as "abcd.png").
Otherwise, the same identifier may be used by multiple different extensions
(e.g. if "abcd.jpg" already exists, new files can be named as "abcd.png", "abcd.mp4", etc).
If this is enabled, the safe will query files from the database during first launch,
parse their names, then cache the identifiers into memory.
Its downside is that it will use a bit more memory.
If this is disabled, collision check will become less strict.
As in, the same identifier may be used by multiple different extensions (e.g. if "abcd.jpg"
already exists, new files can possibly be named as "abcd.png", "abcd.mp4", etc).
Its downside will be, in the rare chance that multiple image/video files are sharing the same
identifier, they will end up with the same thumbnail in dashboard, since thumbnails will
In the rare chance that multiple image/video files are sharing the same identifier,
they will end up with the same thumbnail in dashboard, since thumbnails will
only be saved as PNG in storage (e.g. "abcd.jpg" and "abcd.png" will share a single thumbnail
named "abcd.png" in thumbs directory, in which case, the file that's uploaded the earliest will
be the source for the thumbnail).
If you use thumbnails at all, it is highly recommended to enable this feature.
*/
cacheFileIdentifiers: false,
/*
An alternative to caching file identifiers.
Basically the service will instead query the database for stricter collision checks.
Right off the bat this has the disadvantage of adding one or more SQL queries on every
new upload, but it has the advantage of not having to pre-cache anything.
Essentially this reduces the service's startup time and memory usage, but slows down new uploads.
As this is an alternative, you need to disable cacheFileIdentifiers to use this.
You'll have to figure out which method suits your use case best.
*/
queryDbForFileCollisions: true,
queryDatabaseForIdentifierMatch: true,
/*
The length of the randomly generated identifier for albums.

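The query itself is not shown in this hunk, but conceptually the stricter check is a prefix match against stored file names. A sketch using knex, which the controller code below accesses as utils.db; the "files" table and "name" column appear elsewhere in this commit, while the exact LIKE pattern is an assumption (db and identifier are assumed to be in scope):

// Is this identifier already taken by any extension?
// e.g. an existing "abcd.jpg" would block a new "abcd.png".
const taken = await db.table('files')
  .whereRaw('?? like ?', ['name', `${identifier}.%`])
  .select('id')
  .first()
if (taken) {
  // Collision: generate a new random identifier and retry.
}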
controllers/uploadController.js

@@ -12,8 +12,15 @@ const ServerError = require('./utils/ServerError')
const config = require('./../config')
const logger = require('./../logger')
/** Deprecated config options */
if (config.uploads.cacheFileIdentifiers) {
logger.error('Config option "uploads.cacheFileIdentifiers" is DEPRECATED.')
logger.error('There is now only "uploads.queryDatabaseForIdentifierMatch" for a similar behavior.')
}
const self = {
onHold: new Set(),
onHold: new Set(), // temporarily held random upload identifiers
scanHelpers: {}
}
@@ -51,6 +58,9 @@ const enableHashing = config.uploads.hash === undefined
? true
: Boolean(config.uploads.hash)
const queryDatabaseForIdentifierMatch = config.uploads.queryDatabaseForIdentifierMatch ||
config.uploads.queryDbForFileCollisions // old config name for identical behavior
/** Chunks helper class & function **/
class ChunksData {
@@ -139,19 +149,16 @@ self.parseFileIdentifierLength = fileLength => {
}
}
self.getUniqueRandomName = async (length, extension) => {
self.getUniqueUploadIdentifier = async (length, extension = '', res) => {
for (let i = 0; i < utils.idMaxTries; i++) {
const identifier = randomstring.generate(length)
const name = identifier + extension
if (config.uploads.cacheFileIdentifiers) {
if (utils.idSet.has(identifier)) {
logger.log(`Identifier ${identifier} is already in use (${i + 1}/${utils.idMaxTries}).`)
if (queryDatabaseForIdentifierMatch) {
// If we must query the database for identifier matches
if (self.onHold.has(identifier)) {
logger.log(`Identifier ${identifier} is currently held by another upload (${i + 1}/${utils.idMaxTries}).`)
continue
}
utils.idSet.add(identifier)
logger.debug(`Added ${identifier} to identifiers cache`)
} else if (config.uploads.queryDbForFileCollisions) {
if (self.onHold.has(identifier)) continue
// Put identifier on hold (wait for it to be inserted into the DB)
self.onHold.add(identifier)
@@ -165,8 +172,19 @@ self.getUniqueRandomName = async (length, extension) => {
logger.log(`Identifier ${identifier} is already in use (${i + 1}/${utils.idMaxTries}).`)
continue
}
// Unhold identifier once the Response has been sent
if (res) {
if (!res.locals.identifiers) {
res.locals.identifiers = []
res.once('finish', () => { self.unholdUploadIdentifiers(res) })
}
res.locals.identifiers.push(identifier)
}
} else {
// Otherwise, check for physical files' full name matches
try {
const name = identifier + extension
await paths.access(path.join(paths.uploads, name))
logger.log(`${name} is already in use (${i + 1}/${utils.idMaxTries}).`)
continue
@@ -175,12 +193,25 @@ self.getUniqueRandomName = async (length, extension) => {
if (error && error.code !== 'ENOENT') throw error
}
}
return name
// Return the random identifier only
return identifier
}
throw new ServerError('Failed to allocate a unique name for the upload. Try again?')
}
self.unholdUploadIdentifiers = res => {
if (!res.locals.identifiers) return
for (const identifier of res.locals.identifiers) {
self.onHold.delete(identifier)
logger.debug(`Unhold identifier ${identifier}.`)
}
delete res.locals.identifiers
}
self.assertRetentionPeriod = (user, age) => {
if (!utils.retentions.enabled) return null
@@ -327,7 +358,8 @@ self.actuallyUpload = async (req, res, user, data = {}) => {
file.path = file.chunksData.path
} else {
const length = self.parseFileIdentifierLength(req.headers.filelength)
file.filename = await self.getUniqueRandomName(length, file.extname)
const identifier = await self.getUniqueUploadIdentifier(length, file.extname, res)
file.filename = identifier + file.extname
file.path = path.join(paths.uploads, file.filename)
}
@@ -466,6 +498,7 @@ self.actuallyUpload = async (req, res, user, data = {}) => {
unlinkFiles(req.files)
// If req.multipart() did not error out, but some file field did,
// then Request connection was likely dropped
self.unholdUploadIdentifiers(res)
return
}
@@ -580,7 +613,8 @@ self.actuallyUploadUrls = async (req, res, user, data = {}) => {
}
const length = self.parseFileIdentifierLength(req.headers.filelength)
file.filename = await self.getUniqueRandomName(length, file.extname)
const identifier = await self.getUniqueUploadIdentifier(length, file.extname, res)
file.filename = identifier + file.extname
file.path = path.join(paths.uploads, file.filename)
let writeStream
@@ -758,7 +792,8 @@ self.actuallyFinishChunks = async (req, res, user, files) => {
// Generate name
const length = self.parseFileIdentifierLength(file.filelength)
const name = await self.getUniqueRandomName(length, file.extname)
const identifier = await self.getUniqueUploadIdentifier(length, file.extname, res)
const name = identifier + file.extname
// Move tmp file to final destination
// For fs.copyFile(), tmpfile will eventually be unlinked by self.cleanUpChunks()
@@ -1049,14 +1084,6 @@ self.storeFilesToDb = async (req, res, user, filesData) => {
await utils.db.table('files').insert(files)
utils.invalidateStatsCache('uploads')
if (config.uploads.queryDbForFileCollisions) {
for (const file of files) {
const extname = utils.extname(file.name)
const identifier = file.name.slice(0, -(extname.length))
self.onHold.delete(identifier)
}
}
// Update albums' timestamp
if (authorizedIds.length) {
await utils.db.table('albums')

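Taken together, the new flow reserves an identifier in the in-memory onHold Set only for the lifespan of the HTTP response, rather than until the database insert in storeFilesToDb. A condensed sketch of the lifecycle, using the names from the hunks above (surrounding plumbing elided):

// 1. Reserve the identifier so a concurrent upload cannot pick it
//    before this upload's row lands in the database.
self.onHold.add(identifier)

// 2. Track it per-response, registering a single 'finish' listener
//    that releases everything once the response has been sent.
if (!res.locals.identifiers) {
  res.locals.identifiers = []
  res.once('finish', () => { self.unholdUploadIdentifiers(res) })
}
res.locals.identifiers.push(identifier)

// 3. On failure paths (e.g. a dropped connection), release early so
//    the Set does not leak entries.
self.unholdUploadIdentifiers(res)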
controllers/utils.js

@@ -45,7 +45,6 @@ const self = {
defaultRenderers: {}
},
gitHash: null,
idSet: null,
idMaxTries: config.uploads.maxTries || 1,
@@ -592,14 +591,8 @@ self.unlinkFile = async (filename, predb) => {
}
const identifier = filename.split('.')[0]
// Do not remove from identifiers cache on pre-db-deletion
if (!predb && self.idSet) {
self.idSet.delete(identifier)
logger.debug(`Removed ${identifier} from identifiers cache (deleteFile)`)
}
const extname = self.extname(filename, true)
if (self.imageExts.includes(extname) || self.videoExts.includes(extname)) {
try {
await paths.unlink(path.join(paths.thumbs, `${identifier}.png`))
@@ -662,14 +655,6 @@ self.bulkDeleteFromDb = async (field, values, user) => {
.del()
self.invalidateStatsCache('uploads')
if (self.idSet) {
unlinked.forEach(file => {
const identifier = file.name.split('.')[0]
self.idSet.delete(identifier)
logger.debug(`Removed ${identifier} from identifiers cache (bulkDeleteFromDb)`)
})
}
unlinked.forEach(file => {
// Push album ids
if (file.albumid && !albumids.includes(file.albumid)) {

lolisafe.js

@@ -76,7 +76,7 @@ if (Array.isArray(config.rateLimiters)) {
}
}
} else if (config.rateLimits) {
logger.error('Config option "rateLimits" is deprecated.')
logger.error('Config option "rateLimits" is DEPRECATED.')
logger.error('Please consult the provided sample file for the new option "rateLimiters".')
}
@@ -303,16 +303,6 @@ safe.use('/api', api)
logger.log(`Connection established with ${utils.scan.version}`)
}
// Cache file identifiers
if (config.uploads.cacheFileIdentifiers) {
utils.idSet = await utils.db.table('files')
.select('name')
.then(rows => {
return new Set(rows.map(row => row.name.split('.')[0]))
})
logger.log(`Cached ${utils.idSet.size} file identifiers`)
}
// Binds Express to port
await safe.listen(utils.conf.port)
logger.log(`lolisafe started on port ${utils.conf.port}`)