feat: allow to disable file hashing completely

Bobby Wibowo 2022-07-25 07:32:25 +07:00
parent 6ba30a23c6
commit 5bab3a495e
No known key found for this signature in database
GPG Key ID: 51C3A1E1E22D26CF
2 changed files with 74 additions and 56 deletions

View File

@@ -467,6 +467,13 @@ module.exports = {
     */
     temporaryUploadsInterval: 1 * 60000, // 1 minute
 
+    /*
+      Hash files on upload.
+      If enabled, the service will also attempt to detect duplicates by searching for uploads
+      with the exact same hash and size in the database.
+    */
+    hash: true,
+
     /*
       Scan uploads for threats with ClamAV.
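
Note: the option defaults to true. A minimal sketch of turning it off in a user config, assuming the same `uploads` section layout as the sample config above (file name and surrounding keys are illustrative):

// config.js (illustrative excerpt)
module.exports = {
  // ...other settings...
  uploads: {
    // ...other upload settings...
    // Explicitly disable BLAKE3 hashing: uploads are stored with a null hash
    // and the hash+size duplicate check is skipped.
    hash: false
  }
}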

View File

@@ -46,6 +46,11 @@ const extensionsFilter = Array.isArray(config.extensionsFilter) &&
 const urlExtensionsFilter = Array.isArray(config.uploads.urlExtensionsFilter) &&
   config.uploads.urlExtensionsFilter.length
 
+// Only disable hashing if explicitly disabled in config file
+const enableHashing = config.uploads.hash === undefined
+  ? true
+  : Boolean(config.uploads.hash)
+
 /** Chunks helper class & function **/
 
 class ChunksData {
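
Note: the constant above treats a missing option as enabled, so hashing only turns off when the config explicitly provides a falsy value. A small sketch of the same resolution logic (the helper name is hypothetical):

// Hypothetical helper mirroring the ternary above.
const resolveEnableHashing = value =>
  value === undefined ? true : Boolean(value)

resolveEnableHashing(undefined) // true  -> option omitted, hashing stays on
resolveEnableHashing(true)      // true
resolveEnableHashing(false)     // false -> only an explicit false disables it
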
@@ -93,7 +98,7 @@ const initChunks = async uuid => {
 
     // Init write & hasher streams
     chunksData[uuid].writeStream = fs.createWriteStream(chunksData[uuid].path, { flags: 'a' })
-    chunksData[uuid].hashStream = blake3.createHash()
+    chunksData[uuid].hashStream = enableHashing && blake3.createHash()
   } else if (chunksData[uuid].processing) {
     // Wait for the first spawned init tasks
     throw new ClientError('Previous chunk upload is still being processed. Parallel chunked uploads is not supported.')
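
Note: when hashing is disabled, `enableHashing && blake3.createHash()` short-circuits to false, so hashStream doubles as both the hasher and a flag; the later `if (hashStream)` guards then skip all hashing work. A standalone sketch, assuming the blake3 package already used by this controller:

const blake3 = require('blake3')

const enableHashing = false // e.g. config.uploads.hash === false
const hashStream = enableHashing && blake3.createHash() // -> false, not a hasher

if (hashStream) {
  // Never reached while hashing is disabled.
  hashStream.update('some chunk of data')
}
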
@ -332,9 +337,7 @@ self.actuallyUpload = async (req, res, user, data = {}) => {
// Helper function to remove event listeners from multiple emitters // Helper function to remove event listeners from multiple emitters
const _unlisten = (emitters = [], event, listener) => { const _unlisten = (emitters = [], event, listener) => {
for (const emitter of emitters) { for (const emitter of emitters) {
if (emitter !== undefined) { if (emitter) emitter.off(event, listener)
emitter.off(event, listener)
}
} }
} }
@@ -381,7 +384,7 @@ self.actuallyUpload = async (req, res, user, data = {}) => {
       hashStream = file.chunksData.hashStream
     } else {
       writeStream = fs.createWriteStream(file.path)
-      hashStream = blake3.createHash()
+      hashStream = enableHashing && blake3.createHash()
 
       if (utils.scan.passthrough &&
         !self.scanHelpers.assertUserBypass(req._user, file.filename) &&
@@ -392,11 +395,11 @@ self.actuallyUpload = async (req, res, user, data = {}) => {
 
       // Re-init stream errors listeners for this Request
       writeStream.once('error', _reject)
-      hashStream.once('error', _reject)
       readStream.once('error', _reject)
 
       // Pass data into hashStream if required
       if (hashStream) {
+        hashStream.once('error', _reject)
         readStream.on('data', data => {
           // .dispose() will destroy this internal component,
           // so use it as an indicator of whether the hashStream has been .dispose()'d
@@ -417,7 +420,9 @@ self.actuallyUpload = async (req, res, user, data = {}) => {
       // both writeStream and scanStream finish
       writeStream.once('finish', () => _resolve({
         size: writeStream.bytesWritten,
-        hash: hashStream.hash.hash ? hashStream.digest('hex') : null
+        hash: hashStream && hashStream.hash.hash
+          ? hashStream.digest('hex')
+          : null
       }, scanStream ? 1 : 2))
 
       if (scanStream) {
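
Note: the ternary above combines the new flag with the existing dispose check: per the comments earlier in this diff, hashStream.hash.hash is the hasher's internal component that is destroyed by .dispose(), so digesting is only attempted on a live hasher. A sketch of the same digest-or-null pattern as a helper (the helper name is hypothetical):

// Hypothetical helper: returns a hex digest only when a live, undisposed hasher exists.
const safeDigest = hasher =>
  hasher && hasher.hash.hash
    ? hasher.digest('hex')
    : null
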
@@ -582,21 +587,25 @@ self.actuallyUploadUrls = async (req, res, user, data = {}) => {
     let hashStream
 
     return Promise.resolve().then(async () => {
       writeStream = fs.createWriteStream(file.path)
-      hashStream = blake3.createHash()
+      hashStream = enableHashing && blake3.createHash()
 
       // Limit max response body size with maximum allowed size
       const fetchFile = await fetch(url, { method: 'GET', size: urlMaxSizeBytes })
         .then(res => new Promise((resolve, reject) => {
-          if (res.status === 200) {
-            writeStream.on('error', reject)
-            res.body.on('error', reject)
-            res.body.on('data', d => hashStream.update(d))
-            res.body.pipe(writeStream)
-            writeStream.on('finish', () => resolve(res))
-          } else {
-            resolve(res)
+          if (res.status !== 200) {
+            return resolve(res)
           }
+
+          writeStream.once('error', reject)
+          res.body.once('error', reject)
+
+          if (hashStream) {
+            hashStream.once('error', reject)
+            res.body.on('data', d => hashStream.update(d))
+          }
+
+          res.body.pipe(writeStream)
+          writeStream.once('finish', () => resolve(res))
         }))
 
       if (fetchFile.status !== 200) {
@@ -610,17 +619,17 @@ self.actuallyUploadUrls = async (req, res, user, data = {}) => {
       const contentType = fetchFile.headers.get('content-type')
       file.mimetype = contentType ? contentType.split(';')[0] : 'application/octet-stream'
       file.size = writeStream.bytesWritten
-      file.hash = hashStream.digest('hex')
+      file.hash = hashStream
+        ? hashStream.digest('hex')
+        : null
     }).catch(err => {
       // Dispose of unfinished write & hasher streams
       if (writeStream && !writeStream.destroyed) {
        writeStream.destroy()
       }
-      try {
-        if (hashStream) {
-          hashStream.dispose()
-        }
-      } catch (_) {}
+      if (hashStream && hashStream.hash.hash) {
+        hashStream.dispose()
+      }
 
       // Re-throw errors
       throw err
@@ -710,7 +719,9 @@ self.actuallyFinishChunks = async (req, res, user, files) => {
       // Conclude write and hasher streams
       chunksData[file.uuid].writeStream.end()
       const bytesWritten = chunksData[file.uuid].writeStream.bytesWritten
-      const hash = chunksData[file.uuid].hashStream.digest('hex')
+      const hash = chunksData[file.uuid].hashStream
+        ? chunksData[file.uuid].hashStream.digest('hex')
+        : null
 
       if (chunksData[file.uuid].chunks < 2 || chunksData[file.uuid].chunks > maxChunksCount) {
         throw new ClientError('Invalid chunks count.')
@@ -792,11 +803,9 @@ self.cleanUpChunks = async uuid => {
   if (chunksData[uuid].writeStream && !chunksData[uuid].writeStream.destroyed) {
     chunksData[uuid].writeStream.destroy()
   }
-  try {
-    if (chunksData[uuid].hashStream) {
-      chunksData[uuid].hashStream.dispose()
-    }
-  } catch (_) {}
+  if (chunksData[uuid].hashStream && chunksData[uuid].hashStream.hash.hash) {
+    chunksData[uuid].hashStream.dispose()
+  }
 
   // Remove tmp file
   await paths.unlink(path.join(chunksData[uuid].root, chunksData[uuid].filename))
@@ -952,36 +961,38 @@ self.storeFilesToDb = async (req, res, user, filesData) => {
   const albumids = []
 
   await Promise.all(filesData.map(async file => {
-    // Check if the file exists by checking its hash and size
-    const dbFile = await utils.db.table('files')
-      .where(function () {
-        if (user === undefined) {
-          this.whereNull('userid')
-        } else {
-          this.where('userid', user.id)
-        }
-      })
-      .where({
-        hash: file.hash,
-        size: String(file.size)
-      })
-      // Select expirydate to display expiration date of existing files as well
-      .select('name', 'expirydate')
-      .first()
-
-    if (dbFile) {
-      // Continue even when encountering errors
-      await utils.unlinkFile(file.filename).catch(logger.error)
-      logger.debug(`Unlinked ${file.filename} since a duplicate named ${dbFile.name} exists`)
-
-      // If on /nojs route, append original name reported by client,
-      // instead of the actual original name from database
-      if (req.path === '/nojs') {
-        dbFile.original = file.originalname
-      }
-
-      exists.push(dbFile)
-      return
-    }
+    if (enableHashing) {
+      // Check if the file exists by checking its hash and size
+      const dbFile = await utils.db.table('files')
+        .where(function () {
+          if (user === undefined) {
+            this.whereNull('userid')
+          } else {
+            this.where('userid', user.id)
+          }
+        })
+        .where({
+          hash: file.hash,
+          size: String(file.size)
+        })
+        // Select expirydate to display expiration date of existing files as well
+        .select('name', 'expirydate')
+        .first()
+
+      if (dbFile) {
+        // Continue even when encountering errors
+        await utils.unlinkFile(file.filename).catch(logger.error)
+        logger.debug(`Unlinked ${file.filename} since a duplicate named ${dbFile.name} exists`)
+
+        // If on /nojs route, append original name reported by client,
+        // instead of the actual original name from database
+        if (req.path === '/nojs') {
+          dbFile.original = file.originalname
+        }
+
+        exists.push(dbFile)
+        return
+      }
+    }
 
     const timestamp = Math.floor(Date.now() / 1000)
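
Note: with hashing disabled, file.hash is always null at this point, so the lookup above is skipped and every upload is stored as a new row instead of being collapsed onto an existing identical file. A small illustrative sketch of the two outcomes (field values are hypothetical):

// Hashing enabled and an identical upload already exists:
// the existing record is reused (pushed to `exists`) and the new temp file is unlinked.
const duplicateResult = { name: 'existing-name.png', expirydate: null }

// Hashing disabled: no duplicate lookup happens and a new record is inserted with a null hash.
const freshRecord = { name: 'new-random-name.png', hash: null, size: '123456' }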