mirror of
https://github.com/BobbyWibowo/lolisafe.git
synced 2025-01-19 01:31:34 +00:00
BLAZING FAST CHUNKED UPLOADS 🚀
Inspired by our recent switch to blake3 for file hashing, chunks are now written directly to a tmp file as they're uploaded, so there is no more long wait for "rebuilding chunks". There will still be some delay on each subsequent chunk upload attempt. I'm not sure of the specifics, as we're already reusing the write stream.
This commit is contained in:
parent
14b97ecbf1
commit
b4c8b1d90e
@ -37,32 +37,53 @@ DiskStorage.prototype._handleFile = function _handleFile (req, file, cb) {
|
|||||||
if (err) return cb(err)
|
if (err) return cb(err)
|
||||||
|
|
||||||
const finalPath = path.join(destination, filename)
|
const finalPath = path.join(destination, filename)
|
||||||
const outStream = fs.createWriteStream(finalPath)
|
const onerror = err => {
|
||||||
|
|
||||||
let hash = null
|
|
||||||
if (!file._ischunk) {
|
|
||||||
hash = blake3.createHash()
|
|
||||||
const onerror = function (err) {
|
|
||||||
hash.dispose()
|
hash.dispose()
|
||||||
cb(err)
|
cb(err)
|
||||||
}
|
}
|
||||||
outStream.on('error', onerror)
|
|
||||||
file.stream.on('error', onerror)
|
let outStream
|
||||||
file.stream.on('data', d => hash.update(d))
|
let hash
|
||||||
|
if (file._isChunk) {
|
||||||
|
if (!file._chunksData.stream) {
|
||||||
|
file._chunksData.stream = fs.createWriteStream(finalPath, { flags: 'a' })
|
||||||
|
file._chunksData.stream.on('error', onerror)
|
||||||
|
}
|
||||||
|
if (!file._chunksData.hasher)
|
||||||
|
file._chunksData.hasher = blake3.createHash()
|
||||||
|
|
||||||
|
outStream = file._chunksData.stream
|
||||||
|
hash = file._chunksData.hasher
|
||||||
} else {
|
} else {
|
||||||
outStream.on('error', cb)
|
outStream = fs.createWriteStream(finalPath)
|
||||||
|
outStream.on('error', onerror)
|
||||||
|
hash = blake3.createHash()
|
||||||
}
|
}
|
||||||
|
|
||||||
file.stream.pipe(outStream)
|
file.stream.on('error', onerror)
|
||||||
outStream.on('finish', function () {
|
file.stream.on('data', d => hash.update(d))
|
||||||
|
|
||||||
|
if (file._isChunk) {
|
||||||
|
file.stream.on('end', () => {
|
||||||
|
cb(null, {
|
||||||
|
destination,
|
||||||
|
filename,
|
||||||
|
path: finalPath
|
||||||
|
})
|
||||||
|
})
|
||||||
|
file.stream.pipe(outStream, { end: false })
|
||||||
|
} else {
|
||||||
|
outStream.on('finish', () => {
|
||||||
cb(null, {
|
cb(null, {
|
||||||
destination,
|
destination,
|
||||||
filename,
|
filename,
|
||||||
path: finalPath,
|
path: finalPath,
|
||||||
size: outStream.bytesWritten,
|
size: outStream.bytesWritten,
|
||||||
hash: hash && hash.digest('hex')
|
hash: hash.digest('hex')
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
file.stream.pipe(outStream)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -51,9 +51,15 @@ const initChunks = async uuid => {
|
|||||||
throw err
|
throw err
|
||||||
await paths.mkdir(root)
|
await paths.mkdir(root)
|
||||||
}
|
}
|
||||||
chunksData[uuid] = { root, chunks: [], size: 0 }
|
chunksData[uuid] = {
|
||||||
|
root,
|
||||||
|
filename: 'tmp',
|
||||||
|
chunks: 0,
|
||||||
|
stream: null,
|
||||||
|
hasher: null
|
||||||
}
|
}
|
||||||
return chunksData[uuid].root
|
}
|
||||||
|
return chunksData[uuid]
|
||||||
}
|
}
|
||||||
|
|
||||||
const executeMulter = multer({
|
const executeMulter = multer({
|
||||||
@ -90,11 +96,14 @@ const executeMulter = multer({
|
|||||||
storage: multerStorage({
|
storage: multerStorage({
|
||||||
destination (req, file, cb) {
|
destination (req, file, cb) {
|
||||||
// Is file a chunk!?
|
// Is file a chunk!?
|
||||||
file._ischunk = chunkedUploads && req.body.uuid !== undefined && req.body.chunkindex !== undefined
|
file._isChunk = chunkedUploads && req.body.uuid !== undefined && req.body.chunkindex !== undefined
|
||||||
|
|
||||||
if (file._ischunk)
|
if (file._isChunk)
|
||||||
initChunks(req.body.uuid)
|
initChunks(req.body.uuid)
|
||||||
.then(uuidDir => cb(null, uuidDir))
|
.then(chunksData => {
|
||||||
|
file._chunksData = chunksData
|
||||||
|
cb(null, chunksData.root)
|
||||||
|
})
|
||||||
.catch(error => {
|
.catch(error => {
|
||||||
logger.error(error)
|
logger.error(error)
|
||||||
return cb('Could not process the chunked upload. Try again?')
|
return cb('Could not process the chunked upload. Try again?')
|
||||||
@ -104,12 +113,8 @@ const executeMulter = multer({
|
|||||||
},
|
},
|
||||||
|
|
||||||
filename (req, file, cb) {
|
filename (req, file, cb) {
|
||||||
if (file._ischunk) {
|
if (file._isChunk) {
|
||||||
// index.extension (i.e. 0, 1, ..., n - will prepend zeros depending on the amount of chunks)
|
return cb(null, chunksData[req.body.uuid].filename)
|
||||||
const digits = req.body.totalchunkcount !== undefined ? `${req.body.totalchunkcount - 1}`.length : 1
|
|
||||||
const zeros = new Array(digits + 1).join('0')
|
|
||||||
const name = (zeros + req.body.chunkindex).slice(-digits)
|
|
||||||
return cb(null, name)
|
|
||||||
} else {
|
} else {
|
||||||
const length = self.parseFileIdentifierLength(req.headers.filelength)
|
const length = self.parseFileIdentifierLength(req.headers.filelength)
|
||||||
return self.getUniqueRandomName(length, file.extname)
|
return self.getUniqueRandomName(length, file.extname)
|
||||||
@ -258,8 +263,7 @@ self.actuallyUploadFiles = async (req, res, user, albumid, age) => {
|
|||||||
const uuid = req.body.uuid
|
const uuid = req.body.uuid
|
||||||
if (chunkedUploads && chunksData[uuid] !== undefined) {
|
if (chunkedUploads && chunksData[uuid] !== undefined) {
|
||||||
req.files.forEach(file => {
|
req.files.forEach(file => {
|
||||||
chunksData[uuid].chunks.push(file.filename)
|
chunksData[uuid].chunks++
|
||||||
chunksData[uuid].size += file.size
|
|
||||||
})
|
})
|
||||||
return res.json({ success: true })
|
return res.json({ success: true })
|
||||||
}
|
}
|
||||||
@ -440,7 +444,7 @@ self.finishChunks = async (req, res, next) => {
|
|||||||
self.actuallyFinishChunks = async (req, res, user) => {
|
self.actuallyFinishChunks = async (req, res, user) => {
|
||||||
const check = file => typeof file.uuid !== 'string' ||
|
const check = file => typeof file.uuid !== 'string' ||
|
||||||
!chunksData[file.uuid] ||
|
!chunksData[file.uuid] ||
|
||||||
chunksData[file.uuid].chunks.length < 2
|
chunksData[file.uuid].chunks < 2
|
||||||
|
|
||||||
const files = req.body.files
|
const files = req.body.files
|
||||||
if (!Array.isArray(files) || !files.length || files.some(check))
|
if (!Array.isArray(files) || !files.length || files.some(check))
|
||||||
@ -449,7 +453,10 @@ self.actuallyFinishChunks = async (req, res, user) => {
|
|||||||
const infoMap = []
|
const infoMap = []
|
||||||
try {
|
try {
|
||||||
await Promise.all(files.map(async file => {
|
await Promise.all(files.map(async file => {
|
||||||
if (chunksData[file.uuid].chunks.length > maxChunksCount)
|
// Close stream
|
||||||
|
chunksData[file.uuid].stream.end()
|
||||||
|
|
||||||
|
if (chunksData[file.uuid].chunks > maxChunksCount)
|
||||||
throw 'Too many chunks.'
|
throw 'Too many chunks.'
|
||||||
|
|
||||||
file.extname = typeof file.original === 'string' ? utils.extname(file.original) : ''
|
file.extname = typeof file.original === 'string' ? utils.extname(file.original) : ''
|
||||||
@ -462,28 +469,30 @@ self.actuallyFinishChunks = async (req, res, user) => {
|
|||||||
throw 'Permanent uploads are not permitted.'
|
throw 'Permanent uploads are not permitted.'
|
||||||
}
|
}
|
||||||
|
|
||||||
file.size = chunksData[file.uuid].size
|
file.size = chunksData[file.uuid].stream.bytesWritten
|
||||||
if (config.filterEmptyFile && file.size === 0)
|
if (config.filterEmptyFile && file.size === 0)
|
||||||
throw 'Empty files are not allowed.'
|
throw 'Empty files are not allowed.'
|
||||||
else if (file.size > maxSizeBytes)
|
else if (file.size > maxSizeBytes)
|
||||||
throw `File too large. Chunks are bigger than ${maxSize} MB.`
|
throw `File too large. Chunks are bigger than ${maxSize} MB.`
|
||||||
|
|
||||||
|
// Double-check file size
|
||||||
|
const tmpfile = path.join(chunksData[file.uuid].root, chunksData[file.uuid].filename)
|
||||||
|
const lstat = await paths.lstat(tmpfile)
|
||||||
|
if (lstat.size !== file.size)
|
||||||
|
throw `File size mismatched (${lstat.size} vs. ${file.size}).`
|
||||||
|
|
||||||
// Generate name
|
// Generate name
|
||||||
const length = self.parseFileIdentifierLength(file.filelength)
|
const length = self.parseFileIdentifierLength(file.filelength)
|
||||||
const name = await self.getUniqueRandomName(length, file.extname)
|
const name = await self.getUniqueRandomName(length, file.extname)
|
||||||
|
|
||||||
// Combine chunks
|
// Move tmp file to final destination
|
||||||
const destination = path.join(paths.uploads, name)
|
const destination = path.join(paths.uploads, name)
|
||||||
const hash = await self.combineChunks(destination, file.uuid)
|
await paths.rename(tmpfile, destination)
|
||||||
|
const hash = chunksData[file.uuid].hasher.digest('hex')
|
||||||
|
|
||||||
// Continue even when encountering errors
|
// Continue even when encountering errors
|
||||||
await self.cleanUpChunks(file.uuid).catch(logger.error)
|
await self.cleanUpChunks(file.uuid).catch(logger.error)
|
||||||
|
|
||||||
// Double-check file size
|
|
||||||
const lstat = await paths.lstat(destination)
|
|
||||||
if (lstat.size !== file.size)
|
|
||||||
throw 'Chunks size mismatched.'
|
|
||||||
|
|
||||||
let albumid = parseInt(file.albumid)
|
let albumid = parseInt(file.albumid)
|
||||||
if (isNaN(albumid))
|
if (isNaN(albumid))
|
||||||
albumid = null
|
albumid = null
|
||||||
@ -512,11 +521,17 @@ self.actuallyFinishChunks = async (req, res, user) => {
|
|||||||
const result = await self.storeFilesToDb(req, res, user, infoMap)
|
const result = await self.storeFilesToDb(req, res, user, infoMap)
|
||||||
await self.sendUploadResponse(req, res, result)
|
await self.sendUploadResponse(req, res, result)
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Clean up leftover chunks
|
// Dispose unfinished hasher and clean up leftover chunks
|
||||||
// Should continue even when encountering errors
|
// Should continue even when encountering errors
|
||||||
await Promise.all(files.map(file => {
|
await Promise.all(files.map(file => {
|
||||||
if (chunksData[file.uuid] !== undefined)
|
// eslint-disable-next-line curly
|
||||||
return self.cleanUpChunks(file.uuid).catch(logger.error)
|
if (chunksData[file.uuid] !== undefined) {
|
||||||
|
try {
|
||||||
|
if (chunksData[file.uuid].hasher)
|
||||||
|
chunksData[file.uuid].hasher.dispose()
|
||||||
|
} catch (error) {}
|
||||||
|
self.cleanUpChunks(file.uuid).catch(logger.error)
|
||||||
|
}
|
||||||
}))
|
}))
|
||||||
|
|
||||||
// Re-throw error
|
// Re-throw error
|
||||||
@ -524,50 +539,16 @@ self.actuallyFinishChunks = async (req, res, user) => {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.combineChunks = async (destination, uuid) => {
|
|
||||||
let errorObj
|
|
||||||
const outStream = fs.createWriteStream(destination, { flags: 'a' })
|
|
||||||
const hash = blake3.createHash()
|
|
||||||
|
|
||||||
outStream.on('error', error => {
|
|
||||||
hash.dispose()
|
|
||||||
errorObj = error
|
|
||||||
})
|
|
||||||
|
|
||||||
try {
|
|
||||||
chunksData[uuid].chunks.sort()
|
|
||||||
for (const chunk of chunksData[uuid].chunks)
|
|
||||||
await new Promise((resolve, reject) => {
|
|
||||||
const stream = fs.createReadStream(path.join(chunksData[uuid].root, chunk))
|
|
||||||
stream.pipe(outStream, { end: false })
|
|
||||||
|
|
||||||
stream.on('data', d => hash.update(d))
|
|
||||||
stream.on('error', reject)
|
|
||||||
stream.on('end', () => resolve())
|
|
||||||
})
|
|
||||||
} catch (error) {
|
|
||||||
hash.dispose()
|
|
||||||
errorObj = error
|
|
||||||
}
|
|
||||||
|
|
||||||
// Close stream
|
|
||||||
outStream.end()
|
|
||||||
|
|
||||||
// Re-throw error
|
|
||||||
if (errorObj) throw errorObj
|
|
||||||
|
|
||||||
// Return hash
|
|
||||||
return hash.digest('hex')
|
|
||||||
}
|
|
||||||
|
|
||||||
self.cleanUpChunks = async (uuid) => {
|
self.cleanUpChunks = async (uuid) => {
|
||||||
// Unlink chunks
|
// Remove tmp file
|
||||||
await Promise.all(chunksData[uuid].chunks.map(chunk =>
|
await paths.unlink(path.join(chunksData[uuid].root, chunksData[uuid].filename))
|
||||||
paths.unlink(path.join(chunksData[uuid].root, chunk))
|
.catch(error => {
|
||||||
))
|
if (error.code !== 'ENOENT')
|
||||||
|
logger.error(error)
|
||||||
|
})
|
||||||
// Remove UUID dir
|
// Remove UUID dir
|
||||||
await paths.rmdir(chunksData[uuid].root)
|
await paths.rmdir(chunksData[uuid].root)
|
||||||
// Delete cached date
|
// Delete cached chunks data
|
||||||
delete chunksData[uuid]
|
delete chunksData[uuid]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user