Amazon S3 is an object storage service offered by Amazon Web Services (AWS). It is widely used for storing data of all kinds, and it works particularly well for large files. Node.js makes it straightforward to upload large files to S3, and in this post we'll look at how to do it.
Uploading large files to Amazon S3 is a common requirement. If you're using Node.js, the AWS SDK lets you upload files to S3 using streams, which allow you to read and write large amounts of data without holding it all in memory.
Sometimes the file is big, say larger than 100MB. In that case, streaming it from disk is the way to go, so that the whole file is never loaded into memory at once.
To get started, you'll first need to install the AWS SDK and configure your credentials. Once that's done, you can use fs.createReadStream to read the file as a stream and the S3 multipart upload API (createMultipartUpload, uploadPart and completeMultipartUpload) to send it to S3 in parts.
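If you haven't set this up yet, a minimal setup sketch looks like the following, assuming the AWS SDK for JavaScript v2 (the aws-sdk package, which is what the code below uses). The region value is only a placeholder, and credentials are picked up from the usual places:

npm install aws-sdk

const AWS = require('aws-sdk');
// Credentials are resolved automatically from AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY,
// from the shared ~/.aws/credentials file, or from an attached IAM role.
AWS.config.update({ region: 'us-east-1' }); // placeholder region; use your bucket's region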
The S3 API provides the methods to upload a big file in parts (chunks).
The main steps are:
- Let the API know that we are going to upload a file in chunks (createMultipartUpload)
- Stream the file from disk and upload each chunk (uploadPart)
- Let the API know that all the chunks were uploaded (completeMultipartUpload)
const fs = require('fs');
const AWS = require('aws-sdk');

/**
 * Uploads a local file to S3 using a multipart upload.
 *
 * @param {string} fileName the name in S3
 * @param {string} filePath the absolute path to our local file
 * @return {Promise<string>} the final file name (key) in S3
 */
async function uploadToS3(fileName, filePath) {
  if (!fileName) {
    throw new Error('the fileName is empty');
  }
  if (!filePath) {
    throw new Error('the file absolute path is empty');
  }
  const fileNameInS3 = `some/sub/folder/${fileName}`; // the key (path) inside the bucket, without a leading slash
  console.info(`file name: ${fileNameInS3} file path: ${filePath}`);
  if (!fs.existsSync(filePath)) {
    throw new Error(`file does not exist: ${filePath}`);
  }
  const bucket = 'my-bucket';
  const s3 = new AWS.S3();
  const statsFile = fs.statSync(filePath);
  console.info(`file size: ${Math.round(statsFile.size / 1024 / 1024)}MB`);
  // Each part must be at least 5 MB in size, except the last part.
  let uploadId;
  try {
    // 1. let S3 know we are going to upload the file in parts
    const params = {
      Bucket: bucket,
      Key: fileNameInS3,
    };
    const result = await s3.createMultipartUpload(params).promise();
    uploadId = result.UploadId;
    console.info(`${fileNameInS3} multipart upload created with upload id: ${uploadId}`);
  } catch (e) {
    throw new Error(`Error creating S3 multipart. ${e.message}`);
  }
  const chunkSize = 10 * 1024 * 1024; // 10MB
  // you can pass a second parameter with { highWaterMark: chunkSize } to read in
  // bigger chunks than the default 64 KB
  const readStream = fs.createReadStream(filePath);
  // 2. read the file using a stream and upload it part by part to S3
  const uploadPartsPromise = new Promise((resolve, reject) => {
    const multipartMap = { Parts: [] };
    let partNumber = 1;
    let chunkAccumulator = null;
    readStream.on('error', (err) => {
      reject(err);
    });
    readStream.on('data', (chunk) => {
      // the stream reads in chunks of 64KB by default; we accumulate them up to 10MB and then send to S3
      if (chunkAccumulator === null) {
        chunkAccumulator = chunk;
      } else {
        chunkAccumulator = Buffer.concat([chunkAccumulator, chunk]);
      }
      if (chunkAccumulator.length > chunkSize) {
        // pause the stream while this chunk is uploaded to S3
        readStream.pause();
        const chunkMB = chunkAccumulator.length / 1024 / 1024;
        const params = {
          Bucket: bucket,
          Key: fileNameInS3,
          PartNumber: partNumber,
          UploadId: uploadId,
          Body: chunkAccumulator,
          ContentLength: chunkAccumulator.length,
        };
        s3.uploadPart(params).promise()
          .then((result) => {
            console.info(`Data uploaded. Entity tag: ${result.ETag} Part: ${params.PartNumber} Size: ${chunkMB}`);
            multipartMap.Parts.push({ ETag: result.ETag, PartNumber: params.PartNumber });
            partNumber++;
            chunkAccumulator = null;
            // resume to read the next chunk
            readStream.resume();
          }).catch((err) => {
            console.error(`error uploading the chunk to S3 ${err.message}`);
            reject(err);
          });
      }
    });
    readStream.on('end', () => {
      console.info('End of the stream');
    });
    readStream.on('close', () => {
      console.info('Close stream');
      if (chunkAccumulator) {
        const chunkMB = chunkAccumulator.length / 1024 / 1024;
        // upload the last chunk (the only one allowed to be smaller than 5 MB)
        const params = {
          Bucket: bucket,
          Key: fileNameInS3,
          PartNumber: partNumber,
          UploadId: uploadId,
          Body: chunkAccumulator,
          ContentLength: chunkAccumulator.length,
        };
        s3.uploadPart(params).promise()
          .then((result) => {
            console.info(`Last Data uploaded. Entity tag: ${result.ETag} Part: ${params.PartNumber} Size: ${chunkMB}`);
            multipartMap.Parts.push({ ETag: result.ETag, PartNumber: params.PartNumber });
            chunkAccumulator = null;
            resolve(multipartMap);
          }).catch((err) => {
            console.error(`error uploading the last chunk to S3 ${err.message}`);
            reject(err);
          });
      } else {
        // nothing left to upload, every part was already sent
        resolve(multipartMap);
      }
    });
  });
  const multipartMap = await uploadPartsPromise;
  console.info(`All parts have been uploaded. Let's complete the multipart upload. Parts: ${multipartMap.Parts.length}`);
  // 3. gather all the parts' tags and complete the upload
  try {
    const params = {
      Bucket: bucket,
      Key: fileNameInS3,
      MultipartUpload: multipartMap,
      UploadId: uploadId,
    };
    const result = await s3.completeMultipartUpload(params).promise();
    console.info(`Upload multipart completed. Location: ${result.Location} Entity tag: ${result.ETag}`);
  } catch (e) {
    throw new Error(`Error completing S3 multipart. ${e.message}`);
  }
  return fileNameInS3;
}
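Here is a minimal sketch of how the function could be called; the file name and local path are just placeholders:

uploadToS3('report.csv', '/tmp/report.csv')
  .then((key) => console.info(`uploaded to S3 as: ${key}`))
  .catch((err) => console.error(`upload failed: ${err.message}`));

One thing to keep in mind: if a part fails, S3 keeps the parts that were already uploaded (and keeps charging for their storage) until the multipart upload is completed or aborted, so in a production setup you may want to call s3.abortMultipartUpload with the same Bucket, Key and UploadId in the error path, or configure a bucket lifecycle rule that cleans up incomplete multipart uploads.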