Recently I was building a feature that required taking the first page of a PDF stored on S3 and converting it to an image. I searched the internet but couldn't find a to-the-point guide on how to do this in AWS Lambda, so here I am sharing my workaround.
Things you need to do before moving onto the code section
- Give s3 permissions to the lambda function.
- Add a Ghostscript layer: https://github.com/shelfio/ghostscript-lambda-layer
Here are the steps to be followed (I will write steps for code only)
1 => Getting a file from S3 and saving it temporarily.
/**
 * Downloads an object from S3 and saves it under /tmp.
 *
 * Relies on `s3` (an S3 client exposing the callback-style `getObject`) and
 * `fs` being in scope in the enclosing file.
 *
 * @param {string} bucket - Name of the S3 bucket.
 * @param {string} objectname - Key of the object to download.
 * @returns {Promise<string|null>} Resolves with the local file path, or with
 *   null when the download or the write fails (the error is logged).
 */
function getFile(bucket, objectname) {
  return new Promise((resolve) => {
    const params = { Bucket: bucket, Key: objectname };
    s3.getObject(params, (err, data) => {
      if (err) {
        console.log(err);
        resolve(null);
        // Stop here: `data` is not usable when the request failed.
        return;
      }
      const name = `/tmp/${objectname}`;
      // Keys may contain "/" prefixes; make sure the parent folder exists
      // before writing, otherwise writeFile fails with ENOENT.
      const folder = name.slice(0, name.lastIndexOf("/"));
      fs.mkdir(folder, { recursive: true }, (mkdirErr) => {
        if (mkdirErr) {
          console.log(mkdirErr);
          resolve(null);
          return;
        }
        fs.writeFile(name, data.Body, (writeErr) => {
          if (writeErr) {
            console.log(writeErr);
            resolve(null);
            return;
          }
          resolve(name);
        });
      });
    });
  });
}
const filepath = await getFile(bucket, key);
2 => Create a helper file for the conversion code and name it pdf2Img.js.
This code converts the temporary PDF file to a JPEG image. It is inspired by pdf2png, which generates a PNG image.
const exec = require("child_process").exec;
const fs = require("fs");
const tmp = require("tmp");
// Default Ghostscript executables path:
// <parent of this module's directory>/executables/ghostScript.
// Built with the path module so the separator matches the platform the code
// runs on, instead of splitting and joining on a hard-coded backslash.
const { dirname, join } = require("path");
const projectPath = dirname(__dirname);
exports.ghostscriptPath = join(projectPath, "executables", "ghostScript");
exports.convert = (pdfPath, options) => {
return new Promise((resolve, reject) => {
if (!options.useLocalGS) {
process.env.Path += ";" + exports.ghostscriptPath;
}
options.quality = options.quality || 100;
// get temporary filepath
tmp.file({ postfix: ".jpeg" }, function (err, imageFilepath, fd) {
if (err) {
resolve({
success: false,
error: "Error getting second temporary filepath: " + err,
});
return;
}
exec(
"gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=jpeg -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" +
options.quality +
" -dFirstPage=1 -dLastPage=1 -sOutputFile=" +
imageFilepath +
" " +
pdfPath,
(error, stdout, stderr) => {
if (error !== null) {
resolve({
success: false,
error: "Error converting pdf to png: " + error,
});
return;
}
const img = fs.readFileSync(imageFilepath);
resolve({ success: true, data: img });
}
);
});
});
};
To generate a jpeg, use the below command in exec
"gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=jpeg -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" +
options.quality +
" -dFirstPage=1 -dLastPage=1 -sOutputFile=" +
imageFilepath +
" " +
pdfPath
To generate png use the below command in exec
"gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" +
options.quality +
" -dFirstPage=1 -dLastPage=1 -sOutputFile=" +
imageFilepath +
" " +
pdfPath
You can find more details about Ghostscript options here: https://www.ghostscript.com/doc/current/Use.htm
3 => Use helper function code in index file. Also set ghostscriptPath path to "/opt/bin/gs"
const pdf2Img = require("./pdf2Img");
pdf2Img.ghostscriptPath = "/opt/bin/gs";
Create a function that will execute the conversion code;
/**
 * Converts the PDF at `pdfPath` to a JPEG through `pdf2Img.convert`.
 *
 * Relies on `pdf2Img` being in scope in the enclosing file.
 *
 * @param {string} pdfPath - Path of the PDF file on disk.
 * @returns {Promise<object>} On success
 *   `{ success: true, contentType: "image/jpeg", data }`; on failure an object
 *   with `success: false` and an `error` description. Never resolves with
 *   undefined, so callers can always inspect `success`.
 */
async function pdfToImage(pdfPath) {
  try {
    const response = await pdf2Img.convert(pdfPath, {});
    if (!response.success) {
      console.log("Error in pdfToImage", response.error);
      return response;
    }
    return {
      success: true,
      contentType: "image/jpeg",
      data: response.data,
    };
  } catch (e) {
    console.log("Error in pdfToImage", e.message);
    // Report the failure in the same shape as a failed conversion instead of
    // falling through and returning undefined.
    return { success: false, error: e.message };
  }
}
const pdfToImageRes = await pdfToImage(filepath);
4 => Upload the converted image to the bucket.
/**
 * Stores `data` in S3 under `${somePath}/${objectname}`.
 *
 * Relies on `s3` and `somePath` being in scope in the enclosing file.
 *
 * @param {string} bucket - Name of the destination bucket.
 * @param {string} objectname - Object name appended to `somePath`.
 * @param {string} contentType - Value sent as the object's ContentType.
 * @param {*} data - Object body handed to `putObject`.
 * @returns {Promise<true|null>} Resolves with true when the upload succeeds,
 *   or with null when it fails (the error is logged).
 */
function uploadFile(bucket, objectname, contentType, data) {
  const request = {
    Bucket: bucket,
    Key: `${somePath}/${objectname}`,
    Body: data,
    ContentType: contentType,
  };
  return new Promise((resolve) => {
    s3.putObject(request, (uploadError) => {
      if (uploadError) {
        console.log(uploadError);
        resolve(null);
        return;
      }
      resolve(true);
    });
  });
}
const responseUpload = await uploadFile(
bucket,
imageName,
pdfToImageRes.contentType,
pdfToImageRes.data
);
That's it!
Top comments (0)