Convert first page of PDF to Image| AWS lambda,S3

#node #aws

Recently I was building a feature that needed to take the first page of a PDF (stored on S3) and convert it to an image. I searched the internet but couldn't find a to-the-point guide on how to do this in AWS Lambda, so here I am sharing my workaround.

Things you need to do before moving onto the code section

Give s3 permissions to the lambda function.
Add a Ghostscript layer: https://github.com/shelfio/ghostscript-lambda-layer

Here are the steps to follow (I will cover only the code-related steps):

1 => Getting a file from S3 and saving it temporarily.

/**
 * Downloads an S3 object and stores it under /tmp so it can be processed locally.
 *
 * @param {string} bucket - Name of the S3 bucket.
 * @param {string} objectname - Key of the object; it is also used as the path below /tmp.
 * @returns {Promise<string|null>} Resolves with the local file path, or with null
 *   when the download, the directory creation or the write fails.
 */
function getFile(bucket, objectname) {
  return new Promise((res) => {
    const params = { Bucket: bucket, Key: objectname };
    s3.getObject(params, (err, data) => {
      if (err) {
        console.log(err);
        res(null);
        // Stop here: `data` is not usable when the request failed, and reading
        // `data.Body` below would throw inside the callback.
        return;
      }
      const name = `/tmp/${objectname}`;
      // Keys may contain "/" (e.g. "docs/a.pdf"). Create the matching directories
      // first, otherwise writeFile fails because the parent directory is missing.
      const dir = name.slice(0, name.lastIndexOf("/"));
      fs.mkdir(dir, { recursive: true }, (mkdirErr) => {
        if (mkdirErr) {
          console.log(mkdirErr);
          res(null);
          return;
        }
        fs.writeFile(name, data.Body, (writeErr) => {
          if (writeErr) {
            console.log(writeErr);
            res(null);
            return;
          }
          res(name);
        });
      });
    });
  });
}

// Resolves to the local /tmp path of the downloaded PDF, or null if getFile failed.
const filepath = await getFile(bucket, key);

2 => Create a helper file for the conversion code and name it pdf2Img.js. This code converts the temporary PDF file to a JPEG image. It is inspired by pdf2png, which generates a PNG image.

const exec = require("child_process").exec;
const execFile = require("child_process").execFile;
const fs = require("fs");
const path = require("path");
const tmp = require("tmp");

// Default Ghostscript executables path:
// <parent of this module's directory>/executables/ghostScript.
// path.dirname/path.join are used instead of splitting on "\\" so that the
// default is built with the host's own separator (it previously only worked
// on Windows). Callers may overwrite exports.ghostscriptPath.
let projectPath = path.dirname(__dirname);

exports.ghostscriptPath = path.join(projectPath, "executables", "ghostScript");

exports.convert = (pdfPath, options) => {
  return new Promise((resolve, reject) => {
    if (!options.useLocalGS) {
      process.env.Path += ";" + exports.ghostscriptPath;
    }
    options.quality = options.quality || 100;
    // get temporary filepath
    tmp.file({ postfix: ".jpeg" }, function (err, imageFilepath, fd) {
      if (err) {
        resolve({
          success: false,
          error: "Error getting second temporary filepath: " + err,
        });
        return;
      }

      exec(
        "gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=jpeg -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" +
          options.quality +
          " -dFirstPage=1 -dLastPage=1 -sOutputFile=" +
          imageFilepath +
          " " +
          pdfPath,
        (error, stdout, stderr) => {
          if (error !== null) {
            resolve({
              success: false,
              error: "Error converting pdf to png: " + error,
            });
            return;
          }
          const img = fs.readFileSync(imageFilepath);
          resolve({ success: true, data: img });
        }
      );
    });
  });
};

To generate a JPEG, use the following Ghostscript command (this is the one used in the code above):

// JPEG variant: the output device is set with -sDEVICE=jpeg, options.quality is
// appended to -r, and -dFirstPage=1 -dLastPage=1 limit the run to page 1.
"gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=jpeg -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" +
          options.quality +
          " -dFirstPage=1 -dLastPage=1 -sOutputFile=" +
          imageFilepath +
          " " +
          pdfPath

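To generate a PNG instead, use the following command (note -sDEVICE=png16m). If you do, also change the temporary file's postfix from ".jpeg" to ".png" and the content type from "image/jpeg" to "image/png":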

// PNG variant: identical to the JPEG command except for -sDEVICE=png16m.
"gs -dQUIET -dPARANOIDSAFER -dBATCH -dNOPAUSE -dNOPROMPT -sDEVICE=png16m -dTextAlphaBits=4 -dGraphicsAlphaBits=4 -r" +
          options.quality +
          " -dFirstPage=1 -dLastPage=1 -sOutputFile=" +
          imageFilepath +
          " " +
          pdfPath

You can find more details about Ghostscript options here: https://www.ghostscript.com/doc/current/Use.htm

3 => Use the helper in the index file, and set ghostscriptPath to "/opt/bin/gs".

const pdf2Img = require("./pdf2Img");
// Override the helper's default ghostscriptPath for the Lambda environment.
// NOTE(review): convert() appends this value to the executable search path,
// which normally holds directories; "/opt/bin/gs" looks like the executable
// itself — confirm whether "/opt/bin" is what is intended.
pdf2Img.ghostscriptPath = "/opt/bin/gs";

Create a function that runs the conversion code:

/**
 * Converts the first page of the PDF at `pdfPath` to a JPEG via pdf2Img.convert.
 *
 * @param {string} pdfPath - Local path of the PDF file.
 * @returns {Promise<object>} On success
 *   `{ success: true, contentType: "image/jpeg", data }`; on any failure
 *   `{ success: false, error }`. Callers should check `success` before using
 *   `contentType` and `data`.
 */
async function pdfToImage(pdfPath) {
  try {
    const response = await pdf2Img.convert(pdfPath, {});
    if (!response.success) {
      console.log("Error in pdfToImage", response.error);
      return response;
    }
    return {
      success: true,
      contentType: "image/jpeg",
      data: response.data,
    };
  } catch (e) {
    console.log("Error in pdfToImage", e.message);
    // Return the same failure shape as convert() instead of undefined, so
    // callers reading properties of the result do not hit a TypeError.
    return { success: false, error: e.message };
  }
}

// On success the result holds { contentType, data }; check for a failed
// conversion before passing these on to the upload step.
const pdfToImageRes = await pdfToImage(filepath);

4 => Upload the converted image to the bucket.

/**
 * Stores `data` in S3 under the key `${somePath}/${objectname}`.
 *
 * @param {string} bucket - Target bucket name.
 * @param {string} objectname - Object name appended to `somePath`.
 * @param {string} contentType - Value sent as the object's ContentType.
 * @param {*} data - Payload sent as the object's Body.
 * @returns {Promise<true|null>} true once the upload succeeded; null (after
 *   logging the error) when S3 reported a failure.
 */
function uploadFile(bucket, objectname, contentType, data) {
  return new Promise((settle) => {
    const request = {
      Bucket: bucket,
      Key: `${somePath}/${objectname}`,
      Body: data,
      ContentType: contentType,
    };
    s3.putObject(request, (uploadError) => {
      if (!uploadError) {
        settle(true);
        return;
      }
      console.log(uploadError);
      settle(null);
    });
  });
}

// Upload the converted image with the content type reported by pdfToImage.
// uploadFile resolves to true on success and null on failure.
// bucket and imageName are not defined in this snippet; they must come from
// the surrounding handler code.
const responseUpload = await uploadFile(
  bucket,
  imageName,
  pdfToImageRes.contentType,
  pdfToImageRes.data
);

That's it!

DEV Community

Convert first page of PDF to Image| AWS lambda,S3

Things you need to do before moving onto the code section

Here are the steps to be followed (I will write steps for code only)

Top comments (0)

Read next

Release of Node.js 22 [Live] What is new?!

API using Deno and ElyasiaJS

Github ARC Runners sur EKS - Assume roles

Importing existing AWS IAM resources to Terraform