在Google云端存储上将PDF转换为PNG

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了在Google云端存储上将PDF转换为PNG相关的知识,希望对你有一定的参考价值。

我想从不同的文件中创建一个PNG缩略图,这些文件会上传到Google存储桶中。目前,我的目标是图片和PDF。对于图像,功能工作正常,但对于PDF,我无法使其工作。我们的想法是从存储桶下载文件,完成工作,然后将新文件(PNG缩略图)上传到存储桶。

因此,我会先检查上传文件的类型:如果文件是图像,就用createImageFromImage函数进行转换;如果是PDF,则用createImageFromPDF函数。

主函数:

const gm = require('gm').subClass({imageMagick: true});
const fs = require('fs');
const path = require('path');
const {Storage} = require('@google-cloud/storage');
const storage = new Storage();
const im = require('imagemagick');

exports.generatePreviewImage = event => {
  const object = event.data || event; // Node 6: event.data === Node 8+: event

  const file = storage.bucket(object.bucket).file(object.name);
  const filePath = `gs://${object.bucket}/${object.name}`;

  // Ignore already-resized files (to prevent re-invoking this function)
  if (file.name.endsWith('-thumb.png')) {
    console.log(`The image ${file.name} is already resized.`);
    return;
  } else {
    console.log(`Analyzing ${file.name}.`);
    //  Check the file extension
    if(object.contentType.startsWith('image/')) {  //  It's an image
      console.log("This is an image!")
      return createImageFromImage(file);
    } else if (object.contentType === 'application/pdf') {  //  It's a PDF
      console.log("This is a PDF file!")
      return createImageFromPDF(file);
    } else {
      return;
    }
  }
};

createImageFromImage(file) - 有效

/**
 * Generates a PNG thumbnail for an image stored in a bucket.
 *
 * The image is downloaded to /tmp, resized to a width of 250 and converted to
 * PNG in place (the result overwrites the downloaded file), then uploaded to
 * the same bucket as `<name-without-extension>-thumb.png`. The temporary file
 * is removed afterwards, including when a step fails.
 *
 * @param {Object} file Bucket file handle; this function uses `name`,
 *     `download()` and `bucket.upload()`.
 * @returns {Promise<void>} Resolves once the thumbnail is uploaded and the
 *     temporary file is deleted; rejects with the error of the failing step.
 */
function createImageFromImage(file) {
  const tempLocalPath = `/tmp/${path.parse(file.name).base}`;

  //  Get the name of the file without the file extension and mark the result as resized, to avoid re-triggering this function.
  const newName = `${path.parse(file.name).name}-thumb.png`;

  // Best-effort removal for the failure path: the original error is what the
  // caller needs to see, so an unlink error here is deliberately ignored.
  const discardTempFile = () =>
    new Promise(resolve => {
      fs.unlink(tempLocalPath, () => resolve());
    });

  // Download file from bucket.
  return file
    .download({destination: tempLocalPath})
    .catch(err => {
      console.error('Failed to download file.', err);
      return Promise.reject(err);
    })
    .then(() => {
      console.log(
        `Image ${file.name} has been downloaded to ${tempLocalPath}.`
      );

      // Resize the image using ImageMagick.
      return new Promise((resolve, reject) => {
        gm(tempLocalPath)
          .resize(250)
          .setFormat('png')
          .write(tempLocalPath, (err, stdout) => {
            if (err) {
              console.error('Failed to resize the image.', err);
              reject(err);
            } else {
              resolve(stdout);
            }
          });
      });
    })
    .then(() => {
      console.log(`Image ${file.name} has been resized.`);

      // Upload the resized image back into the bucket.
      return file.bucket
        .upload(tempLocalPath, {destination: newName})
        .catch(err => {
          console.error('Failed to upload resized image.', err);
          return Promise.reject(err);
        });
    })
    .then(
      () => {
        console.log(`Resized image has been uploaded to ${newName}.`);

        // Delete the temporary file.
        return new Promise((resolve, reject) => {
          fs.unlink(tempLocalPath, err => {
            if (err) {
              reject(err);
            } else {
              resolve();
            }
          });
        });
      },
      // Any earlier step failed: still free the /tmp space, then re-raise.
      err => discardTempFile().then(() => Promise.reject(err))
    );
}

createImageFromPDF(file) - 不起作用

/**
 * Generates a PNG thumbnail for a PDF stored in a bucket.
 *
 * The PDF is downloaded to /tmp, its first page is rendered into a separate
 * PNG under /tmp (fitted within 250x250), and that PNG is uploaded to the same
 * bucket as `<name-without-extension>-thumb.png`. Both temporary files are
 * removed afterwards, including when a step fails.
 *
 * NOTE(review): ImageMagick delegates PDF rendering to Ghostscript and can be
 * blocked by its security policy (policy.xml); confirm that the runtime has
 * both available, otherwise `convert` fails before producing any image.
 *
 * @param {Object} file Bucket file handle; this function uses `name`,
 *     `download()` and `bucket.upload()`.
 * @returns {Promise<void>} Resolves once the thumbnail is uploaded and the
 *     temporary files are deleted; rejects with the error of the failing step.
 */
function createImageFromPDF(file) {
  const parsedName = path.parse(file.name);
  const tempLocalPath = `/tmp/${parsedName.base}`;

  //  Get the name of the file without the file extension and mark the result as resized, to avoid re-triggering this function.
  const newName = `${parsedName.name}-thumb.png`;

  // The thumbnail gets its own file under /tmp: the output must not land in
  // the working directory, and it must not be confused with the source PDF.
  const tempThumbPath = `/tmp/${newName}`;

  // Promise wrapper around fs.unlink that rejects on failure.
  const removeFile = target =>
    new Promise((resolve, reject) => {
      fs.unlink(target, err => {
        if (err) {
          reject(err);
        } else {
          resolve();
        }
      });
    });

  // Best-effort removal for the failure path: either file may not exist yet,
  // and the original error is what the caller needs to see.
  const discardTempFiles = () =>
    Promise.all(
      [tempLocalPath, tempThumbPath].map(target =>
        removeFile(target).catch(() => undefined)
      )
    );

  return file
    .download({destination: tempLocalPath}) // Download file from bucket.
    .catch(err => {
      console.error('Failed to download file.', err);
      return Promise.reject(err);
    })
    .then(() => { // Convert the first page of the PDF to a PNG thumbnail.
      console.log(`File ${file.name} has been downloaded to ${tempLocalPath}.`);

      return new Promise((resolve, reject) => {
        // `[0]` selects only the first page, so a multi-page PDF yields a
        // single output image.
        im.convert(
          [`${tempLocalPath}[0]`, '-resize', '250x250', tempThumbPath],
          function(err, stdout) {
            if (err) {
              console.error('Failed to convert the PDF.', err);
              reject(err);
            } else {
              resolve(stdout);
            }
          });
      });
    })
    .then(() => { //  Upload the new image to the bucket
      console.log(`File ${file.name} has been resized.`);

      // Upload the generated thumbnail (not the downloaded PDF).
      return file.bucket
        .upload(tempThumbPath, {destination: newName})
        .catch(err => {
          console.error('Failed to upload resized image.', err);
          return Promise.reject(err);
        });
    })
    .then(
      () => { // Delete the temporary files.
        console.log(`Resized image has been uploaded to ${newName}.`);

        return Promise.all([
          removeFile(tempLocalPath),
          removeFile(tempThumbPath),
        ]).then(() => undefined);
      },
      // Any earlier step failed: still free the /tmp space, then re-raise.
      err => discardTempFiles().then(() => Promise.reject(err))
    );
}

我从im.convert得到一个错误:Command failed: convert: no images defined 'test1-thumb.png' @ error/convert.c/ConvertImageCommand/3210.我不确定这是否是从PDF文件创建PNG缩略图的正确方法,我尝试了其他解决方案但没有成功。请告诉我我做错了什么。谢谢!

答案

我刚刚意识到gm本身就可以调用ImageMagick,而且你已经这样做了(使用.subClass({imageMagick: true})),那么为什么还要再引入另一个封装库呢?

无论如何,我只是试过这个:

// Minimal check: run a local PDF through gm (backed by ImageMagick), resize
// it to 250x250 and encode it as PNG.
const gm = require('gm').subClass({imageMagick: true});
const file = './test.pdf';

// Reports the outcome of the write; `err` is set when the conversion failed.
function reportResult(err) {
  if (err) {
    console.log('FAILED', err);
    return;
  }
  console.log('SUCCESS');
}

// Note: the output is written back to the same path the input was read from.
gm(file)
  .resize(250, 250)
  .setFormat('png')
  .write(file, reportResult);

它起初报了“未授权”(not authorized)之类的错误,因为ImageMagick的PDF处理默认是被禁用的(请参阅原回答中给出的链接);但在我按照建议编辑/etc/ImageMagick*/policy.xml之后,它运行得很好。

以上是关于在Google云端存储上将PDF转换为PNG的主要内容,如果未能解决你的问题,请参考以下文章

将 PDF 转换为 PNG 时出错 - Python 3.6 和 GhostScript

使用Google应用脚本从PDF到文本转换获取文本

Google Drive API v3-将文档转换为pdf

ImageMagick 无法在 WSL 上将 PDF 转换为图像

Libreoffice 在命令行上将 HTML 转换为 PDF 会产生空白页面

如何在Mac OS X上将PDF转换为Microsoft Word