DEV Community

drake
drake

Posted on

调用 OpenAI (ChatGPT) API 翻译 PDF 文档

from openai import OpenAI
from PyPDF2 import PdfReader, PdfWriter

# How to use the OpenAI API: https://platform.openai.com/docs/quickstart?language-preference=python
# Module-level client used by Translate.translate for every request.
# NOTE(review): constructed with no arguments, so credentials presumably come
# from the environment as described in the quickstart above - confirm.
client = OpenAI()

class Translate:
    """Translate a PDF into Chinese, one page at a time, via the OpenAI chat API.

    The input and output paths are fixed in ``__init__``; ``run`` ties the
    extract/translate step and the save step together.
    """

    def __init__(self):
        # Source PDF to read and destination file for the translated text.
        self.file_path = "origin.pdf"
        self.output_pdf = "output_translated1.pdf"

    def extract_text_from_pdf_translate(self, interupt=None):
        """Extract the text of each page and have it translated.

        Args:
            interupt: 1-based page number at which a previous run failed.
                Pages before it are skipped so translation restarts there.
                ``None`` (or any falsy value) means start from the first page.

        Returns:
            The translated pages joined together, each followed by a newline.
            If a page fails to translate, processing stops and only the pages
            translated so far (in this call) are returned.
        """
        reader = PdfReader(self.file_path)
        first_page = interupt if interupt else 1
        translated = []
        failed_page = None

        for num, page in enumerate(reader.pages, start=1):
            # Skip pages handled by an earlier run; resume at the failed page.
            if num < first_page:
                continue
            print(f'开始处理第 {num}')
            page_text = page.extract_text()
            # A page without extractable text (blank or image-only) has
            # nothing to translate; sending it would only produce filler
            # output, so move on.
            if not page_text or not page_text.strip():
                print(f'{num} 页没有可提取的文本,跳过')
                continue
            chinese = self.translate(page_text)
            if not chinese:
                print(f'{num} 页失败')
                failed_page = num
                break
            translated.append(f'{chinese}\n')

        text = "".join(translated)
        if failed_page is None:
            print(f'全部翻译完成\n\n{text}')
        else:
            # Do not claim completion after an aborted run; report where to
            # resume instead.
            print(f'翻译在第 {failed_page} 页中断,可传入 interupt={failed_page} 重新开始\n\n{text}')
        return text

    def translate(self, text_origin):
        """Ask the chat model to translate ``text_origin`` into Chinese.

        Args:
            text_origin: Text to translate.

        Returns:
            The content of the first completion choice, or ``None`` when the
            request (or reading its result) raises an exception.
        """
        try:
            response = client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {"role": "system", "content": "You are a translation assistant."},
                    {"role": "user", "content": f"将该文本翻译成中文: {text_origin}"}
                ]
            )
            return response.choices[0].message.content
        except Exception as exc:
            # Best-effort: the caller treats None as "this page failed".
            # Catch Exception rather than everything so Ctrl-C and SystemExit
            # still propagate, and report the cause instead of hiding it.
            print(f'翻译请求失败: {exc!r}')
            return None

    def save_to_pdf(self, text):
        """Write the translated text to ``self.output_pdf``.

        NOTE(review): the bytes written are plain UTF-8 text, not a PDF
        document, even though the target name ends in ``.pdf``; PDF viewers
        will likely reject the file. Producing a real PDF with CJK text needs
        a rendering library that this file does not import.

        Args:
            text: Translated text to store.
        """
        with open(self.output_pdf, "wb") as file:
            file.write(text.encode("utf-8"))

    def run(self):
        """Translate the whole input file and save the result."""
        text = self.extract_text_from_pdf_translate()
        self.save_to_pdf(text)

# Script entry point: build a translator and process the configured file.
if __name__ == "__main__":
    translator = Translate()
    translator.run()
Enter fullscreen mode Exit fullscreen mode

Top comments (0)