from openai import OpenAI
from PyPDF2 import PdfReader, PdfWriter
# OpenAI API usage guide: https://platform.openai.com/docs/quickstart?language-preference=python
# Module-level client used by Translate.translate for chat-completion requests.
# NOTE(review): OpenAI() is called with no arguments, so credentials presumably
# come from the environment (e.g. OPENAI_API_KEY) — confirm against the guide above.
client = OpenAI()
class Translate:
    """Translate the text of a PDF into Chinese, page by page, via the OpenAI API.

    Attributes are plain instance fields:
        file_path: path of the PDF to read.
        output_pdf: path the translated text is written to.
        model: chat-completion model name passed to the API.
    """

    def __init__(self, file_path="origin.pdf",
                 output_pdf="output_translated1.pdf",
                 model="gpt-4o-mini"):
        """Store the input path, output path and model name.

        All arguments default to the values that were previously hard-coded,
        so ``Translate()`` behaves as before.
        """
        self.file_path = file_path
        self.output_pdf = output_pdf
        self.model = model

    def extract_text_from_pdf_translate(self, interupt=None):
        """Extract each page of the PDF and submit it for translation.

        Args:
            interupt: 1-based page number at which a previous run stopped;
                pages before it are skipped. ``None`` means start from the
                first page.

        Returns:
            The translated pages joined together, each followed by a newline.
            When resuming, only pages from ``interupt`` onwards are included.
            If a page fails, the text translated so far is returned.
        """
        reader = PdfReader(self.file_path)
        translated = []
        failed_page = None
        for num, page in enumerate(reader.pages, start=1):
            # Skip pages already handled by an earlier run and resume from the
            # page where that run stopped.
            if interupt is not None and num < interupt:
                continue
            print(f'开始处理第 {num} 页')
            page_text = page.extract_text()
            # Pages without a text layer yield nothing to translate; sending
            # an empty prompt wastes a request and could stall every resume
            # at this same page.
            if not page_text or not page_text.strip():
                print(f'第 {num} 页没有可提取的文本,已跳过')
                continue
            chinese = self.translate(page_text)
            if not chinese:
                print(f'第 {num} 页失败')
                failed_page = num
                break
            translated.append(chinese)
        text = "".join(f'{chunk}\n' for chunk in translated)
        if failed_page is None:
            print(f'全部翻译完成\n\n{text}')
        else:
            # Do not claim completion when the loop stopped early; report the
            # page to resume from instead.
            print(f'翻译在第 {failed_page} 页中断,可传入 interupt={failed_page} 继续\n\n{text}')
        return text

    def translate(self, text_origin):
        """Ask the chat-completion API to translate ``text_origin`` into Chinese.

        Returns:
            The translated text, or ``None`` if the request failed.
        """
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a translation assistant."},
                    {"role": "user", "content": f"将该文本翻译成中文: {text_origin}"}
                ]
            )
            return response.choices[0].message.content
        except Exception as exc:
            # Catch Exception rather than everything so Ctrl-C and SystemExit
            # still stop the program, and report the cause instead of hiding it.
            print(f'翻译请求失败: {exc!r}')
            return None

    def save_to_pdf(self, text):
        """Write the translated text to ``self.output_pdf`` as UTF-8 bytes.

        NOTE(review): the file receives raw UTF-8 text, not a PDF document
        structure, so PDF viewers will likely not open it. Producing a real
        PDF with Chinese glyphs needs a library that can embed fonts and draw
        text. The previously constructed PdfWriter and blank page were never
        written anywhere and have been removed.
        """
        with open(self.output_pdf, "wb") as file:
            file.write(text.encode("utf-8"))

    def run(self, interupt=None):
        """Translate the PDF and save the result.

        Args:
            interupt: optional 1-based page number to resume from; forwarded
                to ``extract_text_from_pdf_translate``.
        """
        text = self.extract_text_from_pdf_translate(interupt)
        self.save_to_pdf(text)
# Script entry point: build a translator with its default paths and run it.
if __name__ == '__main__':
    translator = Translate()
    translator.run()
# For further actions, you may consider blocking this person and/or reporting abuse
# Top comments (0)