# %%
# %pip install pymupdf
# %pip install frontend
# %pip install tools
# %%
import pymupdf # PyMuPDF
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
# %%
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_path: Location of the PDF, passed straight to ``pymupdf.open``.

    Returns:
        A ``(text, page_count)`` tuple: the text of all pages concatenated
        in page order, and the number of pages in the document.
    """
    # The context manager closes the document (and its underlying file
    # handle) even if text extraction raises part-way through.
    with pymupdf.open(pdf_path) as doc:
        # Join once instead of growing a string with += for every page.
        text = "".join(page.get_text() for page in doc)
        # Read the page count while the document is still open.
        return (text, len(doc))
# %%
def summarize_text(text, num_sentences=10, language="french"):
    """Build an extractive summary of *text* with sumy's LSA summarizer.

    Args:
        text: Plain text to summarize.
        num_sentences: Number of sentences requested from the summarizer.
        language: Language name handed to sumy's ``Tokenizer``. Defaults to
            ``"french"``, the value that used to be hard-coded, so existing
            callers get the same result as before.

    Returns:
        The object returned by ``LsaSummarizer`` for the parsed document;
        the calling code slices and iterates it as a sequence of sentences.
    """
    parser = PlaintextParser.from_string(text, Tokenizer(language))
    summarizer = LsaSummarizer()
    return summarizer(parser.document, num_sentences)
# %%
pdf_path = "sy.pdf"
# %%
# `l` receives the page count returned alongside the extracted text.
(text, l) = extract_text_from_pdf(pdf_path)
print(l)
summary = summarize_text(text, num_sentences=30)
# %%
print(summary)
# %%
# NOTE(review): the [1:] slice skips the first sentence of the summary --
# confirm this is intentional.
for sentence in summary[1:]:
    print(sentence)
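# NOTE: the two lines below are footer text from the web page this snippet
# was copied from; they are not part of the script.
# For further actions, you may consider blocking this person and/or reporting abuse
# Top comments (0)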