from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser
from io import StringIO
def read_pdf(file_path):
    """Extract the text of every page of a PDF file.

    The file is opened in binary mode, parsed with pdfminer, and each page
    is rendered through a ``TextConverter`` that writes into an in-memory
    text buffer.

    Args:
        file_path: Path of the PDF file to read; passed directly to
            ``open()``.

    Returns:
        The accumulated text written by the converter for all pages, in
        the order ``PDFPage.create_pages`` yields them.

    Raises:
        OSError: If the file cannot be opened (raised by ``open()``).
        Errors raised by pdfminer while parsing or rendering are not
        caught here and propagate to the caller.
    """
    output_string = StringIO()
    with open(file_path, 'rb') as f:
        parser = PDFParser(f)
        doc = PDFDocument(parser)
        rm = PDFResourceManager()
        device = TextConverter(rm, output_string, laparams=LAParams())
        try:
            interpreter = PDFPageInterpreter(rm, device)
            # Pages are read lazily from the open file, so the whole loop
            # must run while the file handle is still open.
            for page in PDFPage.create_pages(doc):
                interpreter.process_page(page)
        finally:
            # Release the converter device even if a page fails to render.
            device.close()
    return output_string.getvalue()
|