feat: Implement mobile application and lead processing utilities.
This commit is contained in:
28
scripts/extract_pdf_links.py
Normal file
28
scripts/extract_pdf_links.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import pypdf
|
||||
|
||||
# Relative path of the PDF that extract_links() opens and scans for links.
pdf_path = 'cologne_duesseldorf_data/duesseldorf_innungen.pdf'
|
||||
|
||||
def extract_links(path=None):
    """Print and return every URI link annotation found in a PDF.

    Each page's ``/Annots`` array is walked; an annotation is kept when its
    ``/A`` action dictionary has a ``/URI`` entry. A count line is printed,
    followed by one line per link, labelled ``Mailto:`` for ``mailto:`` URIs
    and ``Link:`` for everything else.

    Args:
        path: PDF file to scan. When omitted, the module-level ``pdf_path``
            is used, which preserves the original zero-argument behaviour.

    Returns:
        The collected URIs in page/annotation order. If the PDF cannot be
        read or parsed, the error is printed (not raised) and an empty list
        is returned.
    """
    source = pdf_path if path is None else path
    links = []

    # Only the PDF access is guarded; printing the results cannot fail in a
    # way that should be reported as a read error.
    try:
        reader = pypdf.PdfReader(source)
        for page in reader.pages:
            if "/Annots" not in page:
                continue
            for annot in page["/Annots"]:
                # Annotation array entries are usually indirect references.
                obj = annot.get_object()
                if "/A" not in obj or "/URI" not in obj["/A"]:
                    continue
                uri = obj["/A"]["/URI"]
                if isinstance(uri, bytes):
                    # A URI that arrives as raw bytes would break the text
                    # prefix check below; normalise it to str first.
                    uri = uri.decode("utf-8", errors="replace")
                links.append(uri)
    except Exception as e:
        # Best-effort script: report the failure and yield no links.
        print(f"Error: {e}")
        return []

    print(f"Found {len(links)} links.")
    for link in links:
        # Match the scheme prefix (case-insensitively) rather than any
        # substring, so an http URL that merely contains "mailto:" is not
        # mislabelled.
        if link.lower().startswith("mailto:"):
            print(f"Mailto: {link}")
        else:
            print(f"Link: {link}")
    return links
|
||||
|
||||
# Script entry point: scan the default PDF only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    extract_links()
|
||||
Reference in New Issue
Block a user