feat: Implement mobile application and lead processing utilities.
This commit is contained in:
22
scripts/preview_duesseldorf_pdf.py
Normal file
22
scripts/preview_duesseldorf_pdf.py
Normal file
@@ -0,0 +1,22 @@
|
||||
import pypdf
|
||||
import re
|
||||
|
||||
pdf_path = 'cologne_duesseldorf_data/duesseldorf_innungen.pdf'

# Number of leading characters of the extracted text shown in the preview.
PREVIEW_CHARS = 1000

# Candidate e-mail addresses: a local part, "@", then a domain made of at
# least two dot-separated labels. Requiring a label after every dot keeps a
# sentence-ending period ("info@example.de.") out of the match, and the
# mandatory dot rejects bare "name@word" tokens.
EMAIL_PATTERN = re.compile(r'[\w.+-]+@[\w-]+(?:\.[\w-]+)+')


def extract_pdf_text(path: str) -> str:
    """Return the text of every page of the PDF at *path*.

    Each page's text is followed by a newline, in page order.

    Raises:
        Whatever ``pypdf.PdfReader`` or ``extract_text`` raise for a
        missing, unreadable or malformed file; the caller decides how to
        report it.
    """
    reader = pypdf.PdfReader(path)
    # `or ""` is a defensive guard in case a page yields no text object.
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)


def find_emails(text: str) -> list[str]:
    """Return all substrings of *text* that look like e-mail addresses.

    Matches are returned in order of appearance; duplicates are kept so the
    length of the result is the number of occurrences.
    """
    return EMAIL_PATTERN.findall(text)


def main() -> None:
    """Print a preview of the PDF text and a count of e-mail candidates."""
    # Only the extraction step is guarded: a failure while printing must not
    # be reported as a PDF read error.
    try:
        text = extract_pdf_text(pdf_path)
    except Exception as e:  # top-level boundary of a throwaway preview script
        print(f"Error reading PDF: {e}")
        return

    print(f"Extracted {len(text)} characters.")
    print("--- PREVIEW ---")
    print(text[:PREVIEW_CHARS])
    print("--- END PREVIEW ---")

    # Simple regex check for emails
    emails = find_emails(text)
    print(f"Found {len(emails)} potential email addresses.")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user