feat: Implement mobile application and lead processing utilities.

This commit is contained in:
2026-02-19 14:21:51 +01:00
parent fca42db4d2
commit c53a71a5f9
120 changed files with 24080 additions and 851 deletions

View File

@@ -0,0 +1,35 @@
import pypdf
import re
# Default input for extract_emails_direct(): path of the PDF whose text is
# scanned for e-mail addresses.
pdf_path = "cologne_duesseldorf_data/duesseldorf_innungen.pdf"
def extract_emails_direct(path=None):
    """Print text samples and every e-mail address found in a PDF.

    The PDF is opened with ``pypdf.PdfReader``. For each page the first 200
    characters of the extracted text are printed; afterwards the combined
    text of all pages is searched for e-mail-like substrings and each hit is
    printed together with up to 50 characters of context on either side.

    Args:
        path: Location of the PDF to read. When omitted (``None``), the
            module-level ``pdf_path`` is used, which keeps the original
            zero-argument call working unchanged.

    Returns:
        None. All results are written to standard output.

    Any exception raised while reading or reporting is caught and printed as
    ``Error: ...`` rather than propagated, so a failure never aborts the
    calling script.
    """
    if path is None:
        path = pdf_path

    # The whole run is wrapped on purpose: this is a best-effort diagnostic
    # dump, and page text is extracted lazily, so a failure can surface at
    # any point of the loop below.
    try:
        reader = pypdf.PdfReader(path)
        print(f"PDF matches {len(reader.pages)} pages.")

        page_texts = []
        for i, page in enumerate(reader.pages):
            # Treat a page that yields no text as empty so the slice and the
            # join below stay safe.
            text = page.extract_text() or ""
            page_texts.append(text)
            print(f"--- Page {i+1} Text Sample (First 200 chars) ---")
            print(text[:200])
            print("------------------------------------------------")

        # Each page's text is terminated by a newline; joining once avoids
        # repeatedly copying a growing string.
        full_text = "".join(f"{text}\n" for text in page_texts)

        matches = list(re.finditer(r'[\w\.-]+@[\w\.-]+\.\w+', full_text))
        print(f"Total extracted text length: {len(full_text)}")
        print(f"Found {len(matches)} emails.")

        for match in matches:
            print(f"Email: {match.group()}")
            # Use this match's own offsets: searching the text again for the
            # address would always land on its first occurrence and show the
            # wrong surroundings for repeated addresses.
            start = max(0, match.start() - 50)
            end = min(len(full_text), match.end() + 50)
            # chr(10) is "\n"; flatten line breaks so the context is one line.
            print(f"Context: {full_text[start:end].replace(chr(10), ' ')}")
    except Exception as e:
        print(f"Error: {e}")
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    extract_emails_direct()