feat: Implement mobile application and lead processing utilities.

2026-02-19 14:21:51 +01:00
parent fca42db4d2
commit c53a71a5f9
120 changed files with 24080 additions and 851 deletions
--- a/scripts/extract_emails_direct.py
+++ b/scripts/extract_emails_direct.py
@@ -0,0 +1,35 @@
+import pypdf
+import re
+
+# PDF scanned by extract_emails_direct(). The path is relative, so it is
+# resolved against the current working directory at run time.
+pdf_path = 'cologne_duesseldorf_data/duesseldorf_innungen.pdf'
+
+def extract_emails_direct():
+    """Print text samples and every e-mail address found in the PDF at ``pdf_path``.
+
+    For each page, the first 200 characters of the extracted text are printed
+    as a sample. All page texts are then joined (each followed by a newline)
+    and scanned for e-mail-like tokens. Every hit is printed together with up
+    to 50 characters of surrounding text on either side, with newlines
+    flattened to spaces.
+
+    Returns:
+        None. All output goes to stdout.
+
+    Any ``Exception`` raised while reading or scanning the PDF is caught and
+    reported as ``Error: ...`` instead of being propagated.
+    """
+    try:
+        reader = pypdf.PdfReader(pdf_path)
+        print(f"PDF matches {len(reader.pages)} pages.")
+
+        page_texts = []
+        for i, page in enumerate(reader.pages):
+            # Defensive: treat a missing/empty extraction result as empty text
+            # so slicing and joining below cannot fail on it.
+            text = page.extract_text() or ""
+            page_texts.append(text)
+            print(f"--- Page {i+1} Text Sample (First 200 chars) ---")
+            print(text[:200])
+            print("------------------------------------------------")
+
+        # Join once instead of growing a string inside the loop; every page
+        # is terminated by a newline, exactly as before.
+        full_text = "".join(f"{text}\n" for text in page_texts)
+
+        # finditer keeps the offset of each individual match. Looking the
+        # address up again with str.find() would always land on its FIRST
+        # occurrence and print the wrong context for repeated addresses.
+        matches = list(re.finditer(r'[\w\.-]+@[\w\.-]+\.\w+', full_text))
+        print(f"Total extracted text length: {len(full_text)}")
+        print(f"Found {len(matches)} emails.")
+
+        for match in matches:
+            print(f"Email: {match.group()}")
+            # 50 characters of context before and after this occurrence,
+            # clamped to the bounds of the text.
+            start = max(0, match.start() - 50)
+            end = min(len(full_text), match.end() + 50)
+            print(f"Context: {full_text[start:end].replace(chr(10), ' ')}")
+
+    except Exception as e:
+        # Top-level boundary of this diagnostic script: report and carry on
+        # rather than crash with a traceback.
+        print(f"Error: {e}")
+
+# Run the extraction only when executed as a script, not when imported.
+if __name__ == "__main__":
+    extract_emails_direct()