feat: Implement mobile application and lead processing utilities.
This commit is contained in:
36
scripts/debug_pdf.py
Normal file
36
scripts/debug_pdf.py
Normal file
@@ -0,0 +1,36 @@
|
||||
import pypdf
|
||||
|
||||
# PDF file that debug_pdf() opens with pypdf.PdfReader (relative path).
pdf_path = 'cologne_duesseldorf_data/duesseldorf_innungen.pdf'
|
||||
|
||||
def debug_pdf(path=None, target="Jens Schulz"):
    """Print diagnostic excerpts of the text extracted from a PDF.

    The text of every page is extracted and concatenated (one newline after
    each page). The function then reports:

    * where ``target`` first occurs, with 200 characters of context before
      and 500 after the match position, or that it was not found;
    * how many '@' characters the text contains, with 50 characters of
      context on either side of the first one.

    Args:
        path: PDF file to read. When ``None`` (the default) the module-level
            ``pdf_path`` is used.
        target: Substring to search for in the extracted text.

    Returns:
        None. Every result, including any error, is printed to stdout.
    """
    # Best-effort debug helper: any failure (missing file, unreadable PDF,
    # console encoding problems while printing) is reported, not raised.
    try:
        source = pdf_path if path is None else path
        reader = pypdf.PdfReader(source)
        # join() avoids the repeated string reallocation of `text += ...`.
        text = "".join(page.extract_text() + "\n" for page in reader.pages)

        # Search for the known name.
        idx = text.find(target)
        if idx != -1:
            print(f"Found '{target}' at index {idx}")
            # For the default target this prints the original fixed header.
            print(f"--- CONTEXT AROUND {target.upper()} ---")
            # Slicing clamps the upper bound itself; only the lower bound
            # needs guarding against a negative (wrap-around) index.
            print(text[max(0, idx - 200):idx + 500])
            print("--- END CONTEXT ---")
        else:
            print(f"'{target}' not found!")

        # Search for '@' as a rough indicator of e-mail addresses.
        at_count = text.count("@")
        print(f"Found {at_count} '@' symbols.")
        if at_count:
            first_at = text.find("@")
            print("Context around first '@':")
            print(text[max(0, first_at - 50):first_at + 50])
    except Exception as e:
        print(f"Error: {e}")
|
||||
|
||||
# Run the diagnostic only when executed as a script, not on import.
if __name__ == "__main__":
    debug_pdf()
|
||||
Reference in New Issue
Block a user