feat: Implement mobile application and lead processing utilities.
This commit is contained in:
15
scripts/dump_duesseldorf_text.py
Normal file
15
scripts/dump_duesseldorf_text.py
Normal file
@@ -0,0 +1,15 @@
|
||||
# Dump the extracted text of every page of the Duesseldorf Innungen PDF into
# a single UTF-8 text file, one trailing newline per page.
import pypdf

# Both paths are relative to the current working directory.
pdf_path = 'cologne_duesseldorf_data/duesseldorf_innungen.pdf'

try:
    reader = pypdf.PdfReader(pdf_path)

    # Build the output in one pass instead of repeated `+=` concatenation.
    # `or ''` guards against an extractor that yields None for a page with no
    # text layer, which would otherwise raise TypeError and abort the dump.
    text = "".join(f"{page.extract_text() or ''}\n" for page in reader.pages)

    with open('cologne_duesseldorf_data/duesseldorf_raw.txt', 'w', encoding='utf-8') as f:
        f.write(text)

    print(f"Dumped {len(text)} characters to duesseldorf_raw.txt")
except Exception as e:
    # Deliberate best-effort: report any failure (missing file, unreadable
    # PDF, write error) on stdout rather than raising a traceback.
    print(f"Error: {e}")
|
||||
Reference in New Issue
Block a user