feat: Implement mobile application and lead processing utilities.
This commit is contained in:
40
scripts/prepare_batch6_v2.py
Normal file
40
scripts/prepare_batch6_v2.py
Normal file
@@ -0,0 +1,40 @@
|
||||
import json
|
||||
|
||||
# Substring markers for guild names. A target whose 'innung' value contains
# any of these fragments is counted as already processed and is skipped
# when the next batch is assembled.
processed_proximates = [
    "Baugewerbe",
    "Dachdecker",
    "Elektro",
    "Sanitär",
    "Stahl",
    "Tischler",
    "Maler",
    "Kraftfahrzeug",
    "Friseur",
    "Fleischer",
    "Zimmerer",
    "Glaser",
    "Rollladen",
    "Gebäudereiniger",
    "Augenoptiker",
    "Bäcker",
    "Konditoren",
    "Schornsteinfeger",
    "Steinmetz",
    "Straßenbauer",
    "Stukkateur",
    "Boots",
    "Gold",
    "Informationstechnik",
    "Kachel",
    "Karosserie",
    "Schneider",
    "Instrumenten",
    "Orthopädie",
    "Parkett",
    "Sattler",
    "Werbe",
    "Zahn",
]
|
||||
|
||||
def is_processed(name, markers=None):
    """Return True when *name* contains any already-processed marker.

    Matching is a plain, case-sensitive substring test, so the marker
    "Sanitär" matches "Innung Sanitär-Heizung...". No word-boundary check
    is done: a short marker can also match inside a longer, unrelated
    word. That trade-off is accepted for this script.

    Args:
        name: Guild name to check. A value that is not a string (for
            example a JSON ``null``) is reported as not processed instead
            of raising ``TypeError``.
        markers: Optional iterable of substrings to look for. When omitted,
            the module-level ``processed_proximates`` list is used.

    Returns:
        bool: True if at least one marker occurs in *name*, else False.
    """
    if not isinstance(name, str):
        return False
    if markers is None:
        # Resolved at call time so later changes to the module list are seen.
        markers = processed_proximates
    return any(marker in name for marker in markers)
|
||||
|
||||
def prepare_batch6(
    targets_path='cologne_duesseldorf_data/duesseldorf_targets.json',
    batch_path='cologne_duesseldorf_data/batch6_targets.json',
    batch_size=30,
):
    """Write the next batch of unprocessed targets to a JSON file.

    Loads the target list, drops every entry whose 'innung' value is
    reported as processed by ``is_processed``, keeps the first
    *batch_size* remaining entries in their original order, writes them to
    *batch_path* and prints a short summary.

    Args:
        targets_path: JSON file to read. The code iterates the loaded value
            and reads ``t['innung']`` from each item, so it expects an
            array of objects that each have an 'innung' key.
        batch_path: JSON file to write the selected batch to (overwritten).
        batch_size: Maximum number of targets in the batch. Must be >= 0.

    Returns:
        list: The targets that were written to *batch_path*.

    Raises:
        ValueError: If *batch_size* is negative.
        KeyError: If a target has no 'innung' key.
    """
    if batch_size < 0:
        # A negative slice bound would silently drop items from the end
        # instead of limiting the batch, so reject it up front.
        raise ValueError(f"batch_size must be >= 0, got {batch_size}")

    with open(targets_path, 'r', encoding='utf-8') as f:
        targets = json.load(f)

    new_targets = [t for t in targets if not is_processed(t['innung'])]
    skipped_count = len(targets) - len(new_targets)

    print(f"Skipped {skipped_count} processed targets.")
    print(f"Found {len(new_targets)} unprocessed targets.")

    batch6 = new_targets[:batch_size]
    with open(batch_path, 'w', encoding='utf-8') as f:
        json.dump(batch6, f, indent=2)

    for i, t in enumerate(batch6, start=1):
        print(f"Target {i}: {t['innung']}")

    return batch6
|
||||
|
||||
# Build the batch only when this file is run as a script, not on import.
if __name__ == "__main__":
    prepare_batch6()
|
||||
Reference in New Issue
Block a user