feat: Implement mobile application and lead processing utilities.
This commit is contained in:
105
leads/identify_missing_leads.py
Normal file
105
leads/identify_missing_leads.py
Normal file
@@ -0,0 +1,105 @@
|
||||
import pandas as pd
|
||||
import json
|
||||
import os
|
||||
|
||||
def normalize(text):
    """Return a canonical comparison key for a lead name or email.

    Non-string input (for example ``None`` or a float ``NaN``) maps to the
    empty string, so callers can treat "missing" and "blank" alike.

    For strings, leading/trailing whitespace is removed, every internal run
    of whitespace is collapsed to a single space, and the result is
    case-folded.  ``str.casefold`` is used instead of ``str.lower`` because
    it is the caseless-matching primitive: it maps the German "ß" to "ss",
    so "Straße" and "STRASSE" yield the same key.
    """
    if not isinstance(text, str):
        return ""
    # split()/join trims both ends and collapses internal whitespace runs.
    return " ".join(text.split()).casefold()
|
||||
|
||||
def _load_done_keys(leads_csv_path):
    """Return ``(done_names, done_emails)`` key sets read from the final leads CSV.

    Both sets are empty when the file does not exist.  Keys are produced by
    ``normalize``.  The empty key (from blank cells) is dropped so that a
    blank row in the final list cannot make every nameless raw row look
    "done".
    """
    if not os.path.exists(leads_csv_path):
        return set(), set()

    leads_df = pd.read_csv(leads_csv_path)
    done_names = {normalize(value) for value in leads_df['Firm/Innung']}
    done_emails = {normalize(value) for value in leads_df['Email']}
    done_names.discard("")
    done_emails.discard("")
    return done_names, done_emails


def _find_missing_duesseldorf(targets_path, done_names):
    """Return the Düsseldorf target records whose 'innung' is not in ``done_names``.

    Only the first record per normalized name is kept.  Returns an empty
    list when the JSON file does not exist.
    """
    missing = []
    if not os.path.exists(targets_path):
        return missing

    with open(targets_path, 'r', encoding='utf-8') as f:
        targets = json.load(f)

    seen = set()
    for target in targets:
        key = normalize(target.get('innung', ''))
        if key in done_names or key in seen:
            continue
        seen.add(key)
        missing.append(target)
    return missing


def _find_missing_cologne(raw_csv_path, done_names, done_emails):
    """Return report entries for 'koeln' rows of the raw CSV that are not done.

    Each entry is a dict with 'name' and 'reason', plus 'email' when the row
    has one.  A row without an email is always reported ('No Email'); a row
    with an email is reported only when neither its email nor its name is in
    the done sets.  Returns an empty list when the CSV does not exist.
    """
    missing = []
    if not os.path.exists(raw_csv_path):
        return missing

    cologne_df = pd.read_csv(raw_csv_path)
    seen = set()
    for _, row in cologne_df.iterrows():
        # normalize() also strips, so " Koeln " matches; NaN becomes "".
        if normalize(row.get('region', '')) != 'koeln':
            continue

        name = row.get('organisation', '')
        email = row.get('email', '')
        name_key = normalize(name)
        email_key = normalize(email)

        # De-duplicate on the name; nameless rows fall back to their email so
        # that one nameless row does not hide all the others.
        dedupe_key = name_key or email_key
        if not dedupe_key or dedupe_key in seen:
            # A row with neither name nor email has nothing to report.
            continue
        seen.add(dedupe_key)

        # NOTE(review): only the first row per organisation is examined, so an
        # organisation whose first row lacks an email is reported as
        # 'No Email' even if a later row has one -- confirm this is intended.
        if not email_key:
            # email_key is "" for NaN, blank strings and non-string cells
            # alike, so no .strip() call on a non-string value can occur.
            missing.append({'name': name, 'reason': 'No Email'})
        elif email_key not in done_emails and name_key not in done_names:
            missing.append({'name': name, 'email': email, 'reason': 'Not in final list'})
    return missing


def _find_missing_unterfranken(raw_csv_path, done_names):
    """Return the names from the Unterfranken raw CSV that are not in ``done_names``.

    The name column is the first column whose header contains 'innung',
    'firm' or 'name' (case-insensitive).  Returns an empty list when the CSV
    does not exist or no such column is found.
    """
    missing = []
    if not os.path.exists(raw_csv_path):
        return missing

    uf_df = pd.read_csv(raw_csv_path)
    name_col = next(
        (
            col for col in uf_df.columns
            if any(marker in col.lower() for marker in ('innung', 'firm', 'name'))
        ),
        None,
    )
    if name_col is None:
        return missing

    seen = set()
    for value in uf_df[name_col]:
        name = str(value).strip()
        # Filter garbage: very short values (this also drops the "nan" that
        # str() produces for empty cells) and the repeated header line.
        if len(name) < 5:
            continue
        lowered = name.lower()
        if "regierungsbezirk" in lowered and "sitz" in lowered:
            continue

        key = normalize(name)
        if key in done_names or key in seen:
            continue
        seen.add(key)
        missing.append(name)
    return missing


def _write_report(report_path, missing_duesseldorf, missing_cologne, missing_unterfranken):
    """Write the Markdown report with one section per region to ``report_path``."""
    lines = ['# Missing Leads Report\n\n']

    lines.append(f'## Düsseldorf (Missing: {len(missing_duesseldorf)})\n')
    if not missing_duesseldorf:
        lines.append("No missing leads identified (or source file empty).\n")
    for item in missing_duesseldorf:
        lines.append(f"- {item.get('innung')} (Contact: {item.get('person', 'N/A')})\n")

    lines.append(f'\n## Cologne (Missing: {len(missing_cologne)})\n')
    if not missing_cologne:
        lines.append("No missing leads identified from raw source.\n")
    for item in missing_cologne:
        reason = item.get('reason', '')
        email_part = f" (Email: {item['email']})" if 'email' in item else ""
        lines.append(f"- {item.get('name')}{email_part} [{reason}]\n")

    lines.append(f'\n## Unterfranken (Missing: {len(missing_unterfranken)})\n')
    if not missing_unterfranken:
        lines.append("All raw Unterfranken leads seem to be in the final list.\n")
    for name in missing_unterfranken:
        lines.append(f"- {name}\n")

    with open(report_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)


def main():
    """Compare the raw lead sources against the final list and write a report.

    Reads 'leads/leads.csv' as the list of already-processed leads, checks
    the Düsseldorf targets JSON and the Cologne and Unterfranken raw CSVs
    against it, and writes the leads that are missing to 'missing_leads.md'
    in the current working directory.  Any input file that does not exist is
    treated as empty.
    """
    # 1. Load the "Done" list
    done_names, done_emails = _load_done_keys('leads/leads.csv')

    # 2. Check Düsseldorf Targets
    missing_duesseldorf = _find_missing_duesseldorf(
        'leads/cologne_duesseldorf_data/duesseldorf_targets.json', done_names
    )

    # 3. Check Cologne/Düsseldorf Raw CSV
    missing_cologne = _find_missing_cologne(
        'leads/raw/innungen_leads_koeln_duesseldorf.csv', done_names, done_emails
    )

    # 4. Check Unterfranken Raw CSV
    missing_unterfranken = _find_missing_unterfranken(
        'leads/raw/leads_unterfranken.csv', done_names
    )

    # 5. Generate Markdown
    _write_report(
        'missing_leads.md', missing_duesseldorf, missing_cologne, missing_unterfranken
    )
|
||||
|
||||
# Generate the report only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user