feat: Implement mobile application and lead processing utilities.
This commit is contained in:
67
recover_cologne_leads.py
Normal file
67
recover_cologne_leads.py
Normal file
@@ -0,0 +1,67 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
def normalize(text):
    """Return a comparison key for *text*.

    Strings are stripped of surrounding whitespace and lower-cased. Any
    non-string value (for example ``None`` or a float NaN coming out of a
    DataFrame cell) yields the empty string.
    """
    return text.strip().lower() if isinstance(text, str) else ""
|
||||
|
||||
def recover_cologne(
    leads_csv_path='leads/leads.csv',
    cologne_raw_path='leads/raw/innungen_leads_koeln_duesseldorf.csv',
):
    """Append Cologne leads from the raw export that are missing in leads.csv.

    A raw row is added when its ``region`` is ``koeln`` (ignoring case and
    surrounding whitespace), it carries a non-blank string ``email``, and
    neither its normalized email nor its normalized ``organisation`` name
    has been seen before, whether in leads.csv or earlier in the same run.

    Args:
        leads_csv_path: CSV of existing leads. Must contain the columns
            ``Email`` and ``Firm/Innung``. New rows are appended to it.
        cologne_raw_path: Raw CSV that is scanned for new leads. The columns
            ``organisation``, ``region`` and ``email`` are read from it.

    Returns:
        None. Progress is reported on stdout. When either file is missing,
        a message is printed and nothing is written.
    """
    if not (os.path.exists(leads_csv_path) and os.path.exists(cologne_raw_path)):
        print("Files not found.")
        return

    # Load existing leads. Blank keys are discarded so that a single empty
    # cell in leads.csv cannot make every raw row with a blank name look
    # like a duplicate.
    leads_df = pd.read_csv(leads_csv_path)
    done_emails = set(leads_df['Email'].apply(normalize)) - {""}
    done_names = set(leads_df['Firm/Innung'].apply(normalize)) - {""}

    # Load raw Cologne data
    cologne_df = pd.read_csv(cologne_raw_path)

    new_rows = []

    print(f"Scanning {len(cologne_df)} raw entries...")

    for _, row in cologne_df.iterrows():
        name = row.get('organisation', '')
        region = row.get('region', '')
        email = row.get('email', '')

        # We only care about Cologne for this recovery
        if str(region).strip().lower() != 'koeln':
            continue

        # Must have a usable email. normalize() maps NaN and any other
        # non-string cell to "", so this never calls .strip() on a float.
        email_key = normalize(email)
        if not email_key:
            continue

        # Check if already in leads.csv (or already collected in this run)
        name_key = normalize(name)
        if email_key in done_emails or (name_key and name_key in done_names):
            continue

        # It's a valid new lead!
        new_rows.append({
            'Firm/Innung': name.strip() if isinstance(name, str) else '',
            'Contact Person': 'N/A',  # the raw columns read here carry no contact person
            'Email': email.strip(),
            'Region': 'Köln',
        })

        # Remember both keys so duplicates inside the batch are skipped by
        # the same rules that apply to leads already on file.
        done_emails.add(email_key)
        if name_key:
            done_names.add(name_key)

    print(f"Found {len(new_rows)} new Cologne leads to add.")

    if not new_rows:
        print("No new leads found.")
        return

    # The rows are appended without a header, so they must follow the column
    # order of the existing file; columns the file has but we do not fill
    # are left empty.
    new_df = pd.DataFrame(new_rows).reindex(columns=leads_df.columns)

    # If the file does not end with a line break, the first appended row
    # would be glued onto the last existing record.
    with open(leads_csv_path, 'rb') as existing:
        existing.seek(0, os.SEEK_END)
        if existing.tell() > 0:
            existing.seek(-1, os.SEEK_END)
            needs_newline = existing.read(1) not in (b'\n', b'\r')
        else:
            needs_newline = False

    # newline='' lets pandas control the line terminator itself.
    with open(leads_csv_path, 'a', encoding='utf-8', newline='') as out:
        if needs_newline:
            out.write(os.linesep)
        new_df.to_csv(out, header=False, index=False)

    print("Successfully appended to leads.csv")
|
||||
|
||||
if __name__ == "__main__":
    # Executed as a script (not imported): run the Cologne lead recovery once.
    recover_cologne()
|
||||
Reference in New Issue
Block a user