update acc. to bounces

This commit is contained in:
2025-12-19 17:12:40 -06:00
parent 6df8674b72
commit 1d1a384d1b
7 changed files with 838 additions and 177 deletions

188
worker.py
View File

@@ -51,145 +51,99 @@ def get_bucket_name(domain):
"""Konvention: domain.tld -> domain-tld-emails"""
return domain.replace('.', '-') + '-emails'
def is_ses_bounce_or_autoreply(parsed):
"""Erkennt SES Bounces"""
def is_ses_bounce_notification(parsed):
"""
Prüft ob Email von SES MAILER-DAEMON ist
"""
from_h = (parsed.get('From') or '').lower()
auto_sub = (parsed.get('Auto-Submitted') or '').lower()
is_mailer_daemon = 'mailer-daemon@' in from_h and 'amazonses.com' in from_h
is_auto_replied = 'auto-replied' in auto_sub or 'auto-generated' in auto_sub
return is_mailer_daemon or is_auto_replied
return 'mailer-daemon@us-east-2.amazonses.com' in from_h
def extract_original_message_id(parsed):
def get_bounce_info_from_dynamodb(message_id):
"""
Extrahiert Original SES Message-ID aus Email
SES Format: 010f[hex32]-[hex8]-[hex4]-[hex4]-[hex4]-[hex12]-[hex6]
Sucht Bounce-Info in DynamoDB anhand der Message-ID
Returns: dict mit bounce info oder None
"""
import re
# SES Message-ID Pattern (endet immer mit -000000)
ses_pattern = re.compile(r'010f[0-9a-f]{12}-[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}-000000')
# 1. Versuche Standard-Header (In-Reply-To, References)
for header in ['In-Reply-To', 'References']:
value = (parsed.get(header) or '').strip()
if value:
match = ses_pattern.search(value)
if match:
log(f" Found Message-ID in {header}: {match.group(0)}")
return match.group(0)
# 2. Durchsuche Message-ID Header (manchmal steht dort die Original-ID)
msg_id_header = (parsed.get('Message-ID') or '').strip()
if msg_id_header:
match = ses_pattern.search(msg_id_header)
if match:
# Aber nur wenn es nicht die ID der aktuellen Bounce-Message ist
# (die beginnt oft auch mit 010f...)
pass # Wir überspringen das erstmal
# 3. Durchsuche den kompletten Email-Body (inkl. ALLE Attachments/Parts)
# Das fängt auch attached messages, text attachments, etc. ab
try:
body_text = ''
response = msg_table.get_item(Key={'MessageId': message_id})
item = response.get('Item')
# Hole den kompletten Body als String
if parsed.is_multipart():
for part in parsed.walk():
content_type = part.get_content_type()
# Durchsuche ALLE Parts (außer Binärdaten wie images)
# Text-Parts, HTML, attached messages, und auch application/* Parts
if content_type.startswith('text/') or \
content_type == 'message/rfc822' or \
content_type.startswith('application/'):
try:
payload = part.get_payload(decode=True)
if payload:
# Versuche als UTF-8, fallback auf Latin-1
try:
body_text += payload.decode('utf-8', errors='ignore')
except:
try:
body_text += payload.decode('latin-1', errors='ignore')
except:
# Letzter Versuch: als ASCII mit ignore
body_text += str(payload, errors='ignore')
except:
# Falls decode fehlschlägt, String-Payload holen
payload = part.get_payload()
if isinstance(payload, str):
body_text += payload
else:
# Nicht-Multipart Message
payload = parsed.get_payload(decode=True)
if payload:
try:
body_text = payload.decode('utf-8', errors='ignore')
except:
body_text = payload.decode('latin-1', errors='ignore')
if not item:
log(f"⚠ No bounce record found for Message-ID: {message_id}")
return None
# Suche alle SES Message-IDs im Body
matches = ses_pattern.findall(body_text)
if matches:
# Nehme die ERSTE gefundene ID (meist die Original-ID)
# Die letzte ist oft die Bounce-Message selbst
log(f" Found {len(matches)} SES Message-ID(s) in body, using first: {matches[0]}")
return matches[0]
return {
'original_source': item.get('original_source', ''),
'bounceType': item.get('bounceType', 'Unknown'),
'bounceSubType': item.get('bounceSubType', 'Unknown'),
'bouncedRecipients': item.get('bouncedRecipients', []),
'timestamp': item.get('timestamp', '')
}
except Exception as e:
log(f" Warning: Could not search body for Message-ID: {e}", 'WARNING')
return None
log(f"⚠ DynamoDB Error: {e}", 'ERROR')
return None
def apply_bounce_logic(parsed, subject):
"""
Prüft auf Bounce, sucht in DynamoDB und schreibt Header um.
Prüft auf SES Bounce, sucht in DynamoDB und schreibt Header um.
Returns: (parsed_email_object, was_modified_bool)
"""
if not is_ses_bounce_or_autoreply(parsed):
if not is_ses_bounce_notification(parsed):
return parsed, False
log("🔍 Detected auto-response/bounce. Checking DynamoDB...")
original_msg_id = extract_original_message_id(parsed)
log("🔍 Detected SES MAILER-DAEMON bounce notification")
if not original_msg_id:
log("⚠ Could not extract original Message-ID")
# Message-ID aus Header extrahieren
message_id = (parsed.get('Message-ID') or '').strip('<>')
if not message_id:
log("⚠ Could not extract Message-ID from bounce notification")
return parsed, False
try:
# Lookup in DynamoDB
result = msg_table.get_item(Key={'MessageId': original_msg_id})
item = result.get('Item')
log(f" Looking up Message-ID: {message_id}")
# Lookup in DynamoDB
bounce_info = get_bounce_info_from_dynamodb(message_id)
if not bounce_info:
return parsed, False
# Bounce Info ausgeben
original_source = bounce_info['original_source']
bounced_recipients = bounce_info['bouncedRecipients']
bounce_type = bounce_info['bounceType']
bounce_subtype = bounce_info['bounceSubType']
log(f"✓ Found bounce info:")
log(f" Original sender: {original_source}")
log(f" Bounce type: {bounce_type}/{bounce_subtype}")
log(f" Bounced recipients: {bounced_recipients}")
# Nehme den ersten bounced recipient als neuen Absender
# (bei Multiple Recipients kann es mehrere geben)
if bounced_recipients:
new_from = bounced_recipients[0]
if not item:
log(f"⚠ No DynamoDB record found for {original_msg_id}")
return parsed, False
# Treffer!
orig_source = item.get('source', '')
orig_destinations = item.get('destinations', [])
original_recipient = orig_destinations[0] if orig_destinations else ''
if original_recipient:
log(f"✓ Found original sender: {orig_source} -> intended for {original_recipient}")
# Rewrite Headers
parsed['X-Original-SES-From'] = parsed.get('From', '')
parsed.replace_header('From', original_recipient)
if not parsed.get('Reply-To'):
parsed['Reply-To'] = original_recipient
if 'delivery status notification' in subject.lower():
parsed.replace_header('Subject', f"Delivery Status: {original_recipient}")
return parsed, True
except Exception as e:
log(f"⚠ DynamoDB Error: {e}")
# Rewrite Headers
parsed['X-Original-SES-From'] = parsed.get('From', '')
parsed['X-Bounce-Type'] = f"{bounce_type}/{bounce_subtype}"
parsed.replace_header('From', new_from)
if not parsed.get('Reply-To'):
parsed['Reply-To'] = new_from
# Subject anpassen
if 'delivery status notification' in subject.lower() or 'thanks for your submission' in subject.lower():
parsed.replace_header('Subject', f"Delivery Status: {new_from}")
log(f"✓ Rewritten FROM: {new_from}")
return parsed, True
log("⚠ No bounced recipients found in bounce info")
return parsed, False
def signal_handler(signum, frame):
global shutdown_requested
print(f"\n⚠ Shutdown signal received (signal {signum})")