table_chart
code
Python
verified
Free Download
devices
Cross-platform
code Code Preview
Python
#!/usr/bin/env python3
"""
Email Extractor for CSV Files
Extract emails from CSV with auto-detection
"""
import csv
import re
# Simplified email pattern, anchored with ^ and $: a local part of ASCII
# letters, digits and . _ % + -, then "@", a domain of letters, digits, dots
# and hyphens, and a final dot-separated label of at least two ASCII letters.
EMAIL_PATTERN = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
def detect_email_column(header, sample_row):
    """Auto-detect the email column by header name, then by sample content.

    Args:
        header: Sequence of column names taken from the first CSV row. May
            be ``None`` or empty (e.g. when the file has no rows at all).
        sample_row: Sequence of values from one data row. May be ``None``
            or empty.

    Returns:
        The zero-based index of the first column whose name contains one of
        the email keywords (case-insensitive substring match); otherwise the
        index of the first sample value that matches ``EMAIL_PATTERN``;
        ``None`` when neither check finds a column.
    """
    email_keywords = ('email', 'mail', 'e-mail', 'correo')

    # Column names take priority over content. ``header or ()`` keeps an
    # empty file (header is None) from raising TypeError in enumerate().
    for index, column_name in enumerate(header or ()):
        # str() mirrors the sample-row handling so non-string cells are safe.
        lowered = str(column_name).lower()
        if any(keyword in lowered for keyword in email_keywords):
            return index

    # Fall back to looking for a value that is itself a valid address.
    for index, value in enumerate(sample_row or ()):
        if re.match(EMAIL_PATTERN, str(value).strip()):
            return index

    return None
def extract_from_csv(filepath, email_column=None, delimiter=','):
    """Extract unique, lowercased email addresses from a delimited text file.

    The first row is always treated as a header and is never scanned for
    addresses. Rows are read one at a time; only the unique addresses are
    kept in memory.

    Args:
        filepath: Path of the file to read.
        email_column: Zero-based index of the column holding the addresses.
            When ``None`` the column is auto-detected from the header row
            and the first data row via ``detect_email_column``.
        delimiter: Single-character field separator. Defaults to a comma;
            pass ``';'`` or a tab character for semicolon- or tab-separated
            files.

    Returns:
        A list of the unique addresses (lowercased) in first-seen order.

    Raises:
        ValueError: If ``email_column`` is ``None`` and no email column
            could be detected (including when the file is empty).
    """
    # Dict keys de-duplicate while preserving insertion order (Python 3.7+),
    # so the result order no longer depends on string hashing.
    found = {}

    def collect(row):
        # Skip rows too short for the requested column; the lower bound also
        # protects negative indexes against blank rows.
        if -len(row) <= email_column < len(row):
            candidate = row[email_column].strip()
            if re.match(EMAIL_PATTERN, candidate):
                found[candidate.lower()] = None

    # newline='' lets the csv module handle line endings itself (required
    # for quoted fields with embedded newlines); utf-8-sig drops the BOM
    # that spreadsheet exports commonly prepend.
    with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
        reader = csv.reader(f, delimiter=delimiter)
        header = next(reader, None)
        sample = next(reader, None)

        if email_column is None:
            # Empty sequences stand in for missing rows so an empty file
            # ends in the ValueError below rather than a TypeError.
            email_column = detect_email_column(header or [], sample or [])
        if email_column is None:
            raise ValueError("Could not detect email column")

        # The sample row was consumed for detection; it is still data.
        if sample:
            collect(sample)
        for row in reader:
            collect(row)

    return list(found)
if __name__ == '__main__':
    import sys

    # Exit with a usage hint (written to stderr, status 1) instead of an
    # IndexError traceback when the CSV path argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <file.csv>")

    emails = extract_from_csv(sys.argv[1])
    print(f"Found {len(emails)} unique emails")
info About This Tool
The Email Extractor for CSV Files intelligently extracts email addresses from spreadsheet exports. Automatically detects email columns by name or content pattern.
Key Features
- Auto Column Detection - Finds email columns by header name or content
- Syntax Validation - Validates each email against a simplified address pattern (not the full RFC 5322 grammar)
- Deduplication - Removes duplicate emails automatically
- Case Normalization - Converts all emails to lowercase
- Large File Support - Streams data for memory efficiency
Supported Formats
- Standard CSV (comma-separated)
- Semicolon-separated values
- Tab-separated values (TSV)
- Excel exports
Requirements
- Python 3.7+
- No external dependencies
download Download Script
Need Full Automation?
Try Postigo for automated email campaigns with AI personalization
rocket_launch Start Free Trial