table_chart

code Python verified Free Download devices Cross-platform

code Code Preview

Python
#!/usr/bin/env python3
"""
Email Extractor for CSV Files
Extract emails from CSV with auto-detection
"""
import csv
import re

# Simplified email syntax check (not a full RFC 5322 validator): one or more
# local-part characters (letters, digits, . _ % + -), an '@', a domain made of
# letters/digits/dots/hyphens, then a final dot followed by at least two ASCII
# letters. The pattern is anchored at both ends with ^ and $.
EMAIL_PATTERN = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'

def detect_email_column(header, sample_row):
    """Auto-detect the email column by header name, then by sample content.

    Args:
        header: Sequence of column names from the first CSV row. May be
            None or empty (e.g. when the file has no rows at all).
        sample_row: Sequence of values from the first data row. May be
            None or empty when there is no data row.

    Returns:
        The zero-based index of the first column whose name contains one of
        the email keywords; failing that, the index of the first sample
        value that matches EMAIL_PATTERN; otherwise None.
    """
    email_keywords = ('email', 'mail', 'e-mail', 'correo')

    # Check column names first.
    # NOTE: matching is by substring, so a header such as 'mailing_address'
    # is also treated as an email column.
    for i, col in enumerate(header or ()):
        name = str(col).lower()  # lowercase once per column, not per keyword
        if any(kw in name for kw in email_keywords):
            return i

    # Fall back to inspecting the sample row's content.
    for i, value in enumerate(sample_row or ()):
        if re.match(EMAIL_PATTERN, str(value).strip()):
            return i

    return None

def extract_from_csv(filepath, email_column=None):
    """Extract unique, lowercased email addresses from one column of a CSV file.

    The first row is always treated as a header and is never scanned for
    addresses. The second row doubles as the sample used for auto-detection
    and is then processed like any other data row.

    Args:
        filepath: Path of the CSV file to read (decoded as UTF-8).
        email_column: Zero-based index of the column holding the addresses.
            When None, the column is auto-detected with detect_email_column().

    Returns:
        A list of the distinct addresses that match EMAIL_PATTERN, lowercased,
        in the order they first appear in the file.

    Raises:
        ValueError: If email_column is None and no column can be detected
            (including when the file is empty).
    """
    # A dict is used as an insertion-ordered set so the result is
    # deterministic instead of depending on set iteration order.
    emails = {}

    # newline='' lets the csv module handle line endings itself, which is
    # required for quoted fields containing embedded newlines.
    with open(filepath, 'r', encoding='utf-8', newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)
        sample = next(reader, None)

        if email_column is None:
            # header is None for an empty file; pass empty sequences so
            # detection returns None and the ValueError below is raised.
            email_column = detect_email_column(header or [], sample or [])

        if email_column is None:
            raise ValueError("Could not detect email column")

        # The sample row was already consumed from the reader, so replay it
        # ahead of the remaining rows and treat every row the same way.
        consumed = [] if sample is None else [sample]
        for rows in (consumed, reader):
            for row in rows:
                # Skip short/blank rows; the lower bound keeps a negative
                # index from raising IndexError on rows that are too short.
                if not -len(row) <= email_column < len(row):
                    continue
                email = row[email_column].strip()
                if re.match(EMAIL_PATTERN, email):
                    emails[email.lower()] = None

    return list(emails)

if __name__ == '__main__':
    import sys

    # Without a path argument sys.argv[1] would raise a bare IndexError;
    # print a usage hint and exit with a non-zero status instead.
    if len(sys.argv) < 2:
        print(f"Usage: {sys.argv[0]} <file.csv>", file=sys.stderr)
        sys.exit(2)

    emails = extract_from_csv(sys.argv[1])
    print(f"Found {len(emails)} unique emails")

info About This Tool

The Email Extractor for CSV Files intelligently extracts email addresses from spreadsheet exports. Automatically detects email columns by name or content pattern.

Key Features

  • Auto Column Detection - Finds email columns by header name or content
  • Syntax Validation - Validates each email against a simplified address pattern (not a full RFC 5322 check)
  • Deduplication - Removes duplicate emails automatically
  • Case Normalization - Converts all emails to lowercase
  • Large File Support - Streams data for memory efficiency

Supported Formats

  • Standard CSV (comma-separated)
  • Semicolon-separated values
  • Tab-separated values (TSV)
  • Excel exports

Requirements

  • Python 3.7+
  • No external dependencies

download Download Script

Need Full Automation?

Try Postigo for automated email campaigns with AI personalization

rocket_launch Start Free Trial