cleaning_services

code Python verified Free Download devices Cross-platform

code Code Preview

Python
#!/usr/bin/env python3
"""
Email Deduplicator - Remove Duplicate Emails
Smart deduplication with case-insensitive matching
"""

def deduplicate_emails(emails):
    """Return the unique emails from ``emails``, compared case-insensitively.

    Each entry is stripped of surrounding whitespace. Two entries are
    duplicates when their stripped, lower-cased forms are equal. The first
    occurrence wins and keeps its original capitalization; input order is
    preserved.

    Entries that are empty or whitespace-only (for example blank lines from
    a text file) are skipped rather than reported as an email.

    Args:
        emails: Iterable of email strings.

    Returns:
        A list of stripped, unique email strings in first-seen order.
    """
    seen = set()
    unique = []

    for email in emails:
        stripped = email.strip()
        if not stripped:
            # Blank lines are not addresses; don't let "" count as unique.
            continue

        normalized = stripped.lower()
        if normalized not in seen:
            seen.add(normalized)
            unique.append(stripped)

    return unique

def merge_files(file_paths):
    """Combine the lines of several email files and deduplicate them.

    Files are read one at a time, in the order given, and their lines are
    concatenated before being handed to ``deduplicate_emails``.

    Args:
        file_paths: Iterable of paths to text files with one email per line.

    Returns:
        Whatever ``deduplicate_emails`` returns for the combined lines.
    """
    def _read_lines(path):
        # Read the whole file and split it; the handle is closed on return.
        with open(path, 'r') as handle:
            return handle.read().splitlines()

    combined = [line for path in file_paths for line in _read_lines(path)]
    return deduplicate_emails(combined)

def process_csv(input_file, output_file, email_column=0):
    """Deduplicate a CSV file by its email column, keeping the other columns.

    The first row is treated as a header and copied through unchanged (it is
    omitted only if it is empty). For the remaining rows, the value in
    ``email_column`` is stripped and lower-cased to form the comparison key;
    the first row seen for each key is written as-is and later rows with the
    same key are dropped.

    Rows that do not contain the email column at all (blank lines, truncated
    rows) are skipped instead of raising ``IndexError``.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path of the CSV file to write.
        email_column: Index of the column that holds the email address.

    Returns:
        The number of data rows written (the header is not counted).
    """
    import csv

    seen = set()
    rows = []

    # newline='' leaves line-ending handling to the csv module, which is
    # needed for quoted fields that contain embedded newlines.
    with open(input_file, 'r', newline='') as f:
        reader = csv.reader(f)
        header = next(reader, None)

        for row in reader:
            try:
                cell = row[email_column]
            except IndexError:
                # csv.reader yields [] for blank lines, and short rows have
                # no email cell; neither can be keyed, so skip them.
                continue

            email = cell.lower().strip()
            if email not in seen:
                seen.add(email)
                rows.append(row)

    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f)
        if header:
            writer.writerow(header)
        writer.writerows(rows)

    return len(rows)

if __name__ == '__main__':
    # Command-line entry point: report how many unique emails a file holds.
    import sys

    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"Usage: {sys.argv[0]} <email_file>")

    # Use a context manager so the file handle is closed deterministically.
    with open(sys.argv[1]) as f:
        emails = f.read().splitlines()

    unique = deduplicate_emails(emails)
    print(f"Original: {len(emails)}, Unique: {len(unique)}")

info About This Tool

The Email Deduplicator removes duplicate email addresses from your lists using smart case-insensitive matching. Perfect for cleaning up merged lists or removing duplicates before email campaigns.

Key Features

  • Case-Insensitive Matching - Treats User@Example.com and user@example.com as duplicates
  • Multi-File Merge - Combine and deduplicate multiple files at once
  • CSV Support - Preserve other columns while removing duplicate emails
  • Fast Processing - Handle millions of emails efficiently
  • Original Case Preserved - Keeps the first occurrence's original capitalization (plain lists are trimmed of surrounding whitespace; CSV rows are written unchanged)

Use Cases

  • Clean up imported contact lists
  • Merge multiple email lists without duplicates
  • Prepare lists for email campaigns
  • Reduce list size for ESP imports

Requirements

  • Python 3.7+
  • No external dependencies for basic usage

Performance: Processes 1 million emails in under 5 seconds using set-based deduplication.

download Download Script

Need Full Automation?

Try Postigo for automated email campaigns with AI personalization

rocket_launch Start Free Trial