description

Python · Verified · Free Download · Cross-platform

code Code Preview

Python
#!/usr/bin/env python3
"""
Email Extractor for Text Files
Extract emails from TXT, LOG, and other text files
"""
import re
import os

# Pragmatic email matcher: local part, "@", dotted domain, alphabetic TLD of
# at least two letters. It is a syntax heuristic, not full RFC 5322 validation.
EMAIL_PATTERN = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'

def extract_from_file(filepath, encoding='utf-8'):
    """Extract the unique email addresses found in a single text file.

    The file is decoded with *encoding* first. If that raises
    UnicodeDecodeError the file is scanned again as Latin-1, which maps
    every byte to a character and therefore cannot fail to decode.

    Args:
        filepath: Path of the text file to scan.
        encoding: Text encoding to try first (default 'utf-8').

    Returns:
        A sorted list of the distinct addresses matching EMAIL_PATTERN.
        If the file cannot be read, the error is printed and an empty
        list is returned.
    """
    pattern = re.compile(EMAIL_PATTERN)

    for candidate in (encoding, 'latin-1'):
        found = set()
        try:
            with open(filepath, 'r', encoding=candidate) as f:
                # The pattern contains no newline, so a match can never span
                # two lines; scanning line by line gives the same result as
                # reading the whole file while keeping memory use bounded.
                for line in f:
                    found.update(pattern.findall(line))
        except UnicodeDecodeError:
            # Discard partial results and retry with the fallback encoding.
            continue
        except Exception as e:
            print(f"Error reading {filepath}: {e}")
            return []
        # Sorted so that callers get a deterministic order across runs.
        return sorted(found)

    # Only reachable if the Latin-1 fallback itself failed to decode.
    return []

def extract_from_directory(directory, extensions=None):
    """Extract the unique email addresses from text files under a directory.

    The directory tree is walked recursively with os.walk. Every file whose
    name ends with one of *extensions* (compared case-insensitively) is
    passed to extract_from_file, and a per-file count is printed.

    Args:
        directory: Root directory to scan.
        extensions: Iterable of filename suffixes to include, or a single
            suffix string. Defaults to ['.txt', '.log', '.dat', '.text'].

    Returns:
        A sorted list of the distinct addresses found across all matching
        files.
    """
    if extensions is None:
        extensions = ['.txt', '.log', '.dat', '.text']
    elif isinstance(extensions, str):
        # A bare string would otherwise be iterated character by character.
        extensions = [extensions]

    # Filenames are lowercased before comparison, so the suffixes must be
    # lowercased too or an argument such as '.TXT' would never match.
    suffixes = tuple(ext.lower() for ext in extensions)

    all_emails = set()

    for root, _dirs, files in os.walk(directory):
        for filename in files:
            if filename.lower().endswith(suffixes):
                filepath = os.path.join(root, filename)
                emails = extract_from_file(filepath)
                all_emails.update(emails)
                print(f"Found {len(emails)} emails in {filename}")

    # Sorted so that callers get a deterministic order across runs.
    return sorted(all_emails)

if __name__ == '__main__':
    # Command-line entry point: print the addresses found in the file named
    # by the first argument, one per line, in sorted order.
    import sys

    # Without a path there is nothing to scan; report the expected usage on
    # stderr (exit status 1) instead of exiting silently.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} <file>")

    emails = extract_from_file(sys.argv[1])
    for email in sorted(emails):
        print(email)

About This Tool

The Email Extractor for Text Files scans plain text documents and extracts all email addresses using regex pattern matching. It reads files as UTF-8, falls back to Latin-1 when decoding fails, and only returns strings that match a standard email syntax pattern.

Key Features

  • Multi-encoding Support - Tries UTF-8 first and automatically falls back to Latin-1; a different starting encoding can be passed explicitly
  • Directory Scanning - Process entire folders recursively
  • Multiple Extensions - TXT, LOG, DAT, and TEXT by default, plus custom extensions
  • Deduplication - Automatically removes duplicate emails
  • Progress Reporting - Shows emails found per file

Supported File Types

  • .txt - Plain text files
  • .log - Log files
  • .dat - Data files
  • Any text-based format

Requirements

  • Python 3.7+
  • No external dependencies

download Download Script

Need Full Automation?

Try Postigo for automated email campaigns with AI personalization

rocket_launch Start Free Trial