Regular Expressions

Master pattern matching and text processing with Python's re module

Introduction to Regex

Regular expressions are patterns for matching text
Python's re module provides regex functionality
Useful for validation, searching, and text manipulation
Common patterns: \d (digit), \w (word character: letter, digit, or underscore), \s (whitespace)

import re

# Basic pattern matching: look for three-three-four digit groups joined by dashes
text = "My phone is 123-456-7890"
pattern = r"\d{3}-\d{3}-\d{4}"
match = re.compile(pattern).search(text)

# search() gives back None when nothing matches, so test before using the result
if match is not None:
    found = match.group()
    print(f"Found: {found}")

# Find all matches: findall() returns every non-overlapping hit as a list
text2 = "Emails: alice@example.com, bob@test.org"
email_pattern = re.compile(r"[\w.-]+@[\w.-]+")
emails = email_pattern.findall(text2)
print(f"Emails: {emails}")

Common Regex Methods

re.search() - find first match anywhere in string
re.match() - match at beginning of string only (the rest of the string may be anything)
re.findall() - find all matches, return as list
re.sub() - replace matches with new text

import re

text = "The year is 2024, not 2023"

# One pattern shared by every call below: exactly four consecutive digits
year_pattern = r"\d{4}"

# search - find first occurrence
# search() returns None when nothing matches, so check before calling .group()
result = re.search(year_pattern, text)
if result:
    print(f"First year: {result.group()}")

# findall - find all occurrences (an empty list when there are none)
years = re.findall(year_pattern, text)
print(f"All years: {years}")

# sub - replace matches
new_text = re.sub(year_pattern, "XXXX", text)
print(f"Replaced: {new_text}")

# match - only at start
# match() also returns None on failure, so guard it the same way
text2 = "2024 is the year"
match = re.match(year_pattern, text2)
if match:
    print(f"Starts with year: {match.group()}")

Regex Patterns

Character classes: [abc], [0-9], [a-zA-Z]
Quantifiers: * (0+), + (1+), ? (0 or 1), {n} (exactly n)
Anchors: ^ (start), $ (end)
Groups: () for capturing, | for alternation

import re

# Character classes: a bracketed set matches any single character it lists
text = "abc123XYZ"
lowercase_run = re.compile(r"[a-z]+")
digit_run = re.compile(r"[0-9]+")
letters = lowercase_run.findall(text)
digits = digit_run.findall(text)
print(f"Letters: {letters}, Digits: {digits}")

# Quantifiers: "?" makes the preceding "u" optional
text2 = "color colour"
matches = re.compile(r"colou?r").findall(text2)
print(f"Matches: {matches}")

# Anchors: "^" pins the pattern to the start, "$" to the end
text3 = "hello world"
start = re.compile(r"^hello").match(text3)
end = re.compile(r"world$").search(text3)
print(f"Starts with hello: {start is not None}")
print(f"Ends with world: {end is not None}")

Practical Examples

Email validation
Phone number extraction
URL parsing
Password strength checking

import re

# Email validation
def is_valid_email(email):
    """Return True if *email* has the shape ``local@domain.tld``.

    The local part and the domain may contain word characters, dots and
    hyphens; the final label after the last dot must be at least two ASCII
    letters. This is a shape check only, not full RFC 5322 validation.
    """
    # fullmatch() requires the pattern to cover the entire string. The
    # previous "^...$" with match() let "$" match just before a trailing
    # newline, so "user@example.com\n" was wrongly accepted.
    pattern = r"[\w.-]+@[\w.-]+\.[a-zA-Z]{2,}"
    return re.fullmatch(pattern, email) is not None

# Try one well-formed address and one with no "@" at all
for candidate in ("user@example.com", "invalid.email"):
    print(is_valid_email(candidate))

# Extract phone numbers: collect every NNN-NNNN group found in the text
text = "Call 555-1234 or 555-5678"
phone_pattern = re.compile(r"\d{3}-\d{4}")
phones = [hit.group() for hit in phone_pattern.finditer(text)]
print(f"Phone numbers: {phones}")

# Password validation (8+ chars, 1 digit, 1 upper)
def is_strong_password(pwd):
    """Return True when *pwd* passes all three strength rules.

    The rules are checked in order and evaluation stops at the first failure:
    at least 8 characters, at least one digit, at least one uppercase A-Z.
    """
    return (
        len(pwd) >= 8
        and re.search(r"\d", pwd) is not None
        and re.search(r"[A-Z]", pwd) is not None
    )

# Try one password that is too short and one that meets every rule
for candidate in ("Weak1", "Strong123"):
    print(is_strong_password(candidate))