The Complete Regex Guide — Patterns, Groups, Practical Examples

What Are Regular Expressions?

Regular expressions (regex) are a tool for finding or replacing specific patterns in strings. Think of them as a metal detector for picking specific items out of a pile of text. The core syntax carries over almost unchanged between programming languages (each engine has only minor dialect differences), which makes regex one of the highest-ROI skills you can invest in.

Basic Syntax

Metacharacters

Character	Meaning	Example	Matches
`.`	Any single character except a newline	`a.c`	abc, a1c, a-c
`\d`	Digit [0-9]	`\d{3}`	123, 456
`\w`	Alphanumeric + underscore [a-zA-Z0-9_]	`\w+`	hello, user_1
`\s`	Whitespace (space, tab, newline)	`a\sb`	a b
`\D`	Non-digit character	`\D+`	hello, ---
`\W`	Non-word character (anything `\w` does not match)	`\W`	@, #, !
`^`	Start of string	`^Hello`	Hello…
`$`	End of string	`world$`	…world
`\b`	Word boundary	`\bcat\b`	cat (not category)

Quantifiers

Quantifier	Meaning	Example	Matches
`*`	0 or more	`ab*c`	ac, abc, abbc
`+`	1 or more	`ab+c`	abc, abbc (not ac)
`?`	0 or 1	`colou?r`	color, colour
`{n}`	Exactly n	`\d{4}`	2026
`{n,}`	n or more	`\d{2,}`	12, 123, 1234
`{n,m}`	Between n and m	`\d{2,4}`	12, 123, 1234

Character Classes

import re

# A bracket expression matches exactly ONE character drawn from the set it lists.
text = "Hello World 123"

# Lowercase vowels only.
pattern_vowel = r"[aeiou]"
# Any hexadecimal digit, upper or lower case (several ranges share one class).
pattern_hex = r"[0-9a-fA-F]"
# A leading ^ inside the brackets negates the class: anything that is not 0-9.
pattern_not_digit = r"[^0-9]"

vowels = re.compile(pattern_vowel).findall(text)
print(vowels)
# Output: ['e', 'o', 'o']

non_digits = re.compile(pattern_not_digit).findall(text)
print(non_digits)
# Output: ['H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', ' ']

Groups and Capturing

Wrapping a pattern in parentheses () lets you extract matched parts separately.

import re

# Three parenthesised groups pull the year, month and day out of a date.
text = "Today's date is 2026-04-07."
date_pattern = r"(\d{4})-(\d{2})-(\d{2})"

match = re.search(date_pattern, text)
if match is not None:
    # Index 0 is the whole match; 1..3 are the groups in pattern order.
    print(f"Full: {match[0]}")    # Output: Full: 2026-04-07
    print(f"Year: {match[1]}")    # Output: Year: 2026
    print(f"Month: {match[2]}")   # Output: Month: 04
    print(f"Day:   {match[3]}")   # Output: Day:   07

# Named groups: (?P<name>...) lets you fetch a group by label instead of position.
named_pattern = r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})"
match = re.search(named_pattern, text)
if match is not None:
    print(f"Year:  {match['year']}")   # Output: Year:  2026
    print(f"Month: {match['month']}")  # Output: Month: 04

# Non-capturing group: (?:...) groups alternatives without taking a group number,
# so the first *numbered* group is the part after "://", not the protocol.
url_text = "Visit: https://example.com/path"
non_capture = r"(?:http|https)://(\S+)"
match = re.search(non_capture, url_text)
if match is not None:
    print(f"Domain+path: {match[1]}")  # Output: Domain+path: example.com/path

Lookahead and Lookbehind

These check whether a specific condition exists before or after the pattern, without including it in the match result.

import re

# Lookahead (?=...) — match only when followed by a specific pattern.
# The text inside the lookahead is checked but is not part of the match.
# Find numbers followed by "USD"
text = "Apple 3000USD, Pear 5000USD, Qty 3pcs"
prices = re.findall(r"\d+(?=USD)", text)
print(prices)  # Output: ['3000', '5000']  ("3" not included — not followed by "USD")

# Negative lookahead (?!...) — match when NOT followed by a specific pattern.
# Gotcha: \d+ backtracks. "3000" is followed by "USD", so the engine gives one
# digit back and accepts "300", which is followed by "0" rather than "USD".
not_price = re.findall(r"\d+(?!USD)", text)
print(not_price)  # Output: ['300', '500', '3']

# To reject the whole number, also forbid a following digit so backtracking
# cannot produce a shorter match.
not_price_whole = re.findall(r"\d+(?!\d|USD)", text)
print(not_price_whole)  # Output: ['3']

# Lookbehind (?<=...) — match only when preceded by a specific pattern
# Extract only numbers after "$"
text2 = "Price: $100, Qty: 50pcs, Discount: $30"
dollar_amounts = re.findall(r"(?<=\$)\d+", text2)
print(dollar_amounts)  # Output: ['100', '30']  ("50" not included)

# Combination — extract content inside specific tags
# The lazy .+? stops at the first closing tag instead of the last one.
html = "Name: <b>Alice</b>, Age: <b>30</b>"
bold_contents = re.findall(r"(?<=<b>).+?(?=</b>)", html)
print(bold_contents)  # Output: ['Alice', '30']

Greedy vs. Lazy Matching

import re

text = '<div>First</div><div>Second</div>'

# Greedy (the default): .* consumes as much as it can, so the match runs from
# the first opening tag all the way to the LAST closing tag.
greedy = re.compile(r"<div>.*</div>").findall(text)
print(greedy)
# Output: ['<div>First</div><div>Second</div>']  (matched everything as one)

# Lazy: a ? after the quantifier makes it consume as little as it can, so each
# match ends at the NEAREST closing tag.
lazy = re.compile(r"<div>.*?</div>").findall(text)
print(lazy)
# Output: ['<div>First</div>', '<div>Second</div>']  (matched separately)

Real-World Pattern Collection

import re

# 1. Email address validation
# Validate with re.fullmatch(): the pattern must consume the ENTIRE string.
# With re.match(), "$" also matches just before a trailing newline, so an input
# such as "user@example.com\n" would wrongly be reported as valid.
email_pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
emails = ["user@example.com", "invalid@", "test@co.kr"]
for email in emails:
    valid = re.fullmatch(email_pattern, email) is not None
    print(f"{email}: {'valid' if valid else 'invalid'}")
# Output: user@example.com: valid
#         invalid@: invalid
#         test@co.kr: valid

# 2. US phone number (various formats)
# Optional parentheses around the area code; each separator may be "-", ".",
# a whitespace character, or absent altogether.
phone_pattern = r"\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}"
phones = ["(555) 123-4567", "555.123.4567", "5551234567"]
phone_re = re.compile(phone_pattern)
for phone in phones:
    match = phone_re.search(phone)
    status = "match" if match else "no match"
    print(f"{phone}: {status}")
# Output: (555) 123-4567: match
#         555.123.4567: match
#         5551234567: match

# 3. Password strength validation (8+ chars, letters + numbers + special chars)
def validate_password(pw):
    """Report which password-strength rules *pw* satisfies.

    Args:
        pw: The candidate password string.

    Returns:
        A dict mapping each rule name ("8+ chars", "Has letters",
        "Has numbers", "Has special chars") to True when the rule is met
        and False otherwise.
    """
    checks = {
        # (?s) lets "." match newlines as well; without it a password that
        # contains a line break is only measured up to its longest
        # newline-free run and can be reported as too short.
        "8+ chars": r"(?s).{8,}",
        "Has letters": r"[a-zA-Z]",
        "Has numbers": r"\d",
        "Has special chars": r"[!@#$%^&*(),.?\":{}|]",
    }
    # re.search: each rule only needs to be satisfied somewhere in the string.
    return {name: bool(re.search(pattern, pw)) for name, pattern in checks.items()}


print(validate_password("Abc123!@"))
# Output: {'8+ chars': True, 'Has letters': True, 'Has numbers': True, 'Has special chars': True}

# 4. String substitution — personal info masking
def mask_personal_info(text):
    """Mask phone numbers and email addresses found in *text*.

    Hyphen-separated phone numbers (3 digits, 3-4 digits, 4 digits) keep
    their first and last groups; the middle group becomes "****".
    Email addresses keep at most the first two characters of the local
    part; the rest of the local part becomes "***".

    Args:
        text: The string to scan.

    Returns:
        A copy of *text* with every recognised phone number and email
        address masked. Text without matches is returned unchanged.
    """
    # Phone number masking
    text = re.sub(
        r"(\d{3})-(\d{3,4})-(\d{4})",
        r"\1-****-\3",  # Replace middle digits with ****
        text,
    )
    # Email masking
    # {1,2} rather than {2}: a one-character local part ("a@example.com")
    # must still be masked instead of slipping through untouched.
    text = re.sub(
        r"([a-zA-Z0-9._%+-]{1,2})([a-zA-Z0-9._%+-]*)(@\S+)",
        r"\1***\3",  # Keep up to the first 2 chars, replace rest with ***
        text,
    )
    return text


sample = "Contact: 010-1234-5678, Email: hong@example.com"
print(mask_personal_info(sample))
# Output: Contact: 010-****-5678, Email: ho***@example.com

# 5. Extract IP addresses from logs
log_text = """
[2026-04-07 10:00:01] 192.168.1.100 GET /api/users 200
[2026-04-07 10:00:02] 10.0.0.55 POST /api/login 401
[2026-04-07 10:00:03] 172.16.0.1 GET /api/health 200
"""
# One octet: 250-255 | 200-249 | 0-199 (leading zeros tolerated).
# A plain \d{1,3} would also accept impossible values such as 999.
octet = r"(?:25[0-5]|2[0-4]\d|[01]?\d\d?)"
# Four octets joined by literal dots; \b keeps the match from starting or
# ending in the middle of a longer run of digits.
ip_pattern = rf"\b{octet}(?:\.{octet}){{3}}\b"
ips = re.findall(ip_pattern, log_text)
print(f"Found IPs: {ips}")
# Output: Found IPs: ['192.168.1.100', '10.0.0.55', '172.16.0.1']

Regular Expressions in JavaScript

// Regex literals in JavaScript: validate with RegExp.prototype.test()
const emailRegex = /^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/;

for (const candidate of ["user@example.com", "invalid"]) {
  console.log(emailRegex.test(candidate));
}
// Output: true
//         false

// Named capture groups (ES2018+) are exposed on the match's `groups` object
const dateRegex = /(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})/;
const match = dateRegex.exec("2026-04-07");
const { year, month } = match.groups;
console.log(year);   // "2026"
console.log(month);  // "04"

// replaceAll with a global regex; $1, $2, $3 insert the numbered capture groups
const text = "2026-04-07 and 2026-12-25";
const isoDate = /(\d{4})-(\d{2})-(\d{2})/g;
const formatted = text.replaceAll(isoDate, "$1/$2/$3");
console.log(formatted);
// Output: "2026/04/07 and 2026/12/25"

Summary

Scenario	Recommended Pattern
Check if a string contains text	`in` (Python) / `str.includes()` (JavaScript) — no regex needed
Simple format validation	Basic metacharacters + quantifiers
Data extraction	Capture groups `()`
Conditional matching	Lookahead/lookbehind
String replacement	`re.sub()` + backreferences `\1`
Complex parsing	Use a dedicated parser instead of regex

Regex is not a silver bullet: For nested structures like HTML or JSON parsing, use a dedicated parser.
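Reuse with re.compile(): When the same pattern is used repeatedly (for example inside a loop), compile it once and reuse the compiled object; this skips the per-call pattern-cache lookup and gives the pattern a single, named definition.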
Use the re.VERBOSE flag: Add comments and whitespace to complex regex for improved readability.
Test at regex101.com: Verify match results in real-time and see explanations for each token.