4.4 Comprehensions — list, dict, set, generator

Comprehensions are one of Python's most expressive and Pythonic features. They fold a loop and an optional condition into a single expression, which often makes code both clearer and faster when used judiciously.

List Comprehensions

# Basic syntax: [expression for variable in iterable]
squares = [x**2 for x in range(10)]
print(squares)   # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# Conditional filter: [expression for variable in iterable if condition]
# The trailing `if` only decides which items are kept; it never changes
# the value produced for an item.
evens = [x for x in range(20) if x % 2 == 0]
print(evens)   # [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

# Compare with a for loop:
result = []
for x in range(10):
    if x % 2 == 0:
        result.append(x**2)

# Comprehension equivalent (same filter, same expression, one statement):
result = [x**2 for x in range(10) if x % 2 == 0]
print(result)   # [0, 4, 16, 36, 64]

# Combined with conditional expression (ternary)
# Here `if ... else` sits in the expression position, so every item is kept
# and only its value is chosen — this is not a filter.
numbers = [-3, -1, 0, 2, 5, -7, 8]
abs_values = [x if x >= 0 else -x for x in numbers]
print(abs_values)   # [3, 1, 0, 2, 5, 7, 8]

# String processing: filter on length, then upper-case what remains
words = ["Hello", "World", "Python", "Programming"]
upper_long = [w.upper() for w in words if len(w) > 5]
print(upper_long)   # ['PYTHON', 'PROGRAMMING']

Dictionary Comprehensions

# Basic syntax: {key: value for variable in iterable}
squares_dict = {x: x**2 for x in range(1, 6)}
print(squares_dict)   # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# Conditional filter
even_squares = {x: x**2 for x in range(1, 11) if x % 2 == 0}
print(even_squares)   # {2: 4, 4: 16, 6: 36, 8: 64, 10: 100}

# Transform an existing dictionary
prices = {"apple": 1000, "banana": 500, "cherry": 2000}

# Apply 10% discount to all values
# int() truncates any fractional part (it does not round).
discounted = {k: int(v * 0.9) for k, v in prices.items()}
print(discounted)   # {'apple': 900, 'banana': 450, 'cherry': 1800}

# Invert key-value pairs
# Only lossless when the values are unique (and hashable): if two keys shared
# a value, the later pair would overwrite the earlier one.
inverted = {v: k for k, v in prices.items()}
print(inverted)   # {1000: 'apple', 500: 'banana', 2000: 'cherry'}

# Build a dictionary from two lists
# zip() stops at the shorter input. For a plain pairing like this one,
# dict(zip(keys, values)) builds the same mapping without a comprehension.
keys = ["name", "age", "city"]
values = ["Alice", 30, "Seoul"]
person = {k: v for k, v in zip(keys, values)}
print(person)   # {'name': 'Alice', 'age': 30, 'city': 'Seoul'}

# Conditional: keep only public fields
# public_fields is a set, so each `k in public_fields` test is a hash lookup.
full_data = {"name": "Alice", "age": 30, "password": "secret", "email": "a@b.com"}
public_fields = {"name", "age", "email"}
public_data = {k: v for k, v in full_data.items() if k in public_fields}
print(public_data)   # {'name': 'Alice', 'age': 30, 'email': 'a@b.com'}

Set Comprehensions

# Basic syntax: {expression for variable in iterable}
# x and -x square to the same value, so the set keeps a single copy of each.
unique_squares = {x**2 for x in range(-5, 6)}
print(sorted(unique_squares))   # [0, 1, 4, 9, 16, 25] — duplicates removed; a set is unordered, so sort it for display

# Use for deduplication
words = ["hello", "world", "Hello", "Python", "python", "HELLO"]
unique_lower = {w.lower() for w in words}
print(unique_lower)   # {'hello', 'world', 'python'} (element order may vary)

# Set operations
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7}

intersection = a & b          # {3, 4, 5}
union = a | b                 # {1, 2, 3, 4, 5, 6, 7}
difference = a - b            # {1, 2}
symmetric_diff = a ^ b        # {1, 2, 6, 7}

# Conditional set
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
even_set = {n for n in numbers if n % 2 == 0}
print(even_set)   # {2, 4, 6, 8, 10} (display order is not guaranteed)

Generator Expressions: Lazy Evaluation

# Basic syntax: (expression for variable in iterable)
# Uses parentheses; does NOT compute immediately — generates values one at a time

# List comprehension: builds the full list immediately (memory O(n))
squares_list = [x**2 for x in range(1_000_000)]
print(type(squares_list))   # <class 'list'>

# Generator expression: returns an iterator (memory O(1))
squares_gen = (x**2 for x in range(1_000_000))
print(type(squares_gen))    # <class 'generator'>

import sys
# sys.getsizeof is shallow: for the list it measures the list object and its
# array of references, not the int objects those references point to, so the
# list's true footprint is larger than the figure printed here.
print(f"List memory:      {sys.getsizeof(squares_list):,} bytes")
print(f"Generator memory: {sys.getsizeof(squares_gen)} bytes")
# List:      ~8 MB (the list object itself, excluding its elements)
# Generator: roughly 100–200 bytes depending on the Python version (regardless of size!)


# Useful for large data processing
def read_large_file(filepath: str):
    """Lazily yield the non-blank lines of a UTF-8 text file.

    Each line is stripped of leading and trailing whitespace, and lines
    that are empty after stripping are skipped. The file is consumed one
    line at a time rather than being loaded as a whole.

    Args:
        filepath: Path of the text file to read.

    Yields:
        Each non-blank line, stripped, in file order.

    Raises:
        OSError: If the file cannot be opened. Because this is a generator
            function, the error surfaces on the first iteration, not at
            call time.
        UnicodeDecodeError: If the content is not valid UTF-8.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        # Strip each line exactly once, then filter on the stripped value.
        stripped = (line.strip() for line in f)
        yield from (line for line in stripped if line)


# Passing a generator directly to built-in functions
# (when the generator is the only argument, the call's own parentheses suffice)
total = sum(x**2 for x in range(100))
maximum = max(len(w) for w in ["hello", "world", "python"])

print(total)    # 328350
print(maximum)  # 6


# Generator chaining
# Every stage is lazy: nothing is squared or filtered until list() below
# pulls values through the whole chain, one item at a time.
data = range(1, 11)
pipeline = (x**2 for x in data if x % 2 == 0)  # Squares of even numbers
filtered = (x for x in pipeline if x > 10)      # Only those > 10
result = list(filtered)  # Consumes both generators; they cannot be iterated again
print(result)   # [16, 36, 64, 100]

Nested Comprehensions

# Flatten a 2D list
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

flat = [num for row in matrix for num in row]
print(flat)   # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Reading order: "for each row in matrix, for each num in row"
# The `for` clauses appear in the same order as in the nested loops.
# Equivalent to:
# flat = []
# for row in matrix:
#     for num in row:
#         flat.append(num)

# Build a 2D list
# The outer comprehension (for i) produces the rows; the inner one (for j)
# is the expression evaluated for each row, so it builds that row's cells.
grid = [[i * j for j in range(1, 5)] for i in range(1, 5)]
for row in grid:
    print(row)
# [1, 2, 3, 4]
# [2, 4, 6, 8]
# [3, 6, 9, 12]
# [4, 8, 12, 16]

# With conditions
# x varies slowest and y fastest; the trailing `if` filters each (x, y) pair.
pairs = [(x, y) for x in range(1, 4) for y in range(1, 4) if x != y]
print(pairs)   # [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]


# Warning: 3+ levels hurt readability — prefer explicit loops
def flatten(nested):
    """Return a flat list of the non-list items found in ``nested``.

    Only ``list`` instances are descended into, recursively and to any
    depth. Every other value (tuples, strings, dicts, ...) is kept as a
    single item, in the order it is encountered.
    """
    flat = []
    for element in nested:
        if not isinstance(element, list):
            flat.append(element)
            continue
        # A nested list: splice its flattened contents in place.
        flat += flatten(element)
    return flat

Comprehension vs Loop: Performance

import timeit

def test_comprehension() -> list[int]:
    """Benchmark case: build the squares of 0..9999 with a list comprehension."""
    return [x**2 for x in range(10000)]

def test_loop() -> list[int]:
    """Benchmark case: build the squares of 0..9999 with a for loop and append()."""
    result = []
    for x in range(10000):
        # One method lookup and call per element — the overhead being measured.
        result.append(x**2)
    return result

def test_map() -> list[int]:
    """Benchmark case: build the squares of 0..9999 with map() and a lambda."""
    # map() is lazy, so list() is needed to actually produce the values.
    return list(map(lambda x: x**2, range(10000)))

# timeit.timeit returns the total time in seconds for all `n` calls.
n = 1000
t1 = timeit.timeit(test_comprehension, number=n)
t2 = timeit.timeit(test_loop, number=n)
t3 = timeit.timeit(test_map, number=n)

print(f"Comprehension: {t1:.3f}s")
print(f"Loop:          {t2:.3f}s")
print(f"map():         {t3:.3f}s")
# Typically the comprehension is fastest. The loop pays for an append() call
# per item, and map() with a lambda pays for a Python-level function call per
# item, so those two usually land close together. map() is mainly competitive
# when given a built-in callable. Exact ordering varies by Python version —
# measure on the interpreter you actually use.

Real-world Example 1: Data Transformation Pipeline

from typing import TypedDict


class Student(TypedDict):
    """Dict shape of one student record, for static type checkers.

    TypedDict adds no runtime validation; instances are plain dicts.
    """
    name: str
    score: int
    grade: str


# Input records carry only "name" and "score" — there is no "grade" key yet.
raw_data = [
    {"name": "Alice", "score": 92},
    {"name": "Bob", "score": 78},
    {"name": "Charlie", "score": 85},
    {"name": "Diana", "score": 61},
    {"name": "Eve", "score": 95},
]

def assign_grade(score: int) -> str:
    """Map a numeric score to a letter grade.

    Cutoffs are inclusive: 90+ is "A", 80+ is "B", 70+ is "C", 60+ is "D",
    and anything lower is "F".
    """
    # Ordered from the highest cutoff down so that the first match wins.
    cutoffs = ((90, "A"), (80, "B"), (70, "C"), (60, "D"))
    for minimum, letter in cutoffs:
        if score >= minimum:
            return letter
    return "F"

# Transform + filter + sort pipeline
# Within the comprehension the `if` runs first for each record, so a grade is
# only computed for records that pass the filter. sorted(..., reverse=True)
# then orders the surviving records from highest score to lowest.
processed: list[Student] = sorted(
    [
        {"name": d["name"], "score": d["score"], "grade": assign_grade(d["score"])}
        for d in raw_data
        if d["score"] >= 70   # Only 70+
    ],
    key=lambda s: s["score"],
    reverse=True
)

for student in processed:
    print(f"  {student['name']}: {student['score']} ({student['grade']})")
# Output (each line is printed with a two-space indent; Diana, 61, is filtered out):
# Eve: 95 (A)
# Alice: 92 (A)
# Charlie: 85 (B)
# Bob: 78 (C)

Real-world Example 2: Data Aggregation

transactions = [
    {"category": "food", "amount": 12000},
    {"category": "transport", "amount": 3500},
    {"category": "food", "amount": 8500},
    {"category": "entertainment", "amount": 25000},
    {"category": "transport", "amount": 1500},
    {"category": "food", "amount": 15000},
]

# Distinct category names. This is a set, so the key order of the dicts
# built from it below is not guaranteed to match the sample output.
categories = {t["category"] for t in transactions}

# Sum per category. Each category triggers a full pass over `transactions`,
# which is fine for small inputs; for large ones a single accumulating pass
# (e.g. with collections.defaultdict) avoids the repeated scans.
totals = {
    cat: sum(t["amount"] for t in transactions if t["category"] == cat)
    for cat in categories
}
print(totals)
# {'food': 35500, 'transport': 5000, 'entertainment': 25000}  (key order may vary)

large_tx = [t for t in transactions if t["amount"] >= 10000]
print(f"Large transactions: {len(large_tx)}")
# Large transactions: 3

# Count per category: sum(1 for ...) counts the items that pass the filter.
tx_counts = {cat: sum(1 for t in transactions if t["category"] == cat)
             for cat in categories}
print(tx_counts)
# {'food': 3, 'transport': 2, 'entertainment': 1}  (key order may vary)

Readability vs Conciseness

# Overly complex comprehensions hurt readability
# Bad: everything in one line (inline data, two loops and a filter together)
result = [f"{k}={v}" for d in [{"a": 1, "b": 2}, {"c": 3}] for k, v in d.items() if v > 1]

# Good: split into variables — each step has a name and does one thing
dicts = [{"a": 1, "b": 2}, {"c": 3}]
all_pairs = [(k, v) for d in dicts for k, v in d.items()]
filtered = [f"{k}={v}" for k, v in all_pairs if v > 1]
# Both versions produce ['b=2', 'c=3'].

# Or extract into a function
def process_dict_list(dicts: list[dict]) -> list[str]:
    """Format every entry whose value exceeds 1 as a ``key=value`` string.

    Entries are visited dict by dict, each in its own iteration order, and
    the formatted strings are returned in that same order.
    """
    formatted = []
    for mapping in dicts:
        for key, value in mapping.items():
            if value > 1:
                formatted.append(f"{key}={value}")
    return formatted

Pro Tips

1. Pass a Generator Directly to Functions

# No intermediate list is built: each function consumes the generator directly.
total = sum(x**2 for x in range(100))
joined = ", ".join(str(x) for x in range(5))
any_neg = any(x < 0 for x in [1, -2, 3])  # any() stops at the first True (-2 here)

print(total, joined, any_neg)
# 328350 0, 1, 2, 3, 4 True

2. Walrus Operator (:=) with Comprehensions

import re

strings = ["hello123", "world", "python456", "test", "abc789"]

# For each string the `if` clause runs first: re.search returns a match
# object (truthy) or None (falsy), and := binds it to `m` either way. The
# output expression m.group() is evaluated only for strings that matched.
# Unlike the loop variable `s`, a name bound with := inside a comprehension
# is bound in the enclosing scope, so `m` remains defined afterwards.
matches = [
    m.group()
    for s in strings
    if (m := re.search(r"\d+", s))  # Capture match result and use in condition
]
print(matches)   # ['123', '456', '789']

3. Comprehension Variables Don't Leak in Python 3

# The loop variable of a comprehension is local to the comprehension.
result = [x for x in range(5)]
# print(x)  # NameError in Python 3 — x doesn't escape the comprehension
#           # (provided no `x` was already defined in the enclosing scope)

# But for-loop variables DO persist after the loop
for y in range(5):
    pass
print(y)   # 4 — accessible after the loop

List Comprehensions

Dictionary Comprehensions

Set Comprehensions

Generator Expressions: Lazy Evaluation

Nested Comprehensions

Comprehension vs Loop: Performance

Real-world Example 1: Data Transformation Pipeline

Real-world Example 2: Data Aggregation

Readability vs Conciseness

Pro Tips

1. Pass a Generator Directly to Functions

2. Walrus Operator (:=) with Comprehensions

3. Comprehension Variables Don't Leak in Python 3

List Comprehensions

Dictionary Comprehensions

Set Comprehensions

Generator Expressions: Lazy Evaluation

Nested Comprehensions

Comprehension vs Loop: Performance

Real-world Example 1: Data Transformation Pipeline

Real-world Example 2: Data Aggregation

Readability vs Conciseness

Pro Tips

1. Pass a Generator Directly to Functions

2. Walrus Operator (:=) with Comprehensions

3. Comprehension Variables Don't Leak in Python 3