4.5 Pro Tips — Loop Performance & itertools
In performance-critical code or when working with large datasets, the choice of loop implementation can significantly impact performance. Python's standard library itertools module provides memory-efficient, fast iterator operations.
Loop Performance Comparison
Measuring with timeit
import timeit
N = 100_000
# Method 1: for loop + append
def method_loop(n=None):
    """Double each value in range(n) with an explicit loop + append.

    Args:
        n: number of elements to produce; defaults to the module-level N.
           The lazy default keeps the original zero-arg benchmark call
           working while making the function testable with small inputs.

    Returns:
        list[int]: [x * 2 for x in range(n)].
    """
    if n is None:
        n = N
    # Deliberately naive loop + append — this is the slow baseline
    # being benchmarked; do not "optimize" it away.
    result = []
    for x in range(n):
        result.append(x * 2)
    return result
# Method 2: list comprehension
def method_comprehension(n=None):
    """Same doubling transform, via a list comprehension.

    Args:
        n: number of elements; defaults to the module-level N (looked up
           lazily so small test inputs work without touching the global).

    Returns:
        list[int]: [x * 2 for x in range(n)].
    """
    if n is None:
        n = N
    return [x * 2 for x in range(n)]
# Method 3: map() with lambda
def method_map(n=None):
    """Same doubling transform, via map() with a lambda.

    Each element incurs a Python-level lambda call, which is why this
    variant tends to be slower than the comprehension.

    Args:
        n: number of elements; defaults to the module-level N.

    Returns:
        list[int]: [x * 2 for x in range(n)].
    """
    if n is None:
        n = N
    return list(map(lambda x: x * 2, range(n)))
# Method 4: map() with method reference
def method_map_mul(n=None):
    """Same doubling transform, via map() with a bound C-level method.

    (2).__mul__ avoids the per-element Python lambda call, keeping the
    whole loop in C.

    Args:
        n: number of elements; defaults to the module-level N.

    Returns:
        list[int]: [x * 2 for x in range(n)].
    """
    if n is None:
        n = N
    return list(map((2).__mul__, range(n)))
# Benchmark each implementation with timeit and report total and per-run cost.
runs = 100
benchmarks = [
    ("loop + append", method_loop),
    ("list comprehension", method_comprehension),
    ("map + lambda", method_map),
    ("map + method", method_map_mul),
]
for label, fn in benchmarks:
    elapsed = timeit.timeit(fn, number=runs)
    per_run_ms = elapsed / runs * 1000
    print(f" {label:25s}: {elapsed:.4f}s ({per_run_ms:.2f}ms per run)")
# Typical results (environment-dependent):
#   list comprehension: fastest
#   map + method: similar or slightly faster
#   loop + append: ~20-30% slower
#   map + lambda: similar or slower due to lambda call overhead
Conditional Filter Performance
import timeit
data = list(range(1, 100_001))
def filter_loop(values=None):
    """Collect the even numbers with an explicit loop + append.

    Args:
        values: iterable of ints; defaults to the module-level `data`.
            The default is looked up at call time, so the zero-arg
            benchmark call keeps working while small inputs stay testable.

    Returns:
        list[int]: the even elements, in original order.
    """
    if values is None:
        values = data
    result = []
    for x in values:
        if x % 2 == 0:
            result.append(x)
    return result
def filter_comprehension(values=None):
    """Collect the even numbers with a list comprehension.

    Args:
        values: iterable of ints; defaults to the module-level `data`
            (resolved lazily at call time).

    Returns:
        list[int]: the even elements, in original order.
    """
    if values is None:
        values = data
    return [x for x in values if x % 2 == 0]
def filter_builtin(values=None):
    """Collect the even numbers with filter() and a lambda.

    Args:
        values: iterable of ints; defaults to the module-level `data`
            (resolved lazily at call time).

    Returns:
        list[int]: the even elements, in original order.
    """
    if values is None:
        values = data
    return list(filter(lambda x: x % 2 == 0, values))
# Time the three filter styles over the same data.
n = 50
t1, t2, t3 = (
    timeit.timeit(fn, number=n)
    for fn in (filter_loop, filter_comprehension, filter_builtin)
)
for label, secs in (("Loop", t1), ("Comprehension", t2), ("filter()", t3)):
    print(f"{label}: {secs:.3f}s")
# Comprehension is generally fastest
Comparison with NumPy
# NumPy pushes the loop into native vectorized code; guard the import so
# the demo degrades gracefully when NumPy is not installed.
try:
    import numpy as np
    import timeit

    N = 1_000_000

    def pure_python():
        """Square 0..N-1 with a plain list comprehension."""
        return [x**2 for x in range(N)]

    def numpy_version():
        """Square 0..N-1 as a single vectorized ndarray expression."""
        return np.arange(N) ** 2

    t1 = timeit.timeit(pure_python, number=5)
    t2 = timeit.timeit(numpy_version, number=5)
    print(f"Pure Python: {t1:.3f}s")
    print(f"NumPy: {t2:.3f}s")
    print(f"Speedup: {t1/t2:.1f}x")
    # NumPy is typically 10-100x faster
except ImportError:
    print("NumPy not installed: pip install numpy")
itertools — Complete Guide
chain(): Combine Multiple Iterables
from itertools import chain

# chain() walks several iterables as one continuous stream.
a = [1, 2, 3]
b = [4, 5, 6]
c = [7, 8, 9]
for item in chain(a, b, c):
    print(item, end=" ")  # 1 2 3 4 5 6 7 8 9
print()

# chain.from_iterable() flattens exactly one level of nesting, lazily.
nested = [[1, 2], [3, 4, 5], [6]]
flat = list(chain.from_iterable(nested))
print(flat)  # [1, 2, 3, 4, 5, 6]

# Real-world: treat the lines of several files as a single sequence.
import io

files = [
    io.StringIO("line1\nline2\n"),
    io.StringIO("line3\nline4\n"),
]
all_lines = list(chain.from_iterable(f.readlines() for f in files))
print([line.strip() for line in all_lines])
# ['line1', 'line2', 'line3', 'line4']
islice(): Slicing an Iterable
from itertools import islice

def infinite_counter(start=0, step=1):
    """Yield start, start+step, start+2*step, ... forever."""
    current = start
    while True:
        yield current
        current += step

# islice() takes a slice of any iterator — even an infinite one.
first_10 = list(islice(infinite_counter(), 10))
print(first_10)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# start=5, stop=20, step=2 — same semantics as list slicing.
sliced = list(islice(infinite_counter(), 5, 20, 2))
print(sliced)  # [5, 7, 9, 11, 13, 15, 17, 19]

# Read only the first N lines of a large file without loading it all.
import io

large_file = io.StringIO("\n".join(str(i) for i in range(1_000_000)))
first_100 = list(islice(large_file, 100))
print(f"Lines read: {len(first_100)}")
product(): Cartesian Product
from itertools import product

# product() yields every pairing — nested loops without the nesting.
colors = ["red", "green", "blue"]
sizes = ["S", "M", "L"]
for c, s in product(colors, sizes):
    print(f"{c}-{s}", end=", ")
print()
# red-S, red-M, red-L, green-S, green-M, green-L, blue-S, blue-M, blue-L

# repeat=n crosses an iterable with itself n times:
# here, every 3-bit binary number.
binary_3bit = list(product([0, 1], repeat=3))
print(binary_3bit)
# [(0,0,0), (0,0,1), (0,1,0), (0,1,1), (1,0,0), (1,0,1), (1,1,0), (1,1,1)]

# Real-world: hyperparameter grid search over every combination.
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]
epochs = [10, 20]
configs = [
    {"lr": lr, "batch": bs, "epochs": ep}
    for lr, bs, ep in product(learning_rates, batch_sizes, epochs)
]
print(f"Configs to try: {len(configs)}")  # 18
combinations(), permutations()
from itertools import combinations, combinations_with_replacement, permutations

items = ["A", "B", "C", "D"]

# combinations: order-insensitive selections, no element reused.
print("Combinations (2):")
for pair in combinations(items, 2):
    print(pair, end=" ")
print()
# ('A', 'B') ('A', 'C') ('A', 'D') ('B', 'C') ('B', 'D') ('C', 'D')

# permutations: order matters, so (A, B) and (B, A) both appear.
print("Permutations (2):")
for pair in permutations(items, 2):
    print(pair, end=" ")
print()
# ('A', 'B') ('A', 'C') ... ('D', 'C') — 12 total

# combinations_with_replacement: unordered, elements may repeat.
print("Combinations with replacement (2):")
for pair in combinations_with_replacement(["A", "B", "C"], 2):
    print(pair, end=" ")
print()
# ('A', 'A') ('A', 'B') ('A', 'C') ('B', 'B') ('B', 'C') ('C', 'C')

# Real-world: every possible two-player team.
players = ["Alice", "Bob", "Charlie", "Diana", "Eve"]
teams = list(combinations(players, 2))
print(f"Possible team pairs: {len(teams)}")  # 10
for team in teams:
    print(f" {' & '.join(team)}")
groupby(): Grouping
from itertools import groupby

# groupby() starts a NEW group each time the key value changes, so equal
# keys must already be adjacent — sort by the same key for a true group-by.
def by_category(record):
    return record["category"]

data = [
    {"category": "A", "value": 1},
    {"category": "A", "value": 2},
    {"category": "B", "value": 3},
    {"category": "A", "value": 4},  # A appears again — new group without sorting
    {"category": "B", "value": 5},
]

# Without sorting: the second run of "A" becomes a separate group.
print("Without sort:")
for key, group in groupby(data, key=by_category):
    print(f" {key}: {[record['value'] for record in group]}")
# A: [1, 2]
# B: [3]
# A: [4] ← new A group!
# B: [5]

# With sorting: one group per distinct category.
print("After sort:")
sorted_data = sorted(data, key=by_category)
for key, group in groupby(sorted_data, key=by_category):
    print(f" {key}: {[record['value'] for record in group]}")
# A: [1, 2, 4]
# B: [3, 5]

# Real-world: bucket log messages by severity level.
def by_level(entry):
    return entry["level"]

logs = [
    {"level": "INFO", "msg": "Server started"},
    {"level": "INFO", "msg": "Connection accepted"},
    {"level": "WARNING", "msg": "Low memory"},
    {"level": "ERROR", "msg": "DB connection failed"},
    {"level": "INFO", "msg": "Retrying"},
    {"level": "ERROR", "msg": "Timeout"},
]
sorted_logs = sorted(logs, key=by_level)
log_groups = {
    level: [entry["msg"] for entry in group]
    for level, group in groupby(sorted_logs, key=by_level)
}
print(log_groups)
accumulate(): Running Totals
from itertools import accumulate
import operator

numbers = [1, 2, 3, 4, 5]

# With no function argument, accumulate() yields running sums.
cumsum = list(accumulate(numbers))
print(cumsum)  # [1, 3, 6, 10, 15]

# Any two-argument callable works — e.g. a running product.
cumprod = list(accumulate(numbers, operator.mul))
print(cumprod)  # [1, 2, 6, 24, 120]

# Running maximum.
data = [3, 1, 4, 1, 5, 9, 2, 6, 5]
cummax = list(accumulate(data, max))
print(cummax)  # [3, 3, 4, 4, 5, 9, 9, 9, 9]

# initial= seeds the accumulation and is emitted first (Python 3.8+).
with_initial = list(accumulate(numbers, initial=100))
print(with_initial)  # [100, 101, 103, 106, 110, 115]

# Real-world: running total of daily sales.
daily_sales = [1200, 800, 1500, 900, 2100, 700, 1800]
running_total = list(accumulate(daily_sales))
print("Daily sales running total:")
for day, (daily, total) in enumerate(zip(daily_sales, running_total), start=1):
    print(f" Day {day}: {daily:,} (cumulative: {total:,})")
Real-world: Processing Large Datasets
from itertools import islice, chain, groupby
import random
def generate_large_dataset(n: int):
    """Simulate a large dataset — uses a generator to save memory.

    Yields n dicts with id, random category, random amount, and random
    region, one at a time, so the whole dataset is never materialized.
    """
    categories = ["electronics", "clothing", "food", "books", "sports"]
    regions = ["Seoul", "Busan", "Incheon", "Daegu"]
    for record_id in range(n):
        yield {
            "id": record_id,
            "category": random.choice(categories),
            "amount": round(random.uniform(1000, 100000), 2),
            "region": random.choice(regions),
        }
def process_in_batches(data_stream, batch_size=1000):
    """Process a large stream in batches.

    Yields lists of at most batch_size items until the stream runs dry;
    the final batch may be shorter.
    """
    source = iter(data_stream)
    # islice() pulls at most batch_size items per pass; an empty pull
    # means the stream is exhausted.
    while batch := list(islice(source, batch_size)):
        yield batch
# Stream 10k simulated records through the batch pipeline, tallying
# per-category revenue without ever holding the full dataset in memory.
print("Large dataset processing pipeline:")
total_records = 0
category_totals = {}
dataset = generate_large_dataset(10_000)
for batch_num, batch in enumerate(process_in_batches(dataset, batch_size=500)):
    for record in batch:
        category = record["category"]
        category_totals[category] = category_totals.get(category, 0) + record["amount"]
        total_records += 1
print(f"Records processed: {total_records:,}")
print("Category totals:")
for category, amount in sorted(category_totals.items()):
    print(f" {category}: {amount:,.0f}")
# chain() also merges independent generator sources into one stream.
source1 = ({"source": "A", "value": i} for i in range(1, 6))
source2 = ({"source": "B", "value": i} for i in range(6, 11))
source3 = ({"source": "C", "value": i} for i in range(11, 16))
combined = chain(source1, source2, source3)
total = sum(record["value"] for record in combined)
print(f"\nCombined total: {total}")  # 120
Pro Tips
1. Iterators Can Only Be Consumed Once
from itertools import chain

# A generator is a one-shot stream: a second pass sees nothing.
gen = (x for x in range(5))
print(list(gen))  # [0, 1, 2, 3, 4]
print(list(gen))  # [] — already exhausted!

# To iterate twice, materialize a list or split the stream with tee().
from itertools import tee

gen = (x for x in range(5))
gen1, gen2 = tee(gen, 2)  # Make 2 copies (be careful with memory)
print(list(gen1))  # [0, 1, 2, 3, 4]
print(list(gen2))  # [0, 1, 2, 3, 4]
2. takewhile / dropwhile
from itertools import takewhile, dropwhile

numbers = [1, 3, 5, 2, 4, 6, 7, 9]

def is_odd(x):
    return x % 2 == 1

# takewhile: yield items until the predicate first fails, then stop.
odd_prefix = list(takewhile(is_odd, numbers))
print(odd_prefix)  # [1, 3, 5] — stops when 2 is encountered

# dropwhile: skip items while the predicate holds, then yield the rest.
after_odd = list(dropwhile(is_odd, numbers))
print(after_odd)  # [2, 4, 6, 7, 9]
3. Performance Summary
| Method | Best for | Memory |
|---|---|---|
| for loop | Complex logic, stateful | O(n) |
| list comprehension | Simple transform/filter | O(n) |
| generator expression | Large data, one-time iteration | O(1) |
| map/filter | Apply built-in functions, functional style | O(1) |
| itertools | Complex iterator composition | O(1) |
| NumPy | Numerical computation, matrices | O(n) but fast |