4.5 Pro Tips — Loop Performance & itertools
In performance-critical code or when working with large datasets, the choice of loop implementation can significantly impact performance. Python's standard library itertools module provides memory-efficient, fast iterator operations.
Loop Performance Comparison
Measuring with timeit
import timeit
N = 100_000
# Method 1: for loop + append
def method_loop(n=None):
    """Double each value in range(n) with an explicit loop + append.

    Args:
        n: number of elements to produce; defaults to the module-level N.
           The lazy default keeps the original zero-arg benchmark call
           working while making the function testable with small inputs.

    Returns:
        list[int]: [x * 2 for x in range(n)].
    """
    if n is None:
        n = N
    # Deliberately naive loop + append — this is the slow baseline
    # being benchmarked; do not "optimize" it away.
    result = []
    for x in range(n):
        result.append(x * 2)
    return result
# Method 2: list comprehension
def method_comprehension(n=None):
    """Same doubling transform, via a list comprehension.

    Args:
        n: number of elements; defaults to the module-level N (looked up
           lazily so small test inputs work without touching the global).

    Returns:
        list[int]: [x * 2 for x in range(n)].
    """
    if n is None:
        n = N
    return [x * 2 for x in range(n)]
# Method 3: map() with lambda
def method_map(n=None):
    """Same doubling transform, via map() with a lambda.

    Each element incurs a Python-level lambda call, which is why this
    variant tends to be slower than the comprehension.

    Args:
        n: number of elements; defaults to the module-level N.

    Returns:
        list[int]: [x * 2 for x in range(n)].
    """
    if n is None:
        n = N
    return list(map(lambda x: x * 2, range(n)))
# Method 4: map() with method reference
def method_map_mul(n=None):
    """Same doubling transform, via map() with a bound C-level method.

    (2).__mul__ avoids the per-element Python lambda call, keeping the
    whole loop in C.

    Args:
        n: number of elements; defaults to the module-level N.

    Returns:
        list[int]: [x * 2 for x in range(n)].
    """
    if n is None:
        n = N
    return list(map((2).__mul__, range(n)))
# Benchmark each implementation with timeit and report total and per-run cost.
runs = 100
benchmarks = [
    ("loop + append", method_loop),
    ("list comprehension", method_comprehension),
    ("map + lambda", method_map),
    ("map + method", method_map_mul),
]
for label, fn in benchmarks:
    elapsed = timeit.timeit(fn, number=runs)
    per_run_ms = elapsed / runs * 1000
    print(f" {label:25s}: {elapsed:.4f}s ({per_run_ms:.2f}ms per run)")
# Typical results (environment-dependent):
#   list comprehension: fastest
#   map + method: similar or slightly faster
#   loop + append: ~20-30% slower
#   map + lambda: similar or slower due to lambda call overhead
Conditional Filter Performance
import timeit
data = list(range(1, 100_001))
def filter_loop(values=None):
    """Collect the even numbers with an explicit loop + append.

    Args:
        values: iterable of ints; defaults to the module-level `data`.
            The default is looked up at call time, so the zero-arg
            benchmark call keeps working while small inputs stay testable.

    Returns:
        list[int]: the even elements, in original order.
    """
    if values is None:
        values = data
    result = []
    for x in values:
        if x % 2 == 0:
            result.append(x)
    return result
def filter_comprehension(values=None):
    """Collect the even numbers with a list comprehension.

    Args:
        values: iterable of ints; defaults to the module-level `data`
            (resolved lazily at call time).

    Returns:
        list[int]: the even elements, in original order.
    """
    if values is None:
        values = data
    return [x for x in values if x % 2 == 0]
def filter_builtin(values=None):
    """Collect the even numbers with filter() and a lambda.

    Args:
        values: iterable of ints; defaults to the module-level `data`
            (resolved lazily at call time).

    Returns:
        list[int]: the even elements, in original order.
    """
    if values is None:
        values = data
    return list(filter(lambda x: x % 2 == 0, values))
# Time the three filter styles over the same data.
n = 50
t1, t2, t3 = (
    timeit.timeit(fn, number=n)
    for fn in (filter_loop, filter_comprehension, filter_builtin)
)
for label, secs in (("Loop", t1), ("Comprehension", t2), ("filter()", t3)):
    print(f"{label}: {secs:.3f}s")
# Comprehension is generally fastest
Comparison with NumPy
# NumPy pushes the loop into native vectorized code; guard the import so
# the demo degrades gracefully when NumPy is not installed.
try:
    import numpy as np
    import timeit

    N = 1_000_000

    def pure_python():
        """Square 0..N-1 with a plain list comprehension."""
        return [x**2 for x in range(N)]

    def numpy_version():
        """Square 0..N-1 as a single vectorized ndarray expression."""
        return np.arange(N) ** 2

    t1 = timeit.timeit(pure_python, number=5)
    t2 = timeit.timeit(numpy_version, number=5)
    print(f"Pure Python: {t1:.3f}s")
    print(f"NumPy: {t2:.3f}s")
    print(f"Speedup: {t1/t2:.1f}x")
    # NumPy is typically 10-100x faster
except ImportError:
    print("NumPy not installed: pip install numpy")
itertools — Complete Guide
chain(): Combine Multiple Iterables
from itertools import chain

# chain() walks several iterables as one continuous stream.
a = [1, 2, 3]
b = [4, 5, 6]
c = [7, 8, 9]
for item in chain(a, b, c):
    print(item, end=" ")  # 1 2 3 4 5 6 7 8 9
print()

# chain.from_iterable() flattens exactly one level of nesting, lazily.
nested = [[1, 2], [3, 4, 5], [6]]
flat = list(chain.from_iterable(nested))
print(flat)  # [1, 2, 3, 4, 5, 6]

# Real-world: treat the lines of several files as a single sequence.
import io

files = [
    io.StringIO("line1\nline2\n"),
    io.StringIO("line3\nline4\n"),
]
all_lines = list(chain.from_iterable(f.readlines() for f in files))
print([line.strip() for line in all_lines])
# ['line1', 'line2', 'line3', 'line4']
islice(): Slicing an Iterable
from itertools import islice

def infinite_counter(start=0, step=1):
    """Yield start, start+step, start+2*step, ... forever."""
    current = start
    while True:
        yield current
        current += step

# islice() takes a slice of any iterator — even an infinite one.
first_10 = list(islice(infinite_counter(), 10))
print(first_10)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# start=5, stop=20, step=2 — same semantics as list slicing.
sliced = list(islice(infinite_counter(), 5, 20, 2))
print(sliced)  # [5, 7, 9, 11, 13, 15, 17, 19]

# Read only the first N lines of a large file without loading it all.
import io

large_file = io.StringIO("\n".join(str(i) for i in range(1_000_000)))
first_100 = list(islice(large_file, 100))
print(f"Lines read: {len(first_100)}")
product(): Cartesian Product
from itertools import product

# product() yields every pairing — nested loops without the nesting.
colors = ["red", "green", "blue"]
sizes = ["S", "M", "L"]
for c, s in product(colors, sizes):
    print(f"{c}-{s}", end=", ")
print()
# red-S, red-M, red-L, green-S, green-M, green-L, blue-S, blue-M, blue-L

# repeat=n crosses an iterable with itself n times:
# here, every 3-bit binary number.
binary_3bit = list(product([0, 1], repeat=3))
print(binary_3bit)
# [(0,0,0), (0,0,1), (0,1,0), (0,1,1), (1,0,0), (1,0,1), (1,1,0), (1,1,1)]

# Real-world: hyperparameter grid search over every combination.
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [32, 64, 128]
epochs = [10, 20]
configs = [
    {"lr": lr, "batch": bs, "epochs": ep}
    for lr, bs, ep in product(learning_rates, batch_sizes, epochs)
]
print(f"Configs to try: {len(configs)}")  # 18
combinations(), permutations()
from itertools import combinations, combinations_with_replacement, permutations

items = ["A", "B", "C", "D"]

# combinations: order-insensitive selections, no element reused.
print("Combinations (2):")
for pair in combinations(items, 2):
    print(pair, end=" ")
print()
# ('A', 'B') ('A', 'C') ('A', 'D') ('B', 'C') ('B', 'D') ('C', 'D')

# permutations: order matters, so (A, B) and (B, A) both appear.
print("Permutations (2):")
for pair in permutations(items, 2):
    print(pair, end=" ")
print()
# ('A', 'B') ('A', 'C') ... ('D', 'C') — 12 total

# combinations_with_replacement: unordered, elements may repeat.
print("Combinations with replacement (2):")
for pair in combinations_with_replacement(["A", "B", "C"], 2):
    print(pair, end=" ")
print()
# ('A', 'A') ('A', 'B') ('A', 'C') ('B', 'B') ('B', 'C') ('C', 'C')

# Real-world: every possible two-player team.
players = ["Alice", "Bob", "Charlie", "Diana", "Eve"]
teams = list(combinations(players, 2))
print(f"Possible team pairs: {len(teams)}")  # 10
for team in teams:
    print(f" {' & '.join(team)}")
groupby(): Grouping
from itertools import groupby

# groupby() starts a NEW group each time the key value changes, so equal
# keys must already be adjacent — sort by the same key for a true group-by.
def by_category(record):
    return record["category"]

data = [
    {"category": "A", "value": 1},
    {"category": "A", "value": 2},
    {"category": "B", "value": 3},
    {"category": "A", "value": 4},  # A appears again — new group without sorting
    {"category": "B", "value": 5},
]

# Without sorting: the second run of "A" becomes a separate group.
print("Without sort:")
for key, group in groupby(data, key=by_category):
    print(f" {key}: {[record['value'] for record in group]}")
# A: [1, 2]
# B: [3]
# A: [4] ← new A group!
# B: [5]

# With sorting: one group per distinct category.
print("After sort:")
sorted_data = sorted(data, key=by_category)
for key, group in groupby(sorted_data, key=by_category):
    print(f" {key}: {[record['value'] for record in group]}")
# A: [1, 2, 4]
# B: [3, 5]

# Real-world: bucket log messages by severity level.
def by_level(entry):
    return entry["level"]

logs = [
    {"level": "INFO", "msg": "Server started"},
    {"level": "INFO", "msg": "Connection accepted"},
    {"level": "WARNING", "msg": "Low memory"},
    {"level": "ERROR", "msg": "DB connection failed"},
    {"level": "INFO", "msg": "Retrying"},
    {"level": "ERROR", "msg": "Timeout"},
]
sorted_logs = sorted(logs, key=by_level)
log_groups = {
    level: [entry["msg"] for entry in group]
    for level, group in groupby(sorted_logs, key=by_level)
}
print(log_groups)
accumulate(): Running Totals
from itertools import accumulate
import operator

numbers = [1, 2, 3, 4, 5]

# With no function argument, accumulate() yields running sums.
cumsum = list(accumulate(numbers))
print(cumsum)  # [1, 3, 6, 10, 15]

# Any two-argument callable works — e.g. a running product.
cumprod = list(accumulate(numbers, operator.mul))
print(cumprod)  # [1, 2, 6, 24, 120]

# Running maximum.
data = [3, 1, 4, 1, 5, 9, 2, 6, 5]
cummax = list(accumulate(data, max))
print(cummax)  # [3, 3, 4, 4, 5, 9, 9, 9, 9]

# initial= seeds the accumulation and is emitted first (Python 3.8+).
with_initial = list(accumulate(numbers, initial=100))
print(with_initial)  # [100, 101, 103, 106, 110, 115]

# Real-world: running total of daily sales.
daily_sales = [1200, 800, 1500, 900, 2100, 700, 1800]
running_total = list(accumulate(daily_sales))
print("Daily sales running total:")
for day, (daily, total) in enumerate(zip(daily_sales, running_total), start=1):
    print(f" Day {day}: {daily:,} (cumulative: {total:,})")
Real-world: Processing Large Datasets
from itertools import islice, chain, groupby
import random
def generate_large_dataset(n: int):
    """Simulate a large dataset — uses a generator to save memory.

    Yields n dicts with id, random category, random amount, and random
    region, one at a time, so the whole dataset is never materialized.
    """
    categories = ["electronics", "clothing", "food", "books", "sports"]
    regions = ["Seoul", "Busan", "Incheon", "Daegu"]
    for record_id in range(n):
        yield {
            "id": record_id,
            "category": random.choice(categories),
            "amount": round(random.uniform(1000, 100000), 2),
            "region": random.choice(regions),
        }
def process_in_batches(data_stream, batch_size=1000):
    """Process a large stream in batches.

    Yields lists of at most batch_size items until the stream runs dry;
    the final batch may be shorter.
    """
    source = iter(data_stream)
    # islice() pulls at most batch_size items per pass; an empty pull
    # means the stream is exhausted.
    while batch := list(islice(source, batch_size)):
        yield batch
# Stream 10k simulated records through the batch pipeline, tallying
# per-category revenue without ever holding the full dataset in memory.
print("Large dataset processing pipeline:")
total_records = 0
category_totals = {}
dataset = generate_large_dataset(10_000)
for batch_num, batch in enumerate(process_in_batches(dataset, batch_size=500)):
    for record in batch:
        category = record["category"]
        category_totals[category] = category_totals.get(category, 0) + record["amount"]
        total_records += 1
print(f"Records processed: {total_records:,}")
print("Category totals:")
for category, amount in sorted(category_totals.items()):
    print(f" {category}: {amount:,.0f}")
# chain() also merges independent generator sources into one stream.
source1 = ({"source": "A", "value": i} for i in range(1, 6))
source2 = ({"source": "B", "value": i} for i in range(6, 11))
source3 = ({"source": "C", "value": i} for i in range(11, 16))
combined = chain(source1, source2, source3)
total = sum(record["value"] for record in combined)
print(f"\nCombined total: {total}")  # 120
Pro Tips
1. Iterators Can Only Be Consumed Once
from itertools import chain

# A generator is a one-shot stream: a second pass sees nothing.
gen = (x for x in range(5))
print(list(gen))  # [0, 1, 2, 3, 4]
print(list(gen))  # [] — already exhausted!

# To iterate twice, materialize a list or split the stream with tee().
from itertools import tee

gen = (x for x in range(5))
gen1, gen2 = tee(gen, 2)  # Make 2 copies (be careful with memory)
print(list(gen1))  # [0, 1, 2, 3, 4]
print(list(gen2))  # [0, 1, 2, 3, 4]
2. takewhile / dropwhile
from itertools import takewhile, dropwhile

numbers = [1, 3, 5, 2, 4, 6, 7, 9]

def is_odd(x):
    return x % 2 == 1

# takewhile: yield items until the predicate first fails, then stop.
odd_prefix = list(takewhile(is_odd, numbers))
print(odd_prefix)  # [1, 3, 5] — stops when 2 is encountered

# dropwhile: skip items while the predicate holds, then yield the rest.
after_odd = list(dropwhile(is_odd, numbers))
print(after_odd)  # [2, 4, 6, 7, 9]
3. Performance Summary
| Method | Best for | Memory |
|---|---|---|
| for loop | Complex logic, stateful | O(n) |
| list comprehension | Simple transform/filter | O(n) |
| generator expression | Large data, one-time iteration | O(1) |
| map/filter | Apply built-in functions, functional style | O(1) |
| itertools | Complex iterator composition | O(1) |
| NumPy | Numerical computation, matrices | O(n) but fast |