Code Optimization
Speed up Python code through algorithm selection, built-in functions, and data structure optimization.
Prefer Built-in Functions and Libraries
import timeit
# Shared benchmark fixture: 100k consecutive ints, used by both implementations below.
data = list(range(100_000))
# ❌ Slow — Python loop (one bytecode dispatch per element)
def manual_sum(lst):
    """Sum *lst* with an explicit Python-level loop.

    Deliberately the slow baseline: every iteration pays interpreter
    dispatch cost, unlike the C-implemented built-in ``sum``.
    """
    running_total = 0
    for value in lst:
        running_total += value
    return running_total
# ✅ Fast — C-implemented built-in: iterates at C speed, no per-element bytecode
total = sum(data)
# Comparison (timings are indicative and vary by machine)
print(timeit.timeit(lambda: manual_sum(data), number=100))  # ~1.5s
print(timeit.timeit(lambda: sum(data), number=100))  # ~0.2s
# Common optimization patterns
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
# map / filter (C-level iteration; the lambda itself still runs per element)
squares = list(map(lambda x: x ** 2, numbers))
evens = list(filter(lambda x: x % 2 == 0, numbers))
# sorted + key — the key function is called once per element
words = ["banana", "apple", "cherry", "date"]
by_length = sorted(words, key=len)
by_last = sorted(words, key=lambda w: w[-1])  # sort by last character
# any / all (short-circuit: stop at the first deciding element)
has_negative = any(x < 0 for x in numbers)
all_positive = all(x > 0 for x in numbers)
# max / min with key
longest = max(words, key=len)
List Comprehension vs Generator
import sys
# List comprehension — materializes all results in memory at once
squares_list = [x ** 2 for x in range(1_000_000)]
print(sys.getsizeof(squares_list))  # ~8MB (shallow size of the list object only)
# Generator — lazy evaluation, memory efficient: one value at a time
squares_gen = (x ** 2 for x in range(1_000_000))
print(sys.getsizeof(squares_gen))  # ~120 bytes regardless of element count
# Generator function
def chunked(lst: list, size: int):
    """Yield successive *size*-length slices of *lst*.

    The final chunk is shorter when ``len(lst)`` is not a multiple of
    *size*.

    Args:
        lst: the list to split.
        size: positive chunk length.

    Raises:
        ValueError: if *size* is not positive. (``range`` raises only for
            ``size == 0``; a negative size previously yielded nothing,
            silently — fail loudly for both.)
    """
    if size <= 0:
        raise ValueError("size must be a positive integer")
    for start in range(0, len(lst), size):
        yield lst[start:start + size]
# Consume the generator: slices are produced lazily, one per iteration.
for chunk in chunked(list(range(100)), 10):
    print(chunk)  # Process 10 at a time
# itertools utilities
import itertools
# chain — concatenate iterables without copying
combined = list(itertools.chain([1, 2], [3, 4], [5, 6]))
# islice — slicing with generator support (squares_gen defined above has not
# been iterated yet, so this consumes its first five values)
first_5 = list(itertools.islice(squares_gen, 5))
# groupby — groups only CONSECUTIVE elements with equal keys; because this
# data is not sorted by key, the trailing ("A", 5) forms its own "A" group.
data = [("A", 1), ("A", 2), ("B", 3), ("B", 4), ("A", 5)]
for key, group in itertools.groupby(data, key=lambda x: x[0]):
    print(key, list(group))
Data Structure Selection
import timeit
from collections import deque, defaultdict, Counter, OrderedDict
# ── list vs deque ─────────────────────────────────────────
# list: insert/delete at front is O(n) — every existing element shifts right
lst = list(range(100_000))
t1 = timeit.timeit(lambda: lst.insert(0, -1), number=1000)  # NOTE: grows lst by 1000
# deque: O(1) insert/delete on both ends
dq = deque(range(100_000))
t2 = timeit.timeit(lambda: dq.appendleft(-1), number=1000)
print(f"list insert: {t1:.4f}s | deque appendleft: {t2:.4f}s")
# ── dict vs set for membership ────────────────────────────
big_list = list(range(100_000))
big_set = set(big_list)  # build the set once, outside any hot loop
t1 = timeit.timeit(lambda: 99999 in big_list, number=1000)  # O(n) linear scan
t2 = timeit.timeit(lambda: 99999 in big_set, number=1000)  # O(1) hash lookup
print(f"list in: {t1:.6f}s | set in: {t2:.6f}s")
# ── defaultdict ───────────────────────────────────────────
# ❌ Verbose approach — a membership test plus two dict operations per word
word_count = {}
words = "the quick brown fox jumps over the lazy dog".split()
for word in words:
    if word not in word_count:
        word_count[word] = 0
    word_count[word] += 1
# ✅ defaultdict — missing keys auto-initialize via the int() factory (to 0)
word_count = defaultdict(int)
for word in words:
    word_count[word] += 1
# ✅ Counter (most concise)
word_count = Counter(words)
top3 = word_count.most_common(3)  # three (word, count) pairs, highest count first
# ── heapq — Priority Queue ────────────────────────────────
import heapq
data = [5, 3, 8, 1, 9, 2, 7]
heapq.heapify(data)  # O(n), in place; data[0] is now the minimum
smallest = heapq.heappop(data)  # O(log n)
heapq.heappush(data, 4)  # O(log n)
top_3 = heapq.nlargest(3, data)  # Top 3
low_3 = heapq.nsmallest(3, data)  # Bottom 3
String Optimization
import timeit
# 10k short strings to concatenate in the benchmarks below.
parts = [f"item_{i}" for i in range(10_000)]
# ❌ + operator — O(n²) memory copies (a new string object per iteration)
def concat_plus(parts):
    """Concatenate *parts* with repeated ``+`` — the quadratic baseline.

    Each iteration builds a brand-new string, copying everything
    accumulated so far.
    """
    accumulated = ""
    for piece in parts:
        accumulated = accumulated + piece
    return accumulated
# ✅ join — O(n) single allocation
def concat_join(parts):
    """Concatenate *parts* in a single O(n) pass via ``str.join``."""
    empty = ""
    return empty.join(parts)
t1 = timeit.timeit(lambda: concat_plus(parts), number=10)
t2 = timeit.timeit(lambda: concat_join(parts), number=10)
print(f"concat +: {t1:.4f}s | join: {t2:.4f}s")
# f-string vs format vs % — identical output, different formatting machinery
name, score = "Alice", 95.5
s1 = f"Name: {name}, Score: {score:.1f}"  # fastest: compiled into the bytecode
s2 = "Name: {}, Score: {:.1f}".format(name, score)
s3 = "Name: %s, Score: %.1f" % (name, score)
# Large string building — io.StringIO buffers writes, then one final string
import io
buffer = io.StringIO()
for i in range(10_000):
    buffer.write(f"line {i}\n")
result = buffer.getvalue()
Caching and Memoization
from functools import lru_cache, cache
import timeit
# lru_cache — cache recent call results
@lru_cache(maxsize=128)
def fibonacci(n: int) -> int:
    """Return the n-th Fibonacci number via memoized recursion.

    Without the cache this is exponential; with it, each n is computed
    exactly once.
    """
    if n > 1:
        return fibonacci(n - 1) + fibonacci(n - 2)
    return n
# cache (Python 3.9+) — unbounded lru_cache
@cache
def expensive_computation(x: int, y: int) -> float:
    """Return the Euclidean norm of (x, y); every result is cached forever."""
    import math

    sum_of_squares = x ** 2 + y ** 2
    return math.sqrt(sum_of_squares)
# Class method caching
from functools import cached_property
class Circle:
    """Circle whose derived measurements are computed lazily, once each.

    ``cached_property`` evaluates the getter on first access and then
    stores the result on the instance, so later reads are plain
    attribute lookups.
    """

    def __init__(self, radius: float):
        self.radius = radius

    @cached_property
    def area(self) -> float:
        """Area — computed on first access, cached thereafter."""
        import math

        r = self.radius
        return math.pi * r ** 2

    @cached_property
    def circumference(self) -> float:
        """Perimeter length — computed on first access, cached thereafter."""
        import math

        return 2 * math.pi * self.radius
c = Circle(5.0)
print(c.area)  # computed on first access
print(c.area)  # served from the per-instance cache — no recomputation
# TTL cache (time-based expiry)
import time
from typing import Any
class TTLCache:
def __init__(self, ttl: float = 60.0):
self._cache: dict[str, tuple[Any, float]] = {}
self.ttl = ttl
def get(self, key: str) -> Any | None:
if key in self._cache:
value, expires_at = self._cache[key]
if time.monotonic() < expires_at:
return value
del self._cache[key]
return None
def set(self, key: str, value: Any) -> None:
self._cache[key] = (value, time.monotonic() + self.ttl)
Loop Optimization
# ── Use local variable bindings ───────────────────────────
import math
# ❌ Repeated global attribute lookup
def slow_math(data):
    """Square root of every element — the slow baseline.

    ``math.sqrt`` is re-resolved (global + attribute lookup) on every
    iteration of the comprehension.
    """
    return [math.sqrt(element) for element in data]
# ✅ Bind to local variable
def fast_math(data):
    """Square root of every element, with ``math.sqrt`` bound locally.

    The attribute is resolved exactly once; inside the loop it is a
    fast local-variable load.
    """
    local_sqrt = math.sqrt  # one attribute lookup, hoisted out of the loop
    return [local_sqrt(element) for element in data]
# ── Condition ordering ────────────────────────────────────
# Fast conditions (often True) → first (short-circuit evaluation)
def process(items):
    """Return the elements of *items* that are positive, even, and prime.

    Conditions are evaluated cheapest-first so short-circuiting skips
    the expensive primality test for most elements.
    """
    selected = []
    for item in items:
        # same order as the filter chain: sign check, parity, then is_prime
        if item > 0 and item % 2 == 0 and is_prime(item):
            selected.append(item)
    return selected
def is_prime(n):
    """Return True if *n* is prime, by trial division up to √n.

    Uses ``math.isqrt`` for an exact integer square root: the previous
    ``int(n ** 0.5)`` relied on float precision and, for very large n,
    could return a root one too small and silently skip a real divisor.
    """
    if n < 2:
        return False
    for candidate in range(2, math.isqrt(n) + 1):
        if n % candidate == 0:
            return False
    return True
# ── zip / enumerate ───────────────────────────────────────
names = ["Alice", "Bob", "Charlie"]
scores = [95, 87, 92]
# ❌ Index access — manual bookkeeping, two subscript operations per pass
for i in range(len(names)):
    print(f"{names[i]}: {scores[i]}")
# ✅ zip — lockstep iteration; stops at the end of the shorter sequence
for name, score in zip(names, scores):
    print(f"{name}: {score}")
# ✅ enumerate — index and value together, with a configurable start index
for i, name in enumerate(names, start=1):
    print(f"{i}. {name}")
Summary
| Technique | Improvement | When to Apply |
|---|---|---|
| Built-in functions first | 5–10x | When loops can be replaced |
| Generator | Memory savings | Large data processing |
| set/dict lookup | O(n) → O(1) | Membership tests |
"".join() | O(n²) → O(n) | String accumulation |
lru_cache | Eliminate repeated work | Pure functions, recursion |
| Local variable binding | 5–15% | Tight inner loops |