Code Optimization
Speed up Python code through algorithm selection, built-in functions, and data structure optimization.
Prefer Built-in Functions and Libraries
import timeit
# Shared benchmark fixture: 100k consecutive ints, used by both implementations below.
data = list(range(100_000))
# ❌ Slow — Python loop (one bytecode dispatch per element)
def manual_sum(lst):
    """Sum *lst* with an explicit Python-level loop.

    Deliberately the slow baseline: every iteration pays interpreter
    dispatch cost, unlike the C-implemented built-in ``sum``.
    """
    running_total = 0
    for value in lst:
        running_total += value
    return running_total
# ✅ Fast — C-implemented built-in: iterates at C speed, no per-element bytecode
total = sum(data)
# Comparison (timings are indicative and vary by machine)
print(timeit.timeit(lambda: manual_sum(data), number=100))  # ~1.5s
print(timeit.timeit(lambda: sum(data), number=100))  # ~0.2s
# Common optimization patterns
numbers = [3, 1, 4, 1, 5, 9, 2, 6]
# map / filter (C-level iteration; the lambda itself still runs per element)
squares = list(map(lambda x: x ** 2, numbers))
evens = list(filter(lambda x: x % 2 == 0, numbers))
# sorted + key — the key function is called once per element
words = ["banana", "apple", "cherry", "date"]
by_length = sorted(words, key=len)
by_last = sorted(words, key=lambda w: w[-1])  # sort by last character
# any / all (short-circuit: stop at the first deciding element)
has_negative = any(x < 0 for x in numbers)
all_positive = all(x > 0 for x in numbers)
# max / min with key
longest = max(words, key=len)
List Comprehension vs Generator
import sys
# List comprehension — materializes all results in memory at once
squares_list = [x ** 2 for x in range(1_000_000)]
print(sys.getsizeof(squares_list))  # ~8MB (shallow size of the list object only)
# Generator — lazy evaluation, memory efficient: one value at a time
squares_gen = (x ** 2 for x in range(1_000_000))
print(sys.getsizeof(squares_gen))  # ~120 bytes regardless of element count
# Generator function
def chunked(lst: list, size: int):
    """Yield successive *size*-length slices of *lst*.

    The final chunk is shorter when ``len(lst)`` is not a multiple of
    *size*.

    Args:
        lst: the list to split.
        size: positive chunk length.

    Raises:
        ValueError: if *size* is not positive. (``range`` raises only for
            ``size == 0``; a negative size previously yielded nothing,
            silently — fail loudly for both.)
    """
    if size <= 0:
        raise ValueError("size must be a positive integer")
    for start in range(0, len(lst), size):
        yield lst[start:start + size]
# Consume the generator: slices are produced lazily, one per iteration.
for chunk in chunked(list(range(100)), 10):
    print(chunk)  # Process 10 at a time
# itertools utilities
import itertools
# chain — concatenate iterables without copying
combined = list(itertools.chain([1, 2], [3, 4], [5, 6]))
# islice — slicing with generator support (squares_gen defined above has not
# been iterated yet, so this consumes its first five values)
first_5 = list(itertools.islice(squares_gen, 5))
# groupby — groups only CONSECUTIVE elements with equal keys; because this
# data is not sorted by key, the trailing ("A", 5) forms its own "A" group.
data = [("A", 1), ("A", 2), ("B", 3), ("B", 4), ("A", 5)]
for key, group in itertools.groupby(data, key=lambda x: x[0]):
    print(key, list(group))
Data Structure Selection
import timeit
from collections import deque, defaultdict, Counter, OrderedDict
# ── list vs deque ─────────────────────────────────────────
# list: insert/delete at front is O(n) — every existing element shifts right
lst = list(range(100_000))
t1 = timeit.timeit(lambda: lst.insert(0, -1), number=1000)  # NOTE: grows lst by 1000
# deque: O(1) insert/delete on both ends
dq = deque(range(100_000))
t2 = timeit.timeit(lambda: dq.appendleft(-1), number=1000)
print(f"list insert: {t1:.4f}s | deque appendleft: {t2:.4f}s")
# ── dict vs set for membership ────────────────────────────
big_list = list(range(100_000))
big_set = set(big_list)  # build the set once, outside any hot loop
t1 = timeit.timeit(lambda: 99999 in big_list, number=1000)  # O(n) linear scan
t2 = timeit.timeit(lambda: 99999 in big_set, number=1000)  # O(1) hash lookup
print(f"list in: {t1:.6f}s | set in: {t2:.6f}s")
# ── defaultdict ───────────────────────────────────────────
# ❌ Verbose approach — a membership test plus two dict operations per word
word_count = {}
words = "the quick brown fox jumps over the lazy dog".split()
for word in words:
    if word not in word_count:
        word_count[word] = 0
    word_count[word] += 1
# ✅ defaultdict — missing keys auto-initialize via the int() factory (to 0)
word_count = defaultdict(int)
for word in words:
    word_count[word] += 1
# ✅ Counter (most concise)
word_count = Counter(words)
top3 = word_count.most_common(3)  # three (word, count) pairs, highest count first
# ── heapq — Priority Queue ────────────────────────────────
import heapq
data = [5, 3, 8, 1, 9, 2, 7]
heapq.heapify(data)  # O(n), in place; data[0] is now the minimum
smallest = heapq.heappop(data)  # O(log n)
heapq.heappush(data, 4)  # O(log n)
top_3 = heapq.nlargest(3, data)  # Top 3
low_3 = heapq.nsmallest(3, data)  # Bottom 3
String Optimization
import timeit
# 10k short strings to concatenate in the benchmarks below.
parts = [f"item_{i}" for i in range(10_000)]
# ❌ + operator — O(n²) memory copies (a new string object per iteration)
def concat_plus(parts):
    """Concatenate *parts* with repeated ``+`` — the quadratic baseline.

    Each iteration builds a brand-new string, copying everything
    accumulated so far.
    """
    accumulated = ""
    for piece in parts:
        accumulated = accumulated + piece
    return accumulated
# ✅ join — O(n) single allocation
def concat_join(parts):
    """Concatenate *parts* in a single O(n) pass via ``str.join``."""
    empty = ""
    return empty.join(parts)
t1 = timeit.timeit(lambda: concat_plus(parts), number=10)
t2 = timeit.timeit(lambda: concat_join(parts), number=10)
print(f"concat +: {t1:.4f}s | join: {t2:.4f}s")
# f-string vs format vs % — identical output, different formatting machinery
name, score = "Alice", 95.5
s1 = f"Name: {name}, Score: {score:.1f}"  # fastest: compiled into the bytecode
s2 = "Name: {}, Score: {:.1f}".format(name, score)
s3 = "Name: %s, Score: %.1f" % (name, score)
# Large string building — io.StringIO buffers writes, then one final string
import io
buffer = io.StringIO()
for i in range(10_000):
    buffer.write(f"line {i}\n")
result = buffer.getvalue()
Caching and Memoization
from functools import lru_cache, cache
import timeit
# lru_cache — cache recent call results
@lru_cache(maxsize=128)
def fibonacci(n: int) -> int:
    """Return the n-th Fibonacci number via memoized recursion.

    Without the cache this is exponential; with it, each n is computed
    exactly once.
    """
    if n > 1:
        return fibonacci(n - 1) + fibonacci(n - 2)
    return n
# cache (Python 3.9+) — unbounded lru_cache
@cache
def expensive_computation(x: int, y: int) -> float:
    """Return the Euclidean norm of (x, y); every result is cached forever."""
    import math

    sum_of_squares = x ** 2 + y ** 2
    return math.sqrt(sum_of_squares)
# Class method caching
from functools import cached_property
class Circle:
    """Circle whose derived measurements are computed lazily, once each.

    ``cached_property`` evaluates the getter on first access and then
    stores the result on the instance, so later reads are plain
    attribute lookups.
    """

    def __init__(self, radius: float):
        self.radius = radius

    @cached_property
    def area(self) -> float:
        """Area — computed on first access, cached thereafter."""
        import math

        r = self.radius
        return math.pi * r ** 2

    @cached_property
    def circumference(self) -> float:
        """Perimeter length — computed on first access, cached thereafter."""
        import math

        return 2 * math.pi * self.radius
c = Circle(5.0)
print(c.area)  # computed on first access
print(c.area)  # served from the per-instance cache — no recomputation
# TTL cache (time-based expiry)
import time
from typing import Any
class TTLCache:
def __init__(self, ttl: float = 60.0):
self._cache: dict[str, tuple[Any, float]] = {}
self.ttl = ttl
def get(self, key: str) -> Any | None:
if key in self._cache:
value, expires_at = self._cache[key]
if time.monotonic() < expires_at:
return value
del self._cache[key]
return None
def set(self, key: str, value: Any) -> None:
self._cache[key] = (value, time.monotonic() + self.ttl)
Loop Optimization
# ── Use local variable bindings ───────────────────────────
import math
# ❌ Repeated global attribute lookup
def slow_math(data):
    """Square root of every element — the slow baseline.

    ``math.sqrt`` is re-resolved (global + attribute lookup) on every
    iteration of the comprehension.
    """
    return [math.sqrt(element) for element in data]
# ✅ Bind to local variable
def fast_math(data):
    """Square root of every element, with ``math.sqrt`` bound locally.

    The attribute is resolved exactly once; inside the loop it is a
    fast local-variable load.
    """
    local_sqrt = math.sqrt  # one attribute lookup, hoisted out of the loop
    return [local_sqrt(element) for element in data]
# ── Condition ordering ────────────────────────────────────
# Fast conditions (often True) → first (short-circuit evaluation)
def process(items):
    """Return the elements of *items* that are positive, even, and prime.

    Conditions are evaluated cheapest-first so short-circuiting skips
    the expensive primality test for most elements.
    """
    selected = []
    for item in items:
        # same order as the filter chain: sign check, parity, then is_prime
        if item > 0 and item % 2 == 0 and is_prime(item):
            selected.append(item)
    return selected
def is_prime(n):
    """Return True if *n* is prime, by trial division up to √n.

    Uses ``math.isqrt`` for an exact integer square root: the previous
    ``int(n ** 0.5)`` relied on float precision and, for very large n,
    could return a root one too small and silently skip a real divisor.
    """
    if n < 2:
        return False
    for candidate in range(2, math.isqrt(n) + 1):
        if n % candidate == 0:
            return False
    return True
# ── zip / enumerate ───────────────────────────────────────
names = ["Alice", "Bob", "Charlie"]
scores = [95, 87, 92]
# ❌ Index access — manual bookkeeping, two subscript operations per pass
for i in range(len(names)):
    print(f"{names[i]}: {scores[i]}")
# ✅ zip — lockstep iteration; stops at the end of the shorter sequence
for name, score in zip(names, scores):
    print(f"{name}: {score}")
# ✅ enumerate — index and value together, with a configurable start index
for i, name in enumerate(names, start=1):
    print(f"{i}. {name}")
Summary
| Technique | Improvement | When to Apply |
|---|---|---|
| Built-in functions first | 5–10x | When loops can be replaced |
| Generator | Memory savings | Large data processing |
| set/dict lookup | O(n) → O(1) | Membership tests |
"".join() | O(n²) → O(n) | String accumulation |
lru_cache | Eliminate repeated work | Pure functions, recursion |
| Local variable binding | 5–15% | Tight inner loops |