Skip to main content
Advertisement

Profiling

Find bottlenecks and measure performance using cProfile, line_profiler, and memory_profiler.


Installation

pip install line-profiler memory-profiler pyinstrument

cProfile — CPU Profiler (Standard Library)

import cProfile
import pstats
import io


def slow_function():
    """Sum of squares of 0..999_999 — a deliberately CPU-heavy workload.

    Returns:
        int: sum(i**2 for i in range(1_000_000)).
    """
    total = 0
    for i in range(1_000_000):
        total += i ** 2
    return total


def process_data(data: list[int]) -> list[int]:
    """Run slow_function once per input element.

    The element values are ignored — only len(data) matters; this exists
    purely to generate profiler-visible work.
    """
    return [slow_function() for _ in data]


# Option 1: Command line
# python -m cProfile -s cumulative script.py

# Option 2: Inline profiling
# Option 2: Inline profiling — wrap only the code under measurement.
profiler = cProfile.Profile()
profiler.enable()

result = process_data([1, 2, 3])

profiler.disable()

# Print results (top 20 functions sorted by cumulative time).
# Stats methods return self, so the calls can be chained.
stream = io.StringIO()
pstats.Stats(profiler, stream=stream).strip_dirs().sort_stats("cumulative").print_stats(20)
print(stream.getvalue())

# Option 3: Context manager
# Option 3: Context manager (Profile supports `with` since Python 3.8)
with cProfile.Profile() as pr:
    result = process_data([1, 2, 3])

pr.print_stats(sort="cumulative")

line_profiler — Line-by-Line Profiler

# Add @profile decorator and run with kernprof
# kernprof -l -v script.py

@profile  # type: ignore  # decorator injected into builtins by kernprof
def compute_stats(numbers: list[float]) -> dict:
    """Return mean, population standard deviation, and median of *numbers*.

    Each line is timed individually when run under `kernprof -l -v`.
    Assumes *numbers* is non-empty (division by len would raise otherwise).
    """
    total = sum(numbers)  # How long does this line take?
    mean = total / len(numbers)

    variance = sum((x - mean) ** 2 for x in numbers) / len(numbers)
    std_dev = variance ** 0.5

    sorted_nums = sorted(numbers)
    median = sorted_nums[len(sorted_nums) // 2]

    return {"mean": mean, "std": std_dev, "median": median}


# Programmatic usage
from line_profiler import LineProfiler

def target_function(data):
    """Square every item of *data* — a small per-line workload for LineProfiler.

    Kept as an explicit loop (not a comprehension) so each line shows up
    separately in the line-by-line profile.
    """
    result = []
    for item in data:
        result.append(item ** 2)
    return result

# Wrap the target function, invoke it once, then dump per-line timings.
lp = LineProfiler()
profiled = lp(target_function)
profiled(list(range(10000)))
lp.print_stats()

memory_profiler — Memory Profiler

# Add @profile decorator and run
# python -m memory_profiler script.py

@profile  # type: ignore  # decorator injected by memory_profiler's runner
def memory_hungry():
    """Allocate, then partially free, large structures to demo per-line memory.

    Run with `python -m memory_profiler script.py` to see the line-by-line
    memory deltas.
    """
    # Track memory usage
    big_list = list(range(1_000_000))  # ~8MB (idiomatic copy; same contents)
    big_dict = {str(i): i for i in range(100_000)}  # ~50MB
    del big_list  # freed
    return big_dict


# Programmatic usage
from memory_profiler import memory_usage

def my_func():
    """Build a large list of squares so memory_usage has something to measure."""
    return [i ** 2 for i in range(1_000_000)]

# Peak memory during execution (in MB)
# Sample the process's memory every 0.1s while my_func runs; values are MB.
samples = memory_usage((my_func,), interval=0.1)
print(f"Peak memory: {max(samples):.1f} MB")
print(f"Base memory: {min(samples):.1f} MB")

pyinstrument — Statistical Profiler (Visualization)

from pyinstrument import Profiler


def fibonacci(n: int) -> int:
    """Naive exponential-time Fibonacci — an ideal profiler demo workload."""
    if n <= 1:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)


# Context manager
# Context manager: everything inside the with-block is sampled.
with Profiler() as profiler:
    fibonacci(35)

profiler.print()  # Console output (flame graph style)
# profiler.open_in_browser()  # Visualize in browser

# Command line: python -m pyinstrument script.py

tracemalloc — Memory Allocation Tracing (Standard Library)

import tracemalloc


def create_objects():
    """Allocate 100k small dicts so tracemalloc has allocations to attribute."""
    return [{"id": i, "value": i ** 2} for i in range(100_000)]


# Compare memory snapshots
# Compare memory snapshots taken before and after the allocation.
tracemalloc.start()

snapshot1 = tracemalloc.take_snapshot()
result = create_objects()
snapshot2 = tracemalloc.take_snapshot()

# Top 10 memory increases, grouped by source line ("lineno").
top_stats = snapshot2.compare_to(snapshot1, "lineno")
for stat in top_stats[:10]:
    print(stat)

tracemalloc.stop()

Profiling Workflow

# Real-world pattern: slow code → profile → optimize → re-measure

import time
import cProfile
import pstats
from functools import wraps
from typing import Callable


def timeit(func: Callable) -> Callable:
    """Decorator: print the wall-clock execution time of every call.

    Uses perf_counter (monotonic, high resolution) and @wraps so the
    decorated function keeps its original __name__/__doc__.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(f"[{func.__name__}] {elapsed:.4f}s")
        return result
    return wrapper


@timeit
def before_optimization(n: int) -> list[int]:
    """Collect 0..n-1 using a deliberately quadratic membership test.

    Kept intentionally slow as the "before" half of the profiling demo.
    """
    result = []
    for i in range(n):
        if str(i) not in [str(j) for j in range(i)]:  # O(n²) — very slow
            result.append(i)
    return result


@timeit
def after_optimization(n: int) -> list[int]:
    """Same result as before_optimization, with an O(1) set membership test."""
    seen = set()  # O(1) lookup
    result = []
    for i in range(n):
        s = str(i)
        if s not in seen:
            seen.add(s)
            result.append(i)
    return result

Summary

| Tool            | Measures                    | Use Case                            |
|-----------------|-----------------------------|-------------------------------------|
| cProfile        | CPU time per function       | Locate bottleneck functions         |
| line_profiler   | CPU time per line           | Detailed analysis within a function |
| memory_profiler | Memory increase per line    | Find memory leaks                   |
| pyinstrument    | Statistical CPU sampling    | Quick visualization                 |
| tracemalloc     | Memory allocation by object | Track allocation source             |

Optimization without measurement is just guessing. Always profile first, optimize second.

Advertisement