Skip to main content
Advertisement

Profiling

Find bottlenecks and measure performance using cProfile, line_profiler, and memory_profiler.


Installation

pip install line-profiler memory-profiler pyinstrument

cProfile — CPU Profiler (Standard Library)

import cProfile
import pstats
import io


def slow_function():
    """Sum of squares of 0..999_999 — a deliberately CPU-heavy workload.

    Returns:
        int: sum(i**2 for i in range(1_000_000)).
    """
    total = 0
    for i in range(1_000_000):
        total += i ** 2
    return total


def process_data(data: list[int]) -> list[int]:
    """Run slow_function once per input element.

    The element values are ignored — only len(data) matters; this exists
    purely to generate profiler-visible work.
    """
    return [slow_function() for _ in data]


# Option 1: Command line
# python -m cProfile -s cumulative script.py

# Option 2: Inline profiling
# Option 2: Inline profiling — wrap only the code under measurement.
profiler = cProfile.Profile()
profiler.enable()

result = process_data([1, 2, 3])

profiler.disable()

# Print results (top 20 functions sorted by cumulative time).
# Stats methods return self, so the calls can be chained.
stream = io.StringIO()
pstats.Stats(profiler, stream=stream).strip_dirs().sort_stats("cumulative").print_stats(20)
print(stream.getvalue())

# Option 3: Context manager
# Option 3: Context manager (Profile supports `with` since Python 3.8)
with cProfile.Profile() as pr:
    result = process_data([1, 2, 3])

pr.print_stats(sort="cumulative")

line_profiler — Line-by-Line Profiler

# Add @profile decorator and run with kernprof
# kernprof -l -v script.py

@profile  # type: ignore  # decorator injected into builtins by kernprof
def compute_stats(numbers: list[float]) -> dict:
    """Return mean, population standard deviation, and median of *numbers*.

    Each line is timed individually when run under `kernprof -l -v`.
    Assumes *numbers* is non-empty (division by len would raise otherwise).
    """
    total = sum(numbers)  # How long does this line take?
    mean = total / len(numbers)

    variance = sum((x - mean) ** 2 for x in numbers) / len(numbers)
    std_dev = variance ** 0.5

    sorted_nums = sorted(numbers)
    median = sorted_nums[len(sorted_nums) // 2]

    return {"mean": mean, "std": std_dev, "median": median}


# Programmatic usage
from line_profiler import LineProfiler

def target_function(data):
    """Square every item of *data* — a small per-line workload for LineProfiler.

    Kept as an explicit loop (not a comprehension) so each line shows up
    separately in the line-by-line profile.
    """
    result = []
    for item in data:
        result.append(item ** 2)
    return result

# Wrap the target function, invoke it once, then dump per-line timings.
lp = LineProfiler()
profiled = lp(target_function)
profiled(list(range(10000)))
lp.print_stats()

memory_profiler — Memory Profiler

# Add @profile decorator and run
# python -m memory_profiler script.py

@profile  # type: ignore  # decorator injected by memory_profiler's runner
def memory_hungry():
    """Allocate, then partially free, large structures to demo per-line memory.

    Run with `python -m memory_profiler script.py` to see the line-by-line
    memory deltas.
    """
    # Track memory usage
    big_list = list(range(1_000_000))  # ~8MB (idiomatic copy; same contents)
    big_dict = {str(i): i for i in range(100_000)}  # ~50MB
    del big_list  # freed
    return big_dict


# Programmatic usage
from memory_profiler import memory_usage

def my_func():
    """Build a large list of squares so memory_usage has something to measure."""
    return [i ** 2 for i in range(1_000_000)]

# Peak memory during execution (in MB)
# Sample the process's memory every 0.1s while my_func runs; values are MB.
samples = memory_usage((my_func,), interval=0.1)
print(f"Peak memory: {max(samples):.1f} MB")
print(f"Base memory: {min(samples):.1f} MB")

pyinstrument — Statistical Profiler (Visualization)

from pyinstrument import Profiler


def fibonacci(n: int) -> int:
    """Naive exponential-time Fibonacci — an ideal profiler demo workload."""
    if n <= 1:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)


# Context manager
# Context manager: everything inside the with-block is sampled.
with Profiler() as profiler:
    fibonacci(35)

profiler.print()  # Console output (flame graph style)
# profiler.open_in_browser()  # Visualize in browser

# Command line: python -m pyinstrument script.py

tracemalloc — Memory Allocation Tracing (Standard Library)

import tracemalloc


def create_objects():
    """Allocate 100k small dicts so tracemalloc has allocations to attribute."""
    return [{"id": i, "value": i ** 2} for i in range(100_000)]


# Compare memory snapshots
# Compare memory snapshots taken before and after the allocation.
tracemalloc.start()

snapshot1 = tracemalloc.take_snapshot()
result = create_objects()
snapshot2 = tracemalloc.take_snapshot()

# Top 10 memory increases, grouped by source line ("lineno").
top_stats = snapshot2.compare_to(snapshot1, "lineno")
for stat in top_stats[:10]:
    print(stat)

tracemalloc.stop()

Profiling Workflow

# Real-world pattern: slow code → profile → optimize → re-measure

import time
import cProfile
import pstats
from functools import wraps
from typing import Callable


def timeit(func: Callable) -> Callable:
    """Decorator: print the wall-clock execution time of every call.

    Uses perf_counter (monotonic, high resolution) and @wraps so the
    decorated function keeps its original __name__/__doc__.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        result = func(*args, **kwargs)
        elapsed = time.perf_counter() - start
        print(f"[{func.__name__}] {elapsed:.4f}s")
        return result
    return wrapper


@timeit
def before_optimization(n: int) -> list[int]:
    """Collect 0..n-1 using a deliberately quadratic membership test.

    Kept intentionally slow as the "before" half of the profiling demo.
    """
    result = []
    for i in range(n):
        if str(i) not in [str(j) for j in range(i)]:  # O(n²) — very slow
            result.append(i)
    return result


@timeit
def after_optimization(n: int) -> list[int]:
    """Same result as before_optimization, with an O(1) set membership test."""
    seen = set()  # O(1) lookup
    result = []
    for i in range(n):
        s = str(i)
        if s not in seen:
            seen.add(s)
            result.append(i)
    return result

Summary

| Tool            | Measures                    | Use Case                            |
|-----------------|-----------------------------|-------------------------------------|
| cProfile        | CPU time per function       | Locate bottleneck functions         |
| line_profiler   | CPU time per line           | Detailed analysis within a function |
| memory_profiler | Memory increase per line    | Find memory leaks                   |
| pyinstrument    | Statistical CPU sampling    | Quick visualization                 |
| tracemalloc     | Memory allocation by object | Track allocation source             |

Optimization without measurement is just guessing. Always profile first, optimize second.

Advertisement