Skip to main content
Advertisement

C Extensions and Alternatives

Overcome Python's speed limits with Numba, Cython, ctypes, and cffi.


Installation

pip install cython numba cffi

Numba — JIT Compiler (Easiest Approach)

from numba import njit, jit, prange
import numpy as np
import time


# ── Basic @njit ───────────────────────────────────────────
@njit
def sum_squares(n: int) -> float:
    """Return the sum of ``i * i`` for every ``i`` in ``range(n)``.

    The accumulator starts as ``0.0``, so the result is a float even though
    the loop index is an integer. The function is decorated with ``@njit``;
    the first call triggers compilation, so warm it up before timing it.
    """
    total = 0.0
    for i in range(n):
        total += i * i
    return total


# Warm-up: the first call pays the one-time compilation cost, so make it
# before anything is timed.
sum_squares(100)

# Comparison
# Baseline: pure-Python generator expression.
start = time.perf_counter()
result_py = sum(i * i for i in range(10_000_000))
elapsed_py = time.perf_counter() - start
print(f"Python: {elapsed_py:.3f}s")

# Same workload through the already-compiled Numba function.
start = time.perf_counter()
result_nb = sum_squares(10_000_000)
elapsed_nb = time.perf_counter() - start
print(f"Numba: {elapsed_nb:.3f}s")


# ── NumPy array processing ────────────────────────────────
@njit(parallel=True)  # parallel execution
def parallel_sum(arr: np.ndarray) -> float:
    """Return the sum of the elements of ``arr``.

    The loop runs over ``prange`` rather than ``range`` so that, together
    with ``parallel=True``, its iterations can be executed in parallel.
    ``arr`` is indexed with a single integer, so it is presumably a 1-D
    numeric array — confirm against callers.
    """
    total = 0.0
    for i in prange(len(arr)):  # prange = parallel range
        total += arr[i]
    return total


arr = np.random.random(10_000_000)
result = parallel_sum(arr)


# ── Vectorized ufunc ──────────────────────────────────────
from numba import vectorize, float64


@vectorize([float64(float64, float64)])
def clip_and_scale(x, threshold):
    """Scalar kernel applied element-wise: cap large values, double the rest.

    Returns ``threshold`` when ``x > threshold``; otherwise returns
    ``x * 2.0``. Note that the doubled value is not itself capped, so an
    output may exceed ``threshold`` (e.g. ``x=2.0, threshold=3.0`` -> ``4.0``).
    """
    if x > threshold:
        return threshold
    return x * 2.0


data = np.array([1.0, 3.0, 5.0, 2.0, 4.0])
result = clip_and_scale(data, 3.0) # works like a NumPy ufunc

Cython — Compile Python to C

# fibonacci.pyx — Cython source file
# cdef: C type declarations for speed

# --- fibonacci.pyx ---
# def fib_py(n):  # pure Python function
#     if n <= 1: return n
#     return fib_py(n-1) + fib_py(n-2)
#
# cpdef long fib_c(long n):  # C type declaration
#     if n <= 1: return n
#     return fib_c(n-1) + fib_c(n-2)
#
# cdef long fib_internal(long n):  # C-only (not callable from Python)
#     if n <= 1: return n
#     return fib_internal(n-1) + fib_internal(n-2)

# --- setup.py ---
# from setuptools import setup
# from Cython.Build import cythonize
#
# setup(ext_modules=cythonize("fibonacci.pyx"))

# Build:
# python setup.py build_ext --inplace

# Usage:
# import fibonacci
# print(fibonacci.fib_c(40))
# optimized_math.pyx — fully typed version
import numpy as np
cimport numpy as cnp

def moving_average(cnp.ndarray[cnp.double_t, ndim=1] arr, int window):
    """Return the trailing moving average of ``arr`` over ``window`` samples.

    For every index ``i >= window - 1`` the output holds the mean of the
    ``window`` most recent samples ending at ``i``. The first ``window - 1``
    entries are left at ``0.0`` because no full window exists there yet.

    ``arr`` is a 1-D array of doubles; the result has the same length.
    ``window`` is expected to be at least 1 — it is not validated here.
    """
    cdef int n = len(arr)
    cdef cnp.ndarray[cnp.double_t, ndim=1] result = np.zeros(n)
    cdef double total = 0.0  # running sum of the samples currently in the window
    cdef int i

    for i in range(n):
        total += arr[i]
        if i >= window:
            # The sample `window` positions back has slid out of the window.
            total -= arr[i - window]
        if i >= window - 1:
            result[i] = total / window

    return result

ctypes — Call C Libraries Directly

import ctypes
import ctypes.util
import os
import sys


# ── Standard C library functions ─────────────────────────
if sys.platform == "win32":
    # Windows: load the C runtime from msvcrt.dll.
    libc = ctypes.CDLL("msvcrt.dll")
else:
    # Other platforms: ask ctypes to locate the C library by its short name.
    libc = ctypes.CDLL(ctypes.util.find_library("c"))

# printf
# The format string is passed as bytes, not str.
libc.printf(b"Hello from C: %d\n", 42)

# ── Custom C library ──────────────────────────────────────
# mathlib.c:
# double add(double a, double b) { return a + b; }
# int factorial(int n) { return n <= 1 ? 1 : n * factorial(n-1); }

# Build: gcc -shared -fPIC -o mathlib.so mathlib.c

# lib = ctypes.CDLL("./mathlib.so")
# lib.add.argtypes = [ctypes.c_double, ctypes.c_double]
# lib.add.restype = ctypes.c_double
# print(lib.add(3.14, 2.72))

# ── Struct definition ─────────────────────────────────────
class Point(ctypes.Structure):
    """C-compatible struct with two ``double`` fields, ``x`` and ``y``."""

    _fields_ = [
        ("x", ctypes.c_double),
        ("y", ctypes.c_double),
    ]


class Rectangle(ctypes.Structure):
    """C-compatible struct made of two nested ``Point`` structs.

    ``top_left`` and ``bottom_right`` are stored inline as ``Point`` fields.
    """

    _fields_ = [
        ("top_left", Point),
        ("bottom_right", Point),
    ]


# Build a point and read its fields back.
p = Point(x=1.0, y=2.0)
print(f"Point({p.x}, {p.y})")

# Nested structs: the rectangle is described by its two corner points.
rect = Rectangle(top_left=Point(0, 0), bottom_right=Point(10, 5))
width, height = (
    rect.bottom_right.x - rect.top_left.x,
    rect.bottom_right.y - rect.top_left.y,
)
print(f"Rectangle {width} x {height}")

# ── Arrays ────────────────────────────────────────────────
IntArray5 = ctypes.c_int * 5  # array type holding five C ints
arr = IntArray5(10, 20, 30, 40, 50)
# ctypes arrays can be iterated directly; no index bookkeeping needed.
for value in arr:
    print(value)

cffi — Better C Interface

from cffi import FFI

ffi = FFI()

# Declare C function signatures
ffi.cdef("""
double sqrt(double x);
int abs(int n);
""")

# Load library
import sys

if sys.platform == "win32":
    lib = ffi.dlopen("msvcrt.dll")
else:
    lib = ffi.dlopen(None)  # standard library

result = lib.sqrt(16.0)
print(result) # 4.0

neg = lib.abs(-42)
print(neg) # 42

# Inline C compilation
# A second FFI instance is used here: cdef() declares the C signature and
# verify() is handed the C source that implements it; the returned object
# exposes `add` as a callable.
# NOTE(review): cffi's documentation appears to treat verify() as a legacy
# entry point and recommends set_source() + compile() instead — confirm
# against the cffi version in use. A C compiler is presumably required at
# run time for this step.
ffi_inline = FFI()
ffi_inline.cdef("int add(int a, int b);")

lib_inline = ffi_inline.verify("""
int add(int a, int b) {
return a + b;
}
""")

print(lib_inline.add(3, 4)) # 7

Performance Comparison

import timeit
import numpy as np

N = 1_000_000

# Pure Python
def py_sum_sq(n):
    """Return the sum of ``i * i`` for ``i`` in ``range(n)`` in pure Python."""
    return sum(i * i for i in range(n))

# NumPy
def np_sum_sq(n):
    """Return the sum of squares of ``0 .. n-1`` computed with NumPy.

    The values are generated as ``float64``, so the result is a NumPy float
    rather than a Python int.
    """
    arr = np.arange(n, dtype=np.float64)
    return np.sum(arr * arr)

# Time three runs of each implementation (total seconds for all three).
t_py = timeit.Timer(lambda: py_sum_sq(N)).timeit(number=3)
t_np = timeit.Timer(lambda: np_sum_sq(N)).timeit(number=3)

# Report both timings and how many times faster NumPy was.
speedup = t_py / t_np
print(f"Python: {t_py:.3f}s")
print(f"NumPy: {t_np:.3f}s ({speedup:.0f}x faster)")
# Numba @njit is comparable to or faster than NumPy for heavy loops

Summary

| Tool | Difficulty | Speedup | Best For |
| --- | --- | --- | --- |
| Numba @njit | Easy | 10–100x | Numeric loops, NumPy arrays |
| Cython | Medium | 10–200x | Complex logic, type declarations |
| ctypes | Hard | N/A | Calling existing C libraries |
| cffi | Medium | N/A | Safer C interface |

Recommended order: NumPy/Pandas vectorization → Numba @njit → Cython → ctypes/cffi

Advertisement