C 확장과 대안

Cython, Numba, ctypes, cffi로 Python의 속도 한계를 극복합니다.

설치

pip install cython numba cffi

Numba — JIT 컴파일러 (가장 쉬운 방법)

from numba import njit, jit, prange
import numpy as np
import time


# ── 기본 @njit ────────────────────────────────────────────
@njit
def sum_squares(n: int) -> float:
    """Return the sum of ``i * i`` for every integer ``i`` in ``range(n)``.

    The running total starts as ``0.0``, so the result is a float even
    though each term is an integer product.
    """
    accumulator = 0.0
    for value in range(n):
        square = value * value
        accumulator += square
    return accumulator


# The first call triggers compilation (warm-up), so do it before timing.
sum_squares(100)

# Comparison: time the same sum of squares as a plain generator expression
# and through sum_squares.
start = time.perf_counter()
result_py = sum(i * i for i in range(10_000_000))
print(f"Python: {time.perf_counter() - start:.3f}s")

start = time.perf_counter()
result_nb = sum_squares(10_000_000)
print(f"Numba:  {time.perf_counter() - start:.3f}s")


# ── NumPy 배열 처리 ───────────────────────────────────────
@njit(parallel=True)  # parallel execution
def parallel_sum(arr: np.ndarray) -> float:
    """Return the sum of the elements of ``arr``.

    The loop runs over ``prange`` inside a function compiled with
    ``parallel=True``; ``total`` is accumulated with ``+=`` on each
    iteration and returned as a float.
    """
    total = 0.0
    for i in prange(len(arr)):  # prange = parallel range
        total += arr[i]
    return total


# Build 10 million random samples and sum them with parallel_sum.
arr = np.random.random(10_000_000)
result = parallel_sum(arr)


# ── 벡터화 ufunc ──────────────────────────────────────────
from numba import vectorize, float64


@vectorize([float64(float64, float64)])
def clip_and_scale(x, threshold):
    """Element-wise kernel: cap values above ``threshold``, double the rest.

    Returns ``threshold`` when ``x > threshold``; otherwise returns
    ``x * 2.0`` (the doubled value is not compared against ``threshold``).
    """
    exceeds_limit = x > threshold
    return threshold if exceeds_limit else x * 2.0


# Apply the kernel to a small sample array with a scalar threshold of 3.0.
data = np.array([1.0, 3.0, 5.0, 2.0, 4.0])
result = clip_and_scale(data, 3.0)  # behaves like a NumPy ufunc

Cython — Python을 C로 컴파일

# fibonacci.pyx — Cython 소스 파일
# cdef: C 타입 선언으로 속도 향상

# --- fibonacci.pyx ---
# def fib_py(n):          # 순수 Python 함수
#     if n <= 1: return n
#     return fib_py(n-1) + fib_py(n-2)
#
# cpdef long fib_c(long n):  # C 타입 선언
#     if n <= 1: return n
#     return fib_c(n-1) + fib_c(n-2)
#
# cdef long fib_internal(long n):  # C 전용 (Python에서 호출 불가)
#     if n <= 1: return n
#     return fib_internal(n-1) + fib_internal(n-2)

# --- setup.py ---
# from setuptools import setup
# from Cython.Build import cythonize
#
# setup(ext_modules=cythonize("fibonacci.pyx"))

# 빌드:
# python setup.py build_ext --inplace

# 사용:
# import fibonacci
# print(fibonacci.fib_c(40))

# optimized_math.pyx — 타입 어노테이션 완전 버전
import numpy as np
cimport numpy as cnp

def moving_average(cnp.ndarray[cnp.double_t, ndim=1] arr, int window):
    """Return the trailing moving average of ``arr`` over ``window`` samples.

    Args:
        arr: One-dimensional array of doubles.
        window: Number of consecutive samples averaged per output value;
            must be positive.

    Returns:
        A new array with the same length as ``arr``. Positions
        ``0 .. window - 2`` do not yet have a full window and are left at
        ``0.0``; every later position ``i`` holds the mean of
        ``arr[i - window + 1 : i + 1]``.

    Raises:
        ValueError: If ``window`` is not positive.
    """
    cdef int n = len(arr)
    cdef cnp.ndarray[cnp.double_t, ndim=1] result
    cdef double total = 0.0
    cdef int i

    # Reject a non-positive window up front: window == 0 would divide by
    # zero below, and a negative window would make ``arr[i - window]`` read
    # at an index beyond ``i``.
    if window <= 0:
        raise ValueError("window must be a positive integer")

    result = np.zeros(n)

    for i in range(n):
        total += arr[i]
        # Drop the sample that just slid out of the window.
        if i >= window:
            total -= arr[i - window]
        # Only emit a value once a full window has been accumulated.
        if i >= window - 1:
            result[i] = total / window

    return result

ctypes — C 라이브러리 직접 호출

import ctypes
import ctypes.util
import os
import sys


# ── 표준 C 라이브러리 함수 호출 ───────────────────────────
# Pick the C runtime for the current platform: msvcrt.dll on Windows,
# otherwise whatever ctypes.util.find_library("c") resolves to.
if sys.platform == "win32":
    libc = ctypes.CDLL("msvcrt.dll")
else:
    libc = ctypes.CDLL(ctypes.util.find_library("c"))

# printf is variadic. Declare the type of its fixed (format) argument so the
# call is also correct on platforms whose calling convention for variadic
# functions differs from regular functions; extra arguments are still
# accepted after the declared ones.
libc.printf.argtypes = [ctypes.c_char_p]
libc.printf.restype = ctypes.c_int
libc.printf(b"Hello from C: %d\n", 42)

# ── 커스텀 C 라이브러리 ────────────────────────────────────
# mathlib.c:
# double add(double a, double b) { return a + b; }
# int factorial(int n) { return n <= 1 ? 1 : n * factorial(n-1); }

# 빌드: gcc -shared -fPIC -o mathlib.so mathlib.c

# lib = ctypes.CDLL("./mathlib.so")
# lib.add.argtypes = [ctypes.c_double, ctypes.c_double]
# lib.add.restype = ctypes.c_double
# print(lib.add(3.14, 2.72))

# ── 구조체 정의 ───────────────────────────────────────────
class Point(ctypes.Structure):
    """C-layout struct holding two ``double`` coordinates, ``x`` and ``y``."""

    # Both coordinates share the same C type, so build the field list
    # from the field names.
    _fields_ = [(axis, ctypes.c_double) for axis in ("x", "y")]


class Rectangle(ctypes.Structure):
    """C-layout struct made of two nested ``Point`` fields.

    ``top_left`` comes first in the layout, followed by ``bottom_right``.
    """

    _fields_ = [(corner, Point) for corner in ("top_left", "bottom_right")]


# Fields are set positionally, in the order declared in _fields_.
p = Point(1.0, 2.0)
print(f"Point({p.x}, {p.y})")

# Nested structs: build a Rectangle from two Points and derive its extent
# from the difference between the corner coordinates.
rect = Rectangle(Point(0, 0), Point(10, 5))
width  = rect.bottom_right.x - rect.top_left.x
height = rect.bottom_right.y - rect.top_left.y
print(f"Rectangle {width} x {height}")

# ── 배열 ──────────────────────────────────────────────────
IntArray5 = ctypes.c_int * 5  # array type holding five C ints
arr = IntArray5(10, 20, 30, 40, 50)
# Walk the elements directly rather than indexing by position.
for element in arr:
    print(element)

cffi — 더 나은 C 인터페이스

from cffi import FFI

import sys

ffi = FFI()

# Declare the C function signatures that will be called through cffi.
ffi.cdef("""
    double sqrt(double x);
    int    abs(int n);
""")

# Load the library: msvcrt.dll on Windows; elsewhere dlopen(None), which the
# original note describes as the standard library.
if sys.platform == "win32":
    lib = ffi.dlopen("msvcrt.dll")
else:
    lib = ffi.dlopen(None)

result = lib.sqrt(16.0)
print(result)  # 4.0

neg = lib.abs(-42)
print(neg)     # 42

# Compile inline C code
# NOTE(review): cffi's documentation describes ffi.verify() as deprecated in
# favour of set_source() + compile() — confirm before relying on it.
ffi_inline = FFI()
ffi_inline.cdef("int add(int a, int b);")

# The C source passed to verify() provides the body of the declared add().
lib_inline = ffi_inline.verify("""
    int add(int a, int b) {
        return a + b;
    }
""")

print(lib_inline.add(3, 4))  # 7

성능 비교

import timeit
import numpy as np

N = 1_000_000

# Pure Python
def py_sum_sq(n):
    """Return the sum of squares of ``0 .. n - 1`` using a plain Python loop."""
    total = 0
    for value in range(n):
        total += value * value
    return total

# NumPy
def np_sum_sq(n):
    """Return the sum of squares of ``0 .. n - 1`` computed with NumPy.

    The values are generated as a float64 array, squared element-wise and
    reduced with a single sum.
    """
    values = np.arange(n, dtype=np.float64)
    squares = np.square(values)
    return squares.sum()

# Each measurement is the total time for three calls (number=3).
t_py = timeit.timeit(lambda: py_sum_sq(N), number=3)
t_np = timeit.timeit(lambda: np_sum_sq(N), number=3)

print(f"Python: {t_py:.3f}s")
# The ratio t_py / t_np is reported as the NumPy speed-up factor.
print(f"NumPy:  {t_np:.3f}s  ({t_py/t_np:.0f}x faster)")
# Numba @njit is comparable to or faster than NumPy (especially for large loops)

정리

도구	난이도	속도 향상	적합한 상황
`Numba` @njit	쉬움	10~100x	수치 루프, NumPy 배열
`Cython`	중간	10~200x	복잡한 로직, 타입 선언
`ctypes`	어려움	N/A	기존 C 라이브러리 호출
`cffi`	중간	N/A	더 안전한 C 인터페이스

일반적인 추천 순서: NumPy/Pandas 벡터화 → Numba @njit → Cython → ctypes/cffi

설치

Numba — JIT 컴파일러 (가장 쉬운 방법)

Cython — Python을 C로 컴파일

ctypes — C 라이브러리 직접 호출

cffi — 더 나은 C 인터페이스

성능 비교

정리

설치

Numba — JIT 컴파일러 (가장 쉬운 방법)

Cython — Python을 C로 컴파일

ctypes — C 라이브러리 직접 호출

cffi — 더 나은 C 인터페이스

성능 비교

정리