메모리 최적화

__slots__, 제너레이터, 약한 참조로 메모리 사용량을 줄입니다.

`__slots__` — 인스턴스 딕셔너리 제거

import sys
from typing import ClassVar


# ── 일반 클래스 vs __slots__ ─────────────────────────────
class PointNormal:
    """3D point stored as ordinary instance attributes.

    No ``__slots__`` is declared, so every instance has its own
    ``__dict__`` and accepts arbitrary new attributes.
    """

    def __init__(self, x: float, y: float, z: float):
        # Single tuple assignment; attributes are still set in x, y, z order.
        self.x, self.y, self.z = x, y, z


class PointSlots:
    """3D point whose attributes are limited to ``x``, ``y`` and ``z``."""

    # Declaring the allowed attribute names removes the per-instance __dict__.
    __slots__ = ("x", "y", "z")

    def __init__(self, x: float, y: float, z: float):
        self.x, self.y, self.z = x, y, z


# One instance of each class with identical coordinates.
p1 = PointNormal(1.0, 2.0, 3.0)
p2 = PointSlots(1.0, 2.0, 3.0)

# sys.getsizeof is shallow: for p1 it does not count the separate __dict__,
# so the two printed numbers are not directly comparable. Exact values vary
# by CPython version.
print(sys.getsizeof(p1))  # instance only; its __dict__ is extra
print(sys.getsizeof(p2))  # whole instance, slot storage included (no __dict__)
print(hasattr(p1, "__dict__"))  # True
print(hasattr(p2, "__dict__"))  # False

# Build one million instances of each class.
# NOTE(review): nothing in this snippet measures the two lists; wrap the
# construction in tracemalloc to actually see the difference.
normal_objects = [PointNormal(float(i), float(i), float(i)) for i in range(1_000_000)]
slots_objects  = [PointSlots(float(i), float(i), float(i)) for i in range(1_000_000)]

# __slots__ restriction: attributes outside the declared names cannot be added.
# p2.w = 4.0  # AttributeError!

dataclass + `__slots__` (Python 3.10+)

from dataclasses import dataclass


@dataclass(slots=True)   # Python 3.10+: 자동으로 __slots__ 적용
class Vector3D:
    x: float
    y: float
    z: float

    def magnitude(self) -> float:
        return (self.x **2 + self.y **2 + self.z **2) **0.5

    def __add__(self, other: "Vector3D") -> "Vector3D":
        return Vector3D(self.x + other.x, self.y + other.y, self.z + other.z)


v1 = Vector3D(1.0, 2.0, 3.0)
v2 = Vector3D(4.0, 5.0, 6.0)
combined = v1 + v2
print(combined)  # Vector3D(x=5.0, y=7.0, z=9.0)
length = v1.magnitude()
print(length)  # 3.74...


# frozen=True: 불변 객체 (hashable, 더 안전)
@dataclass(frozen=True, slots=True)
class ImmutablePoint:
    x: float
    y: float

약한 참조 (weakref) — 순환 참조 방지

import weakref
import gc


class Node:
    """Tree node that owns its children and refers to its parent weakly.

    ``children`` holds strong references, while the parent is stored as a
    ``weakref.ref`` so a parent/child pair never forms a reference cycle.
    """

    def __init__(self, value: int):
        self.value = value
        self.children: list["Node"] = []
        # Weak reference to the parent, or None while the node is detached.
        self._parent: weakref.ref["Node"] | None = None

    @property
    def parent(self) -> "Node | None":
        """Return the parent node, or ``None`` if unset or no longer alive."""
        if self._parent is None:
            return None
        # Calling the weak reference yields None once the parent is gone.
        return self._parent()

    @parent.setter
    def parent(self, node: "Node | None") -> None:
        """Attach to ``node`` through a weak reference; ``None`` detaches."""
        # weakref.ref(None) raises TypeError, so None is stored directly.
        self._parent = None if node is None else weakref.ref(node)


# Link the two nodes: root holds child strongly, child refers back weakly.
root = Node(0)
child = Node(1)
child.parent = root
root.children.append(child)

# Once root is deleted, child.parent (a property, not a call) returns None
del root
print(child.parent)  # None (only the weak reference remained, so root was freed)

# WeakValueDictionary -- an entry disappears automatically once its value
# is no longer referenced anywhere else
cache: weakref.WeakValueDictionary[str, Node] = weakref.WeakValueDictionary()
node = Node(42)
cache["node42"] = node
print("node42" in cache)  # True
del node
gc.collect()
print("node42" in cache)  # False (removed automatically)

array 모듈 — 타입화 배열

import array
import sys

# list -- stores pointers to separately allocated Python objects
py_list = list(range(1_000_000))
# sys.getsizeof is shallow, so the element objects are added explicitly;
# counting only the pointer table would make the list look as small as the array.
list_bytes = sys.getsizeof(py_list) + sum(sys.getsizeof(item) for item in py_list)
print(f"list:  {list_bytes / 1024 / 1024:.1f} MB")  # ~34 MB on 64-bit CPython

# array -- a single C buffer of raw values with no per-element objects
arr = array.array("d", range(1_000_000))  # "d" = double (8 bytes)
print(f"array: {sys.getsizeof(arr) / 1024 / 1024:.1f} MB")      # ~8 MB

# Type codes (sizes are the usual ones; arr.itemsize reports the actual size)
# "b" = signed char (1 byte), "i" = int (typically 4 bytes), "d" = double (8 bytes)
# "f" = float (4 bytes), "l" = long (4 or 8 bytes, platform-dependent)
# "q" = long long (8 bytes) when a 64-bit integer is required

int_arr = array.array("i", [1, 2, 3, 4, 5])
int_arr.append(6)
int_arr.extend([7, 8, 9])
print(sum(int_arr))  # 45

NumPy vs list 메모리 비교

import numpy as np
import sys

# Python list of floats
py_list = [float(i) for i in range(1_000_000)]

# NumPy array
np_arr = np.arange(1_000_000, dtype=np.float64)

# sys.getsizeof is shallow, so the float objects are summed separately and
# the list's own size is added on top.
py_mem = sum(sys.getsizeof(x) for x in py_list) + sys.getsizeof(py_list)
# nbytes is the size of the array's element data.
np_mem = np_arr.nbytes

print(f"Python list: {py_mem / 1024 / 1024:.1f} MB")  # roughly 31 MB on 64-bit CPython
print(f"NumPy array: {np_mem / 1024 / 1024:.1f} MB")  # 7.6 MB (8,000,000 bytes)

# Pick the narrowest NumPy dtype that still covers the data range
data_int64 = np.zeros(1_000_000, dtype=np.int64)   # 8 MB
data_int32 = np.zeros(1_000_000, dtype=np.int32)   # 4 MB
data_int16 = np.zeros(1_000_000, dtype=np.int16)   # 2 MB (range: -32768 to 32767)
data_int8  = np.zeros(1_000_000, dtype=np.int8)    # 1 MB (range: -128 to 127)

# dtype 다운캐스팅
import pandas as pd

# Downcast an integer column to the smallest signed dtype that fits its values.
df = pd.DataFrame({"value": range(100_000)})
print(df.dtypes)           # int64
df["value"] = pd.to_numeric(df["value"], downcast="integer")
print(df.dtypes)           # int32 here: the maximum 99_999 exceeds int16's 32_767

제너레이터 파이프라인

from pathlib import Path


# 전체 파일을 메모리에 올리지 않고 스트리밍 처리
def read_lines(filepath: str):
    """Lazily yield each line of a UTF-8 text file, newline included."""
    with open(filepath, encoding="utf-8") as handle:
        for line in handle:
            yield line


def parse_csv_row(lines):
    """Split CSV text lines into lists of field strings, one line at a time.

    Parsing is delegated to the standard ``csv`` module, so a quoted field
    may contain commas (``"Smith, John"``) or doubled quotes. For input
    without quoting the output matches a plain ``strip().split(",")``.

    Args:
        lines: Iterable of text lines; trailing newlines are allowed.

    Yields:
        list[str]: The fields of each line. A blank line yields ``[""]``.
    """
    import csv  # local import keeps this snippet self-contained

    # Strip each line first so leading/trailing whitespace never ends up in
    # the first or last field. As a consequence, line breaks inside a quoted
    # field are not preserved.
    stripped = (line.strip() for line in lines)
    for fields in csv.reader(stripped):
        # csv.reader returns [] for an empty line; keep the [""] shape that
        # str.split produces so downstream length checks see no change.
        yield fields if fields else [""]


def filter_valid(rows):
    """Yield only rows with at least three fields and a non-blank first field."""
    for candidate in rows:
        if len(candidate) < 3:
            continue  # too few columns
        if not candidate[0].strip():
            continue  # first field is empty or whitespace only
        yield candidate


def transform(rows):
    """Convert each row into a dict with typed ``id``, ``name`` and ``value``.

    The first field is parsed with ``int``, the second is stripped of
    surrounding whitespace, and the third is parsed with ``float``.
    """
    for fields in rows:
        record = {}
        record["id"] = int(fields[0])
        record["name"] = fields[1].strip()
        record["value"] = float(fields[2])
        yield record


# 파이프라인 조합 — 한 번에 한 줄씩 처리
def process_large_file(filepath: str):
    """Stream parsed records from a CSV file, one line at a time.

    The stages are chained lazily: read lines, split them into fields,
    drop invalid rows, then convert the rest to dicts.
    """
    raw_lines = read_lines(filepath)
    split_rows = parse_csv_row(raw_lines)
    valid_rows = filter_valid(split_rows)
    records = transform(valid_rows)
    # Each record could instead be saved to a database or sent to an API here.
    for record in records:
        yield record

정리

기법	메모리 절약	적용 조건
`__slots__`	20~50% (객체당)	다수의 동일 클래스 인스턴스
`weakref`	순환 참조 제거	부모-자식, 캐시
`array` 모듈	60~80% vs list	숫자 데이터 단순 배열
NumPy dtype 최적화	50~87%	데이터 범위가 좁을 때
제너레이터 파이프라인	99%+ (스트리밍)	대용량 파일/데이터 처리

__slots__ — 인스턴스 딕셔너리 제거​

dataclass + __slots__ (Python 3.10+)​

약한 참조 (weakref) — 순환 참조 방지​

array 모듈 — 타입화 배열​

NumPy vs list 메모리 비교​

제너레이터 파이프라인​

정리​

`__slots__` — 인스턴스 딕셔너리 제거

dataclass + `__slots__` (Python 3.10+)

약한 참조 (weakref) — 순환 참조 방지

array 모듈 — 타입화 배열

NumPy vs list 메모리 비교

제너레이터 파이프라인

정리