
Memory Optimization

Reduce memory usage with __slots__, generators, and weak references.


__slots__ — Remove Instance Dictionary

import sys
from typing import ClassVar


# ── Regular class vs __slots__ ───────────────────────────
class PointNormal:
    """Plain 3-D point: every instance carries a per-instance __dict__."""

    def __init__(self, x: float, y: float, z: float):
        # Attributes land in the instance dictionary (extra memory per object).
        self.x, self.y, self.z = x, y, z

class PointSlots:
    """3-D point using __slots__: no per-instance __dict__, so instances
    are smaller and attribute access is slightly faster."""

    __slots__ = ("x", "y", "z")  # the only attributes an instance may have

    def __init__(self, x: float, y: float, z: float):
        self.x = x
        self.y = y
        self.z = z


# Demonstration: per-instance size with vs. without __slots__.
p1 = PointNormal(1.0, 2.0, 3.0)
p2 = PointSlots(1.0, 2.0, 3.0)

# NOTE: exact byte counts vary by CPython version/platform, and getsizeof
# is shallow — it does NOT include p1's separately allocated __dict__.
print(sys.getsizeof(p1)) # ~48 bytes (+ __dict__ ~232 bytes)
print(sys.getsizeof(p2)) # ~64 bytes (no dictionary)
print(hasattr(p1, "__dict__")) # True — regular instances carry a dict
print(hasattr(p2, "__dict__")) # False — __slots__ removes it

# Comparison with 1 million objects — the slots list should use noticeably
# less resident memory (measure with tracemalloc or an external tool).
normal_objects = [PointNormal(float(i), float(i), float(i)) for i in range(1_000_000)]
slots_objects = [PointSlots(float(i), float(i), float(i)) for i in range(1_000_000)]

# Limitation: __slots__ instances cannot grow attributes dynamically
# p2.w = 4.0 # AttributeError!

dataclass + __slots__ (Python 3.10+)

from dataclasses import dataclass


@dataclass(slots=True) # Python 3.10+: automatically applies __slots__
class Vector3D:
x: float
y: float
z: float

def magnitude(self) -> float:
return (self.x ** 2 + self.y ** 2 + self.z ** 2) ** 0.5

def __add__(self, other: "Vector3D") -> "Vector3D":
return Vector3D(self.x + other.x, self.y + other.y, self.z + other.z)


# Demo: the dataclass supplies repr/eq; __add__ and magnitude are defined above.
a = Vector3D(1.0, 2.0, 3.0)
b = Vector3D(4.0, 5.0, 6.0)
print(a + b)  # Vector3D(x=5.0, y=7.0, z=9.0)
print(a.magnitude())  # 3.74...


# frozen=True: immutable object (hashable, safer)
@dataclass(frozen=True, slots=True)
class ImmutablePoint:
x: float
y: float

Weak References — Prevent Circular References

import weakref
import gc


class Node:
    """Tree node whose child→parent link is weak, avoiding reference cycles."""

    def __init__(self, value: int):
        self.value = value
        self.children: list["Node"] = []
        # Weak reference (or None until a parent is assigned).
        self._parent: weakref.ref["Node"] | None = None

    @property
    def parent(self) -> "Node | None":
        """Dereference the weak link; None if unset or the parent was collected."""
        ref = self._parent
        return None if ref is None else ref()

    @parent.setter
    def parent(self, node: "Node") -> None:
        # Only a weak ref is stored, so a child never keeps its parent alive.
        self._parent = weakref.ref(node)


# Build a tiny tree: strong ref parent→child, weak ref child→parent.
root = Node(0)
child = Node(1)
child.parent = root
root.children.append(child)

# After `del root` nothing holds a strong reference to the root node, so the
# weak link dies. (Immediate under CPython's reference counting; other
# interpreters may reclaim it later — TODO confirm if portability matters.)
del root
print(child.parent) # None (weak reference → GC collected)

# WeakValueDictionary — entries auto-removed when value is GC'd
cache: weakref.WeakValueDictionary[str, Node] = weakref.WeakValueDictionary()
node = Node(42)
cache["node42"] = node
print("node42" in cache) # True
del node
gc.collect() # force a collection so the dead entry disappears deterministically
print("node42" in cache) # False (auto-removed)

array Module — Typed Arrays

import array
import sys

# list — every element is a separate boxed Python object (high overhead)
py_list = list(range(1_000_000))
print(f"list: {sys.getsizeof(py_list) / 1024 / 1024:.1f} MB") # ~8 MB

# array — one contiguous buffer of raw C values
arr = array.array("d", range(1_000_000)) # "d" = C double, 8 bytes each
print(f"array: {sys.getsizeof(arr) / 1024 / 1024:.1f} MB") # smaller header

# Common type codes:
# "b" = signed char (1 byte)   "i" = signed int (4 bytes)
# "f" = float (4 bytes)        "d" = double (8 bytes)
# "l" = signed long (platform-dependent: 4 or 8 bytes)

int_arr = array.array("i", (1, 2, 3, 4, 5))
int_arr.append(6)
int_arr.extend((7, 8, 9))
print(sum(int_arr)) # arrays support the usual sequence protocol

NumPy vs list Memory Comparison

import numpy as np
import sys

# Python list of floats — each element is a separately allocated PyFloat
py_list = [float(i) for i in range(1_000_000)]

# NumPy array — a single contiguous buffer of raw 8-byte doubles
np_arr = np.arange(1_000_000, dtype=np.float64)

# getsizeof(list) counts only the pointer array, so add each element's size.
# (Approximate: getsizeof is shallow and exact sizes vary per CPython build.)
py_mem = sum(sys.getsizeof(x) for x in py_list) + sys.getsizeof(py_list)
np_mem = np_arr.nbytes  # exact buffer size: 1_000_000 * 8 bytes

print(f"Python list: {py_mem / 1024 / 1024:.1f} MB") # ~28 MB
print(f"NumPy array: {np_mem / 1024 / 1024:.1f} MB") # ~8 MB

# Choose the smallest NumPy dtype the data's value range allows
data_int64 = np.zeros(1_000_000, dtype=np.int64) # 8MB
data_int32 = np.zeros(1_000_000, dtype=np.int32) # 4MB
data_int16 = np.zeros(1_000_000, dtype=np.int16) # 2MB (range: -32768~32767)
data_int8 = np.zeros(1_000_000, dtype=np.int8) # 1MB (range: -128~127)

# dtype downcasting with Pandas
import pandas as pd

df = pd.DataFrame({"value": range(100_000)})
print(df.dtypes) # int64
df["value"] = pd.to_numeric(df["value"], downcast="integer")
print(df.dtypes) # int32 — smallest integer dtype that holds 0..99_999 (> int16 max)

Generator Pipeline

from pathlib import Path


# Stream large files without loading everything into memory
def read_lines(filepath: str):
    """Lazily yield raw lines (newline included) from *filepath* as UTF-8."""
    with open(filepath, encoding="utf-8") as handle:
        for line in handle:
            yield line


def parse_csv_row(lines):
    """Strip each line and split it on commas, yielding lists of fields."""
    yield from (line.strip().split(",") for line in lines)


def filter_valid(rows):
    """Yield only rows with at least 3 fields and a non-blank first field."""
    yield from (row for row in rows if len(row) >= 3 and row[0].strip())


def transform(rows):
    """Convert raw field lists into typed record dicts (id, name, value)."""
    for fields in rows:
        record = {
            "id": int(fields[0]),
            "name": fields[1].strip(),
            "value": float(fields[2]),
        }
        yield record


# Pipeline — each record flows through every stage before the next is read
def process_large_file(filepath: str):
    """Stream validated, typed records from a CSV file one at a time."""
    stage = read_lines(filepath)
    stage = parse_csv_row(stage)
    stage = filter_valid(stage)
    stage = transform(stage)
    yield from stage  # callers may persist each record as it arrives

Summary

| Technique | Memory Savings | When to Apply |
| --- | --- | --- |
| `__slots__` | 20–50% per object | Many instances of the same class |
| `weakref` | Removes circular refs | Parent-child relationships, caches |
| `array` module | 60–80% vs list | Simple numeric arrays |
| NumPy dtype optimization | 50–87% | When data range is narrow |
| Generator pipeline | 99%+ (streaming) | Large file / data processing |