Skip to main content
Advertisement

Dataclasses

@dataclass (Python 3.7+) is a decorator that lets you define classes primarily for storing data concisely. It automatically generates boilerplate methods like __init__, __repr__, and __eq__.


@dataclass Basics

from dataclasses import dataclass


# Old way — repetitive boilerplate
class PointOld:
    """2D point written by hand, without ``@dataclass``.

    Spells out the ``__init__``, ``__repr__`` and ``__eq__`` boilerplate
    explicitly so it can be compared with the decorator-based version.
    """

    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

    def __repr__(self) -> str:
        # Note: the printed label is "Point", not the class name "PointOld".
        return f"Point(x={self.x}, y={self.y})"

    def __eq__(self, other: object) -> bool:
        # NotImplemented (rather than False) lets Python fall back to the
        # other operand's comparison for unrelated types.
        if not isinstance(other, PointOld):
            return NotImplemented
        return self.x == other.x and self.y == other.y


# @dataclass way — much more concise
@dataclass
class Point:
    """2D point; ``__init__``, ``__repr__`` and ``__eq__`` are generated."""

    x: float
    y: float


# Two points with equal coordinates and one that differs.
p1 = Point(x=1.0, y=2.0)
p2 = Point(x=1.0, y=2.0)
p3 = Point(x=3.0, y=4.0)

print(p1)  # Point(x=1.0, y=2.0) — produced by the generated __repr__
print(p1 == p2)  # True — the generated __eq__ compares field values
print(p1 == p3)  # False
print(p1.x)  # 1.0

Auto-generated Methods

from dataclasses import dataclass


@dataclass
class Student:
    """Student record with an optional major.

    Fields with defaults must come after fields without them, so ``major``
    is declared last.
    """

    name: str
    student_id: str
    gpa: float
    major: str = "Undeclared"  # Default value

# Auto-generated:
# __init__(self, name, student_id, gpa, major="Undeclared")
# __repr__
# __eq__

# s1 and s3 carry identical field values; s2 differs in every field.
s1 = Student(name="Alice", student_id="2024001", gpa=4.0)
s2 = Student(name="Bob", student_id="2024002", gpa=3.8, major="Computer Science")
s3 = Student(name="Alice", student_id="2024001", gpa=4.0)

print(s1)  # Student(name='Alice', student_id='2024001', gpa=4.0, major='Undeclared')
print(s1 == s3)  # True (every field matches)
print(s1 == s2)  # False
print(s1.major)  # Undeclared

field(): Fine-grained Field Control

from dataclasses import dataclass, field
from typing import ClassVar


@dataclass
class Config:
    """Configuration record demonstrating the main ``field()`` options.

    Raises:
        ValueError: from ``__post_init__`` when more than ``MAX_TAGS`` tags
            are supplied.
    """

    host: str
    port: int = 8080

    # Mutable defaults must use field(default_factory=...)
    # tags: list = []  ← Error! Cannot use mutable default directly
    tags: list[str] = field(default_factory=list)
    metadata: dict = field(default_factory=dict)

    # repr=False: exclude from __repr__
    _internal_id: str = field(default="", repr=False)

    # compare=False: exclude from __eq__, __lt__, etc.
    description: str = field(default="", compare=False)

    # init=False: exclude from __init__ parameters
    created_at: str = field(default="", init=False)

    # ClassVar: class variable (not a dataclass field)
    MAX_TAGS: ClassVar[int] = 10

    def __post_init__(self):
        """Stamp ``created_at`` and validate the tag count.

        Called automatically by the generated ``__init__``.
        """
        from datetime import datetime

        self.created_at = datetime.now().isoformat()
        if len(self.tags) > self.MAX_TAGS:
            raise ValueError(f"Maximum {self.MAX_TAGS} tags allowed")


# One config with the default (empty) tag list, one with explicit tags.
cfg1 = Config(host="localhost")
cfg2 = Config(host="localhost", tags=["dev", "test"])

print(cfg1)  # Config(host='localhost', port=8080, tags=[], ...)
print(cfg1 == cfg2)  # False (tags differ)
print(cfg1.created_at)  # Creation timestamp

__post_init__: Post-initialization Processing

from dataclasses import dataclass, field
import re


@dataclass
class Email:
    """Email address that is normalized and validated on creation.

    Raises:
        ValueError: from ``__post_init__`` when the normalized address does
            not have the shape ``<local>@<domain>.<suffix>`` with exactly
            one ``@``.
    """

    address: str

    def __post_init__(self):
        """Strip and lower-case the address, then validate its shape."""
        self.address = self.address.strip().lower()
        # fullmatch requires the WHOLE string to fit the pattern; re.match
        # only anchors the start, so "a@b.c@d" would pass on its prefix.
        if not re.fullmatch(r"[^@]+@[^@]+\.[^@]+", self.address):
            raise ValueError(f"Invalid email: {self.address!r}")


@dataclass
class OrderItem:
    """Order line whose ``total_price`` is derived from the other fields.

    Raises:
        ValueError: from ``__post_init__`` when ``unit_price`` is negative,
            ``quantity`` is below 1, or ``discount_rate`` is outside [0, 1].
    """

    product_name: str
    unit_price: float
    quantity: int
    # init=False fields are set in __post_init__
    total_price: float = field(init=False)
    discount_rate: float = 0.0

    def __post_init__(self):
        """Validate the inputs, then compute ``total_price``."""
        if self.unit_price < 0:
            raise ValueError("Unit price cannot be negative.")
        if self.quantity < 1:
            raise ValueError("Quantity must be at least 1.")
        if not (0 <= self.discount_rate <= 1):
            raise ValueError("Discount rate must be between 0 and 1.")
        self.total_price = self.unit_price * self.quantity * (1 - self.discount_rate)


item = OrderItem("Python Book", 35000, 3, discount_rate=0.1)
print(item)
# OrderItem(product_name='Python Book', unit_price=35000, quantity=3,
# total_price=94500.0, discount_rate=0.1)

email = Email(" USER@EXAMPLE.COM ")
print(email.address)  # user@example.com

# An invalid address is rejected with ValueError during construction.
try:
    bad_email = Email("not-an-email")
except ValueError as e:
    print(e)  # Invalid email: 'not-an-email'

frozen=True: Immutable Dataclass

from dataclasses import dataclass
import hashlib


@dataclass(frozen=True)
class Point3D:
    """Immutable 3D point — fields cannot be reassigned after creation."""

    x: float
    y: float
    z: float = 0.0

    def distance_to_origin(self) -> float:
        """Return the Euclidean distance from (0, 0, 0)."""
        return (self.x ** 2 + self.y ** 2 + self.z ** 2) ** 0.5

    def translate(self, dx: float, dy: float, dz: float = 0.0) -> "Point3D":
        """Return a new point shifted by (dx, dy, dz); ``self`` is unchanged."""
        return Point3D(self.x + dx, self.y + dy, self.z + dz)


p = Point3D(1.0, 2.0, 3.0)

# Attempting to modify raises FrozenInstanceError. It is a subclass of
# AttributeError, so catch that instead of the overly broad Exception.
try:
    p.x = 10.0
except AttributeError as e:
    print(f"Error: {type(e).__name__}: {e}")

# frozen=True (with the default eq=True) generates __hash__ → instances can
# be used as dict keys or set elements
positions = {Point3D(0, 0), Point3D(1, 1), Point3D(0, 0)}  # Deduplication
print(positions)  # {Point3D(x=0, y=0, z=0.0), Point3D(x=1, y=1, z=0.0)} (set order may vary)

cache: dict[Point3D, str] = {}
cache[p] = "origin"
# An equal (not identical) instance hashes the same and finds the entry.
print(cache[Point3D(1.0, 2.0, 3.0)])  # origin


@dataclass(frozen=True)
class ImmutableConfig:
    """Frozen configuration record; changes produce a new instance."""

    host: str
    port: int
    debug: bool = False
    allowed_origins: tuple[str, ...] = ()  # Use immutable collection

    def with_port(self, new_port: int) -> "ImmutableConfig":
        """Return a copy of this config with ``port`` set to ``new_port``."""
        from dataclasses import replace

        return replace(self, port=new_port)


cfg = ImmutableConfig(
    host="localhost",
    port=8080,
    allowed_origins=("http://localhost:3000",),
)
new_cfg = cfg.with_port(new_port=9090)
print(cfg.port)  # 8080 (the original is left untouched)
print(new_cfg.port)  # 9090

order=True: Auto-generate Comparison Operators

from dataclasses import dataclass


@dataclass(order=True)
class Version:
    """Version triple ordered field by field: major, then minor, then patch."""

    major: int
    minor: int
    patch: int = 0

    def __str__(self) -> str:
        """Return the ``vMAJOR.MINOR.PATCH`` form, e.g. ``v1.2.3``."""
        return f"v{self.major}.{self.minor}.{self.patch}"


# order=True → auto-generates __lt__, __le__, __gt__, __ge__ (compared field by field)
v1 = Version(1, 0, 0)
v2 = Version(1, 2, 3)
v3 = Version(2, 0, 0)

print(v1 < v2)  # True
print(v3 > v2)  # True
# Printing a list shows each element's __repr__, not its __str__, so convert
# explicitly to get the short "vX.Y.Z" form.
print([str(v) for v in sorted([v3, v1, v2])])  # ['v1.0.0', 'v1.2.3', 'v2.0.0']

dataclass vs NamedTuple vs TypedDict Comparison

from dataclasses import dataclass
from typing import NamedTuple, TypedDict


# 1. dataclass — most flexible, supports mutable and immutable
@dataclass
class DataclassPoint:
    """2D point as a dataclass, with a method attached."""

    x: float
    y: float

    def magnitude(self) -> float:
        """Return the Euclidean length of the vector (x, y)."""
        return (self.x ** 2 + self.y ** 2) ** 0.5


# 2. NamedTuple — tuple-based, immutable, index access available
class NamedTuplePoint(NamedTuple):
    """2D point as an immutable tuple with named fields."""

    x: float
    y: float


# 3. TypedDict — dictionary-based, pairs well with JSON data
class TypedDictPoint(TypedDict):
    """2D point as a plain dict with declared keys ``x`` and ``y``."""

    x: float
    y: float


# Comparison
dc = DataclassPoint(1.0, 2.0)
nt = NamedTuplePoint(1.0, 2.0)
td: TypedDictPoint = {"x": 1.0, "y": 2.0}

print(dc) # DataclassPoint(x=1.0, y=2.0)
print(nt) # NamedTuplePoint(x=1.0, y=2.0)
print(td) # {'x': 1.0, 'y': 2.0}

# NamedTuple: index access available
print(nt[0]) # 1.0
print(tuple(nt)) # (1.0, 2.0)

# dataclass: methods can be added
print(dc.magnitude()) # 2.23...

# TypedDict: works like a dictionary
print(td["x"]) # 1.0

Selection Guide

dataclass:
- When you need methods
- Supports both mutable and immutable
- General-purpose data objects

NamedTuple:
- Immutable records
- When tuple unpacking is needed
- Small data like CSV rows or coordinates

TypedDict:
- Modeling JSON API responses
- When the dictionary structure is already fixed
- Integrating with existing dictionary code

Inheritance with dataclass

from dataclasses import dataclass


@dataclass
class Animal:
    """Base record for the inheritance example."""

    name: str
    age: int


@dataclass
class Dog(Animal):
    """Animal with a breed and a training flag.

    Fields inherited from ``Animal`` come first in the generated
    ``__init__`` and ``__repr__``, followed by the fields declared here.
    """

    breed: str
    is_trained: bool = False

    def bark(self) -> str:
        """Return the bark line prefixed with the dog's name."""
        return f"{self.name}: Woof!"


@dataclass
class GuideDog(Dog):
    """Dog that must be trained, with handler and certification details.

    Raises:
        ValueError: from ``__post_init__`` when ``is_trained`` is false.
    """

    handler: str = ""
    certification_id: str = ""

    def __post_init__(self):
        """Reject untrained guide dogs right after ``__init__`` runs."""
        if not self.is_trained:
            raise ValueError("Guide dogs must be trained.")


d = Dog(name="Buddy", age=3, breed="Jindo")
print(d)
# Dog(name='Buddy', age=3, breed='Jindo', is_trained=False)

# is_trained must be True here, otherwise GuideDog raises ValueError.
gd = GuideDog(
    name="Luna",
    age=4,
    breed="Retriever",
    is_trained=True,
    handler="Alice",
    certification_id="GD-001",
)
print(gd)
print(gd.bark())

Practical Example: API Response Model

from dataclasses import dataclass, field
from typing import Optional
from datetime import datetime


@dataclass
class Address:
    """Postal address; ``country`` defaults to "US"."""

    street: str
    city: str
    country: str = "US"
    postal_code: str = ""

    def __str__(self) -> str:
        """Return the address ordered postal code, country, city, street."""
        return f"{self.postal_code} {self.country} {self.city} {self.street}"


@dataclass
class UserProfile:
    """User record built from an API response.

    ``_hashed_password`` is kept out of ``__repr__`` and out of equality
    comparison.
    """

    id: int
    username: str
    email: str
    address: Optional[Address] = None
    tags: list[str] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)
    is_active: bool = True
    # Sensitive info — excluded from repr
    _hashed_password: str = field(default="", repr=False, compare=False)

    def to_public_dict(self) -> dict:
        """Return only publicly safe information.

        ``tags`` is returned as a copy so callers cannot mutate the
        profile's own list through the result.
        """
        return {
            "id": self.id,
            "username": self.username,
            "email": self.email,
            "tags": list(self.tags),
            "created_at": self.created_at.isoformat(),
            "is_active": self.is_active,
        }

    @classmethod
    def from_api_response(cls, data: dict) -> "UserProfile":
        """Create a profile from an API response dictionary.

        Args:
            data: mapping with required keys ``id``, ``username`` and
                ``email``; ``address`` (a mapping of ``Address`` fields) and
                ``tags`` are optional.

        Raises:
            KeyError: when a required key is missing.
        """
        address_data = data.get("address")
        address = Address(**address_data) if address_data else None
        return cls(
            id=data["id"],
            username=data["username"],
            email=data["email"],
            address=address,
            # Copy the list so the profile does not alias the response, and
            # treat a missing or null "tags" value as no tags.
            tags=list(data.get("tags") or []),
        )


# Simulate API response
# Simulated API payload; "address" omits "country", so Address uses its default.
api_response = {
    "id": 1,
    "username": "alice_python",
    "email": "alice@example.com",
    "address": dict(
        street="123 Main St",
        city="New York",
        postal_code="10001",
    ),
    "tags": ["python", "backend"],
}

user = UserProfile.from_api_response(data=api_response)
print(user)
print(f"\nPublic info: {user.to_public_dict()}")

Practical Example: Configuration Object

from dataclasses import dataclass, field
from pathlib import Path


@dataclass
class DatabaseConfig:
    """Database connection settings; ``_password`` is hidden from ``__repr__``."""

    host: str = "localhost"
    port: int = 5432
    database: str = "myapp"
    user: str = "postgres"
    _password: str = field(default="", repr=False)

    @property
    def url(self) -> str:
        """Return a PostgreSQL URL with the password masked as ``***``."""
        return f"postgresql://{self.user}:***@{self.host}:{self.port}/{self.database}"


@dataclass
class ServerConfig:
    """HTTP server settings; every field has a default."""

    host: str = "0.0.0.0"
    port: int = 8000
    workers: int = 4
    debug: bool = False
    # A lambda factory gives each instance its own pre-populated list.
    cors_origins: list[str] = field(default_factory=lambda: ["http://localhost:3000"])


@dataclass
class AppConfig:
    """Top-level application settings composed of nested config objects."""

    app_name: str = "MyApp"
    version: str = "1.0.0"
    # default_factory builds a fresh nested config for every AppConfig.
    database: DatabaseConfig = field(default_factory=DatabaseConfig)
    server: ServerConfig = field(default_factory=ServerConfig)
    log_level: str = "INFO"
    log_file: Path = field(default_factory=lambda: Path("logs/app.log"))

    @classmethod
    def from_env(cls) -> "AppConfig":
        """Load configuration from environment variables.

        Reads DB_HOST, DB_PORT, DB_NAME, PORT and DEBUG, falling back to the
        defaults written below when a variable is unset. DEBUG is true only
        for the value "true" (case-insensitive).

        Raises:
            ValueError: when DB_PORT or PORT is set to a non-integer string.
        """
        import os

        db = DatabaseConfig(
            host=os.getenv("DB_HOST", "localhost"),
            port=int(os.getenv("DB_PORT", "5432")),
            database=os.getenv("DB_NAME", "myapp"),
        )
        server = ServerConfig(
            port=int(os.getenv("PORT", "8000")),
            debug=os.getenv("DEBUG", "false").lower() == "true",
        )
        return cls(database=db, server=server)


cfg = AppConfig.from_env()
server_cfg = cfg.server  # shorthand for the nested server settings

print(cfg.database.url)
print(f"Server: {server_cfg.host}:{server_cfg.port}")
print(f"CORS: {server_cfg.cors_origins}")

Expert Tips

1. Copying Immutable Objects with dataclasses.replace()

from dataclasses import dataclass, replace


@dataclass(frozen=True)
class Config:
    """Frozen config used to demonstrate ``dataclasses.replace()``."""

    host: str
    port: int
    debug: bool = False


original = Config(host="localhost", port=8080)
# replace() builds a new instance; the frozen original is left as it was.
modified = replace(original, debug=True, port=9090)

print(original)  # Config(host='localhost', port=8080, debug=False)
print(modified)  # Config(host='localhost', port=9090, debug=True)

2. dataclasses.asdict(), astuple()

from dataclasses import dataclass, asdict, astuple


@dataclass
class Point:
    """2D point used to demonstrate ``asdict()`` and ``astuple()``."""

    x: float
    y: float


p = Point(x=1.0, y=2.0)
print(asdict(p))  # {'x': 1.0, 'y': 2.0}
print(astuple(p))  # (1.0, 2.0)

# Recursively converts nested dataclasses
@dataclass
class Line:
    """Line segment whose two fields are themselves dataclass instances."""

    start: Point
    end: Point

line = Line(start=Point(0, 0), end=Point(3, 4))
# asdict() descends into the nested Point fields as well.
print(asdict(line))
# {'start': {'x': 0, 'y': 0}, 'end': {'x': 3, 'y': 4}}

3. Inspecting Field Information with dataclasses.fields()

from dataclasses import dataclass, fields, field


@dataclass
class MyData:
    """Small record used to demonstrate ``dataclasses.fields()``."""

    name: str
    value: int = 0
    # metadata is a free-form mapping stored on the Field object.
    tags: list = field(default_factory=list, metadata={"description": "Tag list"})


# Fields with no plain default (here `name`, and `tags`, which uses
# default_factory) report the dataclasses.MISSING sentinel as f.default.
for f in fields(MyData):
    print(f"name: {f.name}, type: {f.type}, default: {f.default}, metadata: {f.metadata}")

Summary

| Feature | Code | Description |
| --- | --- | --- |
| Basic dataclass | `@dataclass` | Auto-generates `__init__`, `__repr__`, `__eq__` |
| Immutable | `@dataclass(frozen=True)` | Generates `__hash__`, prevents modification |
| Sorting support | `@dataclass(order=True)` | Auto-generates comparison operators |
| Mutable default | `field(default_factory=list)` | Default values for list/dict |
| Exclude from repr | `field(repr=False)` | Sensitive info like passwords |
| Post-init processing | `__post_init__` | Validation, computed fields |
| Copy with changes | `dataclasses.replace()` | Copy with some fields changed |
| Convert to dict | `dataclasses.asdict()` | Useful for JSON serialization |
Advertisement