Dataclasses
@dataclass (Python 3.7+) is a decorator that lets you concisely define classes whose main purpose is to store data. It automatically generates boilerplate methods such as __init__, __repr__, and __eq__.
@dataclass Basics
from dataclasses import dataclass
# Old way — repetitive boilerplate
class PointOld:
    """Hand-written 2D point showing the boilerplate that @dataclass removes."""

    def __init__(self, x: float, y: float):
        """Store the two coordinates on the instance."""
        self.x, self.y = x, y

    def __repr__(self) -> str:
        """Return a constructor-like description of the point."""
        return f"Point(x={self.x}, y={self.y})"

    def __eq__(self, other: object) -> bool:
        """Compare coordinate by coordinate; defer to `other` for foreign types."""
        if isinstance(other, PointOld):
            same_x = self.x == other.x
            return same_x and self.y == other.y
        # Let Python try the reflected comparison instead of answering False.
        return NotImplemented
# @dataclass way — much more concise
@dataclass
class Point:
    """2D point; __init__, __repr__ and __eq__ are generated by @dataclass."""

    x: float
    y: float
# Two points with equal coordinates and one that differs.
p1, p2, p3 = Point(1.0, 2.0), Point(1.0, 2.0), Point(3.0, 4.0)

print(p1)  # Point(x=1.0, y=2.0) -- generated __repr__
equals_p2 = p1 == p2
print(equals_p2)  # True -- generated __eq__ compares field values
equals_p3 = p1 == p3
print(equals_p3)  # False
print(p1.x)  # 1.0
Auto-generated Methods
from dataclasses import dataclass
@dataclass
class Student:
    """Student record; `major` is optional and falls back to "Undeclared"."""

    name: str
    student_id: str
    gpa: float
    # Fields with defaults must come after the fields without one.
    major: str = "Undeclared"
# Auto-generated:
# __init__(self, name, student_id, gpa, major="Undeclared")
# __repr__
# __eq__
# s1 and s3 carry identical field values; s2 differs in every field.
s1 = Student(name="Alice", student_id="2024001", gpa=4.0)
s2 = Student(name="Bob", student_id="2024002", gpa=3.8, major="Computer Science")
s3 = Student(name="Alice", student_id="2024001", gpa=4.0)

print(s1)  # Student(name='Alice', student_id='2024001', gpa=4.0, major='Undeclared')
matches_s3 = s1 == s3
print(matches_s3)  # True (all fields are equal)
matches_s2 = s1 == s2
print(matches_s2)  # False
print(s1.major)  # Undeclared
field(): Fine-grained Field Control
from dataclasses import dataclass, field
from typing import ClassVar
@dataclass
class Config:
    """Server configuration demonstrating the per-field options of field().

    Raises:
        ValueError: if more than MAX_TAGS tags are supplied.
    """

    host: str
    port: int = 8080
    # Mutable defaults must use field(default_factory=...).
    # tags: list = []  <- ValueError at class creation: mutable default not allowed
    tags: list[str] = field(default_factory=list)
    metadata: dict = field(default_factory=dict)
    # repr=False: exclude from __repr__
    _internal_id: str = field(default="", repr=False)
    # compare=False: exclude from __eq__, __lt__, etc.
    description: str = field(default="", compare=False)
    # init=False: not an __init__ parameter; filled in by __post_init__.
    # compare=False: the timestamp differs for every instance, so including
    # it would make otherwise-identical configs compare unequal.
    created_at: str = field(default="", init=False, compare=False)
    # ClassVar: class variable (not a dataclass field)
    MAX_TAGS: ClassVar[int] = 10

    def __post_init__(self):
        """Validate the tag count and stamp the creation time.

        Called automatically by the generated __init__.
        """
        from datetime import datetime

        if len(self.tags) > self.MAX_TAGS:
            raise ValueError(f"Maximum {self.MAX_TAGS} tags allowed")
        self.created_at = datetime.now().isoformat()
# Same host, but only cfg2 carries tags.
cfg1 = Config(host="localhost")
cfg2 = Config(host="localhost", tags=["dev", "test"])

print(cfg1)  # Config(host='localhost', port=8080, tags=[], ...)
configs_equal = cfg1 == cfg2
print(configs_equal)  # False (tags differ)
print(cfg1.created_at)  # Creation timestamp
__post_init__: Post-initialization Processing
from dataclasses import dataclass, field
import re
@dataclass
class Email:
    """Email address that is normalized and validated on construction.

    Raises:
        ValueError: if the normalized address is not of the form
            ``local@domain.tld`` with exactly one ``@``.
    """

    address: str

    def __post_init__(self):
        """Strip whitespace, lowercase the address, then validate its shape."""
        self.address = self.address.strip().lower()
        # fullmatch, not match: match only anchors the start of the string,
        # so "a@b.c@d" would be accepted on its valid-looking prefix.
        if not re.fullmatch(r"[^@]+@[^@]+\.[^@]+", self.address):
            raise ValueError(f"Invalid email: {self.address!r}")
@dataclass
class OrderItem:
    """Order line whose total price is derived from price, quantity and discount.

    Raises:
        ValueError: for a negative unit price, a quantity below 1, or a
            discount rate outside the range 0 to 1.
    """

    product_name: str
    unit_price: float
    quantity: int
    # init=False fields are not constructor parameters; set in __post_init__.
    total_price: float = field(init=False)
    discount_rate: float = 0.0

    def __post_init__(self):
        """Validate the inputs, then compute the derived total price."""
        price_is_negative = self.unit_price < 0
        if price_is_negative:
            raise ValueError("Unit price cannot be negative.")

        quantity_too_small = self.quantity < 1
        if quantity_too_small:
            raise ValueError("Quantity must be at least 1.")

        rate_in_range = 0 <= self.discount_rate <= 1
        if not rate_in_range:
            raise ValueError("Discount rate must be between 0 and 1.")

        gross = self.unit_price * self.quantity
        self.total_price = gross * (1 - self.discount_rate)
item = OrderItem(
    product_name="Python Book",
    unit_price=35000,
    quantity=3,
    discount_rate=0.1,
)
print(item)
# OrderItem(product_name='Python Book', unit_price=35000, quantity=3,
# total_price=94500.0, discount_rate=0.1)

# Surrounding whitespace and upper case are normalized away.
email = Email(" USER@EXAMPLE.COM ")
print(email.address)  # user@example.com

# A string without "@" fails validation.
try:
    bad_email = Email("not-an-email")
except ValueError as exc:
    print(exc)
frozen=True: Immutable Dataclass
from dataclasses import dataclass
import hashlib
@dataclass(frozen=True)
class Point3D:
    """Immutable 3D point; fields cannot be reassigned after construction."""

    x: float
    y: float
    z: float = 0.0

    def distance_to_origin(self) -> float:
        """Return the Euclidean distance from (0, 0, 0)."""
        squared_sum = self.x ** 2 + self.y ** 2 + self.z ** 2
        return squared_sum ** 0.5

    def translate(self, dx: float, dy: float, dz: float = 0.0) -> "Point3D":
        """Return a new point shifted by the given offsets; self is unchanged."""
        moved_x = self.x + dx
        moved_y = self.y + dy
        moved_z = self.z + dz
        return Point3D(moved_x, moved_y, moved_z)
from dataclasses import FrozenInstanceError

p = Point3D(1.0, 2.0, 3.0)

# Assigning to a field of a frozen instance raises FrozenInstanceError.
# Catch exactly that error so unrelated failures are not hidden.
try:
    p.x = 10.0
except FrozenInstanceError as e:
    print(f"Error: {type(e).__name__}: {e}")

# frozen=True (with the default eq=True) generates __hash__, so instances
# can be used as set elements or dict keys.
positions = {Point3D(0, 0), Point3D(1, 1), Point3D(0, 0)}  # Deduplication
# Two elements remain; a set is unordered, so the display order may vary.
print(positions)  # {Point3D(x=0, y=0, z=0.0), Point3D(x=1, y=1, z=0.0)}

# An equal (not identical) point finds the same dict entry.
cache: dict[Point3D, str] = {}
cache[p] = "origin"
print(cache[Point3D(1.0, 2.0, 3.0)])  # origin
@dataclass(frozen=True)
class ImmutableConfig:
    """Frozen configuration; changes are made by deriving a new instance."""

    host: str
    port: int
    debug: bool = False
    # A tuple keeps the collection itself immutable as well.
    allowed_origins: tuple[str, ...] = ()

    def with_port(self, new_port: int) -> "ImmutableConfig":
        """Return a copy of this config that differs only in its port."""
        from dataclasses import replace

        updated = replace(self, port=new_port)
        return updated
cfg = ImmutableConfig(
    host="localhost",
    port=8080,
    allowed_origins=("http://localhost:3000",),
)
# with_port builds a second object and leaves cfg untouched.
new_cfg = cfg.with_port(9090)

print(cfg.port)  # 8080 (original unchanged)
print(new_cfg.port)  # 9090
order=True: Auto-generate Comparison Operators
from dataclasses import dataclass
@dataclass(order=True)
class Version:
    """Version number ordered by (major, minor, patch)."""

    # order=True compares instances as tuples of fields in declaration order.
    major: int
    minor: int
    patch: int = 0

    def __str__(self) -> str:
        """Return the conventional "vMAJOR.MINOR.PATCH" form."""
        return f"v{self.major}.{self.minor}.{self.patch}"
# order=True -> auto-generates __lt__, __le__, __gt__, __ge__ (compared field by field)
v1 = Version(1, 0, 0)
v2 = Version(1, 2, 3)
v3 = Version(2, 0, 0)
print(v1 < v2)  # True
print(v3 > v2)  # True
# Printing a list shows each element's __repr__, not its __str__, so convert
# the sorted versions to strings explicitly to get the "vX.Y.Z" form.
print(", ".join(str(v) for v in sorted([v3, v1, v2])))  # v1.0.0, v1.2.3, v2.0.0
dataclass vs NamedTuple vs TypedDict Comparison
from dataclasses import dataclass
from typing import NamedTuple, TypedDict
# 1. dataclass — most flexible, supports mutable and immutable
@dataclass
class DataclassPoint:
    """Mutable 2D point implemented as a dataclass, with a helper method."""

    x: float
    y: float

    def magnitude(self) -> float:
        """Return the length of the vector from the origin to this point."""
        squared_sum = self.x ** 2 + self.y ** 2
        return squared_sum ** 0.5
# 2. NamedTuple — tuple-based, immutable, index access available
class NamedTuplePoint(NamedTuple):
    """Immutable 2D point that is also a tuple (indexable and unpackable)."""

    x: float
    y: float
# 3. TypedDict — dictionary-based, pairs well with JSON data
class TypedDictPoint(TypedDict):
    """Type description for a plain dict with float "x" and "y" keys."""

    x: float
    y: float
# Comparison: the same point expressed with each of the three tools.
dc = DataclassPoint(x=1.0, y=2.0)
nt = NamedTuplePoint(x=1.0, y=2.0)
td: TypedDictPoint = TypedDictPoint(x=1.0, y=2.0)

# Printed in order:
#   DataclassPoint(x=1.0, y=2.0)
#   NamedTuplePoint(x=1.0, y=2.0)
#   {'x': 1.0, 'y': 2.0}
for sample in (dc, nt, td):
    print(sample)

# NamedTuple: index access and conversion to a plain tuple.
first_coordinate = nt[0]
print(first_coordinate)  # 1.0
print(tuple(nt))  # (1.0, 2.0)

# dataclass: methods can be added.
print(dc.magnitude())  # 2.23...

# TypedDict: works like a dictionary.
print(td["x"])  # 1.0
Selection Guide
dataclass:
- When you need methods
- Supports both mutable and immutable (frozen=True) instances
- General-purpose data objects
NamedTuple:
- Immutable records
- When tuple unpacking is needed
- Small data like CSV rows or coordinates
TypedDict:
- Modeling JSON API responses
- When the dictionary structure is already fixed
- Integrating with existing dictionary code
Inheritance with dataclass
from dataclasses import dataclass
@dataclass
class Animal:
    """Base record shared by all animals in the inheritance example."""

    name: str
    age: int
@dataclass
class Dog(Animal):
    """Animal with a breed; inherited fields come first in __init__."""

    breed: str
    is_trained: bool = False

    def bark(self) -> str:
        """Return the dog's bark, prefixed with its name."""
        return f"{self.name}: Woof!"
@dataclass
class GuideDog(Dog):
    """Dog that must be trained; adds handler and certification details.

    Raises:
        ValueError: if the instance is created with is_trained false.
    """

    # The parent already ends with a defaulted field, so every field added
    # here needs a default too.
    handler: str = ""
    certification_id: str = ""

    def __post_init__(self):
        """Reject untrained guide dogs."""
        if self.is_trained:
            return
        raise ValueError("Guide dogs must be trained.")
d = Dog(name="Buddy", age=3, breed="Jindo")
print(d)
# Dog(name='Buddy', age=3, breed='Jindo', is_trained=False)

# is_trained must be True here, otherwise GuideDog.__post_init__ raises.
gd = GuideDog(
    name="Luna",
    age=4,
    breed="Retriever",
    is_trained=True,
    handler="Alice",
    certification_id="GD-001",
)
print(gd)
greeting = gd.bark()
print(greeting)
Practical Example: API Response Model
from dataclasses import dataclass, field
from typing import Optional
from datetime import datetime
@dataclass
class Address:
    """Postal address; country defaults to "US" and postal code to empty."""

    street: str
    city: str
    country: str = "US"
    postal_code: str = ""

    def __str__(self) -> str:
        """Return the address from the broadest part to the most specific."""
        return f"{self.postal_code} {self.country} {self.city} {self.street}"
@dataclass
class UserProfile:
    """User account record that can be built from an API response dict."""

    id: int
    username: str
    email: str
    address: Optional[Address] = None
    tags: list[str] = field(default_factory=list)
    created_at: datetime = field(default_factory=datetime.now)
    is_active: bool = True
    # Sensitive info -- excluded from repr and from comparisons
    _hashed_password: str = field(default="", repr=False, compare=False)

    def to_public_dict(self) -> dict:
        """Return only publicly safe information.

        The address and the hashed password are left out. ``tags`` is
        returned as a copy so callers cannot modify the profile through
        the returned dict.
        """
        return {
            "id": self.id,
            "username": self.username,
            "email": self.email,
            "tags": list(self.tags),
            "created_at": self.created_at.isoformat(),
            "is_active": self.is_active,
        }

    @classmethod
    def from_api_response(cls, data: dict) -> "UserProfile":
        """Create a profile from an API response dictionary.

        Args:
            data: mapping with required keys "id", "username" and "email",
                and optional keys "address" (a mapping of Address fields)
                and "tags" (a list of strings; missing or None means none).

        Raises:
            KeyError: if a required key is missing.
            TypeError: if "address" contains keys Address does not accept.
        """
        address_data = data.get("address")
        address = Address(**address_data) if address_data else None
        # Copy the tags so the profile never shares a list with the payload;
        # "or []" also covers an explicit null in the response.
        tags = list(data.get("tags") or [])
        return cls(
            id=data["id"],
            username=data["username"],
            email=data["email"],
            address=address,
            tags=tags,
        )
# Simulated API response; "country" is omitted from the address, so the
# Address default applies.
api_response = {
    "id": 1,
    "username": "alice_python",
    "email": "alice@example.com",
    "address": {
        "street": "123 Main St",
        "city": "New York",
        "postal_code": "10001",
    },
    "tags": ["python", "backend"],
}

user = UserProfile.from_api_response(api_response)

# The full repr first, then the reduced public view.
print(user)
print(f"\nPublic info: {user.to_public_dict()}")
Practical Example: Configuration Object
from dataclasses import dataclass, field
from pathlib import Path
@dataclass
class DatabaseConfig:
    """Database connection settings with defaults for every field."""

    host: str = "localhost"
    port: int = 5432
    database: str = "myapp"
    user: str = "postgres"
    # repr=False keeps the password out of printed output.
    _password: str = field(default="", repr=False)

    @property
    def url(self) -> str:
        """Return a display URL in which the password is shown as "***"."""
        return f"postgresql://{self.user}:***@{self.host}:{self.port}/{self.database}"
@dataclass
class ServerConfig:
    """HTTP server settings with defaults for every field."""

    host: str = "0.0.0.0"
    port: int = 8000
    workers: int = 4
    debug: bool = False
    # The factory builds a fresh list for every instance.
    cors_origins: list[str] = field(default_factory=lambda: ["http://localhost:3000"])
@dataclass
class AppConfig:
    """Top-level application settings composed of nested config objects."""

    app_name: str = "MyApp"
    version: str = "1.0.0"
    # Nested configs are created through factories, one per instance.
    database: DatabaseConfig = field(default_factory=DatabaseConfig)
    server: ServerConfig = field(default_factory=ServerConfig)
    log_level: str = "INFO"
    log_file: Path = field(default_factory=lambda: Path("logs/app.log"))

    @classmethod
    def from_env(cls) -> "AppConfig":
        """Build a config from environment variables.

        Reads DB_HOST, DB_PORT, DB_NAME, PORT and DEBUG; each falls back to
        a built-in default when unset. DEBUG is true only for the value
        "true" (case-insensitive).
        """
        import os

        env = os.getenv
        db_settings = DatabaseConfig(
            host=env("DB_HOST", "localhost"),
            port=int(env("DB_PORT", "5432")),
            database=env("DB_NAME", "myapp"),
        )
        debug_enabled = env("DEBUG", "false").lower() == "true"
        server_settings = ServerConfig(
            port=int(env("PORT", "8000")),
            debug=debug_enabled,
        )
        return cls(database=db_settings, server=server_settings)
# Values come from the environment where set, otherwise from the defaults.
cfg = AppConfig.from_env()

masked_url = cfg.database.url
print(masked_url)
print(f"Server: {cfg.server.host}:{cfg.server.port}")
print(f"CORS: {cfg.server.cors_origins}")
Expert Tips
1. Copying Immutable Objects with dataclasses.replace()
from dataclasses import dataclass, replace
@dataclass(frozen=True)
class Config:
    """Frozen config used to demonstrate copying with dataclasses.replace()."""

    host: str
    port: int
    debug: bool = False
original = Config(host="localhost", port=8080)
# replace() builds a new instance; fields not named keep their old values.
modified = replace(original, port=9090, debug=True)

print(original)  # Config(host='localhost', port=8080, debug=False)
print(modified)  # Config(host='localhost', port=9090, debug=True)
2. dataclasses.asdict(), astuple()
from dataclasses import dataclass, asdict, astuple
@dataclass
class Point:
    """2D point used to demonstrate asdict() and astuple()."""

    x: float
    y: float
p = Point(x=1.0, y=2.0)

as_mapping = asdict(p)
print(as_mapping)  # {'x': 1.0, 'y': 2.0}
as_sequence = astuple(p)
print(as_sequence)  # (1.0, 2.0)
# Recursively converts nested dataclasses
@dataclass
class Line:
    """Line segment whose two endpoints are themselves dataclass instances."""

    start: Point
    end: Point
line = Line(start=Point(0, 0), end=Point(3, 4))
# asdict() also converts the nested Point instances to dicts.
nested_mapping = asdict(line)
print(nested_mapping)
# {'start': {'x': 0, 'y': 0}, 'end': {'x': 3, 'y': 4}}
3. Inspecting Field Information with dataclasses.fields()
from dataclasses import dataclass, fields, field
@dataclass
class MyData:
    """Sample record used to demonstrate introspection with fields()."""

    name: str
    value: int = 0
    # metadata is arbitrary per-field information readable via fields().
    tags: list = field(
        default_factory=list,
        metadata={"description": "Tag list"},
    )
# fields() returns one Field object per dataclass field, in declaration order.
declared_fields = fields(MyData)
for f in declared_fields:
    print(f"name: {f.name}, type: {f.type}, default: {f.default}, metadata: {f.metadata}")
Summary
| Feature | Code | Description |
|---|---|---|
| Basic dataclass | @dataclass | Auto-generates __init__, __repr__, __eq__ |
| Immutable | @dataclass(frozen=True) | Prevents modification; with the default eq=True, also generates __hash__ |
| Sorting support | @dataclass(order=True) | Auto-generates comparison operators |
| Mutable default | field(default_factory=list) | Default values for list/dict |
| Exclude from repr | field(repr=False) | Sensitive info like passwords |
| Post-init processing | __post_init__ | Validation, computed fields |
| Copy with changes | dataclasses.replace() | Copy with some fields changed |
| Convert to dict | dataclasses.asdict() | Useful for JSON serialization |