JSON Processing — json Module, Custom Serialization, Config File Patterns
JSON (JavaScript Object Notation) is the standard data format for web APIs and configuration files. Python's json module is built into the standard library and can flexibly serialize everything from basic types to custom objects.
Basics — dumps / loads
import json
# Serialization: Python object → JSON string
data = {
    "name": "Alice",
    "age": 30,
    "active": True,
    "scores": [95, 87, 92],
    "address": None,
}
json_str = json.dumps(data)
# json_str == '{"name": "Alice", "age": 30, "active": true, "scores": [95, 87, 92], "address": null}'

# Deserialization: JSON string → Python object
parsed = json.loads(json_str)
# Prints two lines: Alice, then <class 'dict'>
print(parsed["name"], type(parsed), sep="\n")
Python ↔ JSON Type Mapping
| Python | JSON |
|---|---|
| dict | object {} |
| list, tuple | array [] |
| str | string |
| int, float | number |
| True / False | true / false |
| None | null |
Direct File Processing — dump / load
import json
from pathlib import Path
# Serialize to file: json.dump writes straight into the open text stream.
data = {"key": "value", "count": 42}
with open("data.json", mode="w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# Deserialize from file: json.load parses the whole stream in one call.
with open("data.json", encoding="utf-8") as f:
    loaded = json.load(f)

# Same read pattern through pathlib.Path instead of the open() builtin.
path = Path("config.json")
with path.open(encoding="utf-8") as f:
    config = json.load(f)
indent and sort_keys Options
import json
data = {"z_key": 3, "a_key": 1, "m_key": 2}
# No indentation (default — compact single line, suitable for network transmission)
json.dumps(data)
# '{"z_key": 3, "a_key": 1, "m_key": 2}'
# 4-space indentation (pretty-printed, one key per line)
json.dumps(data, indent=4)
# '{\n    "z_key": 3,\n    "a_key": 1,\n    "m_key": 2\n}'
# Sort keys alphabetically (key order no longer depends on insertion order)
json.dumps(data, sort_keys=True)
# '{"a_key": 1, "m_key": 2, "z_key": 3}'
# Preserve non-ASCII characters (the default, ensure_ascii=True, escapes them to \uXXXX)
json.dumps({"name": "José"}, ensure_ascii=False)
# '{"name": "José"}'
# Customize separators (drop the space after "," and ":" to minimize whitespace)
json.dumps(data, separators=(",", ":"))
# '{"z_key":3,"a_key":1,"m_key":2}'
Custom Serialization — default Function
By default, json.dumps() only serializes dict, list, tuple, str, int, float, bool, and None. Handle any other type with a default function or a JSONEncoder subclass.
default Function Approach
import json
from datetime import datetime, date
from decimal import Decimal
from pathlib import Path
from uuid import UUID
def json_default(obj):
    """Convert an object that ``json.dumps`` cannot encode natively.

    Pass this function as the ``default`` parameter of ``json.dumps`` /
    ``json.dump``; the encoder calls it for every value it does not know.

    Args:
        obj: The value the JSON encoder could not serialize on its own.

    Returns:
        A JSON-compatible replacement: ISO 8601 text for ``date`` and
        ``datetime``, a ``float`` for ``Decimal``, a ``str`` for ``Path`` and
        ``UUID``, a sorted list for ``set`` and ``frozenset``, or the
        attribute dictionary of any other object that has ``__dict__``.

    Raises:
        TypeError: If ``obj`` matches none of the supported types.
    """
    # datetime is a subclass of date, so a single check covers both.
    if isinstance(obj, date):
        return obj.isoformat()  # "2024-01-15T10:30:00" or "2024-01-15"
    if isinstance(obj, Decimal):
        return float(obj)  # or str(obj) to preserve precision
    if isinstance(obj, (Path, UUID)):
        return str(obj)
    if isinstance(obj, (set, frozenset)):
        try:
            return sorted(obj)  # set → sorted list
        except TypeError:
            # Elements of different types cannot be compared with "<";
            # order them by type name and repr so the output is still
            # deterministic instead of failing with a confusing error.
            return sorted(obj, key=lambda item: (type(item).__name__, repr(item)))
    if hasattr(obj, "__dict__"):
        return obj.__dict__  # attribute dictionary of general objects
    raise TypeError(f"Not serializable: {type(obj).__name__}")
# Usage: every value below is a type json.dumps cannot encode by itself,
# so each one is routed through json_default.
data = {
    "created_at": datetime.now(),  # emitted as ISO 8601 text via isoformat()
    "price": Decimal("19.99"),  # emitted as a float by json_default
    "tags": {"python", "tutorial"},  # emitted as a sorted list
    "path": Path("/home/user/data.txt"),  # emitted as a string
}
print(json.dumps(data, default=json_default, ensure_ascii=False, indent=2))
Custom JSONEncoder Class
import json
from datetime import datetime
from decimal import Decimal
from uuid import UUID
class AppJSONEncoder(json.JSONEncoder):
    """Application-specific JSON encoder that type-tags special values.

    ``datetime``, ``Decimal`` and ``UUID`` values are emitted as
    ``{"__type__": <tag>, "value": <text>}`` objects so that a matching
    decoder can restore the original Python type.
    """

    def default(self, obj):
        """Return the tagged form of ``obj``, or defer to the base encoder.

        Raises:
            TypeError: Raised by the parent class for unsupported types.
        """
        if isinstance(obj, datetime):
            tag, text = "datetime", obj.isoformat()
        elif isinstance(obj, Decimal):
            tag, text = "decimal", str(obj)
        elif isinstance(obj, UUID):
            tag, text = "uuid", str(obj)
        else:
            # Delegate unhandled types to the parent implementation.
            return super().default(obj)
        return {"__type__": tag, "value": text}
class AppJSONDecoder(json.JSONDecoder):
    """Decoder paired with AppJSONEncoder.

    Restores ``{"__type__": <tag>, "value": <text>}`` objects to
    ``datetime``, ``Decimal`` or ``UUID`` instances. Every other JSON object
    is returned unchanged as a ``dict``.
    """

    def __init__(self, **kwargs):
        """Install the type-restoring hook; other kwargs go to JSONDecoder."""
        super().__init__(object_hook=self._object_hook, **kwargs)

    @staticmethod
    def _object_hook(d: dict):
        """Convert one decoded JSON object back to its Python type.

        Args:
            d: A JSON object that has just been decoded into a dict.

        Returns:
            The restored ``datetime`` / ``Decimal`` / ``UUID`` for a tagged
            object, otherwise ``d`` itself.
        """
        # An ordinary object may happen to carry a "__type__" key without
        # being one of our tagged payloads; only convert when the "value"
        # field is present so such data passes through instead of raising
        # KeyError.
        if "value" not in d:
            return d
        type_tag = d.get("__type__")
        if type_tag == "datetime":
            return datetime.fromisoformat(d["value"])
        if type_tag == "decimal":
            return Decimal(d["value"])
        if type_tag == "uuid":
            return UUID(d["value"])
        return d
# Usage: round-trip values through AppJSONEncoder and AppJSONDecoder.
from decimal import Decimal
from uuid import uuid4
original = {
    "id": uuid4(),
    "price": Decimal("99.99"),
    "created": datetime.now(),
}
# Each special value is written as a {"__type__": ..., "value": ...} object.
encoded = json.dumps(original, cls=AppJSONEncoder, ensure_ascii=False)
# The decoder's object hook turns those tagged objects back into Python types.
decoded = json.loads(encoded, cls=AppJSONDecoder)
print(type(decoded["price"])) # <class 'decimal.Decimal'>
print(type(decoded["created"])) # <class 'datetime.datetime'>
datetime and Decimal Serialization Patterns
Recommended patterns for the two types most frequently encountered in production.
import json
from datetime import datetime
from decimal import Decimal
# Decimal → str (recommended: keeps every digit)
def decimal_to_str(obj):
    """Serialize a ``Decimal`` as its exact string form.

    Intended for use as the ``default`` hook of ``json.dumps``.

    Raises:
        TypeError: If ``obj`` is not a ``Decimal``.
    """
    if not isinstance(obj, Decimal):
        raise TypeError
    # str() yields "19.99" exactly; going through float would lose precision.
    return str(obj)
# datetime → ISO 8601 text
def datetime_to_iso(obj):
    """Serialize a ``datetime`` as an ISO 8601 string.

    Intended for use as the ``default`` hook of ``json.dumps``.

    Raises:
        TypeError: If ``obj`` is not a ``datetime``.
    """
    if not isinstance(obj, datetime):
        raise TypeError
    return obj.isoformat()
# Combined encoder: one default hook covering both types
def encode(obj):
    """Serialize ``datetime`` as ISO 8601 text and ``Decimal`` as a string.

    Intended for use as the ``default`` hook of ``json.dumps``.

    Raises:
        TypeError: If ``obj`` is neither a ``datetime`` nor a ``Decimal``;
            the message is the type of ``obj``.
    """
    if isinstance(obj, datetime):
        converted = obj.isoformat()
    elif isinstance(obj, Decimal):
        converted = str(obj)
    else:
        raise TypeError(f"{type(obj)}")
    return converted
# Deserialization: ISO 8601 → datetime
# fromisoformat() parses the text that isoformat() produces. The string below
# carries no UTC offset, so the result is a naive datetime.
from datetime import datetime
dt = datetime.fromisoformat("2024-01-15T10:30:00")
# dt == datetime(2024, 1, 15, 10, 30)
Config File Pattern
import json
from pathlib import Path
from typing import Any
import copy
class ConfigManager:
"""
JSON-based configuration file manager.
Merges defaults with user configuration.
"""
DEFAULTS: dict[str, Any] = {
"server": {
"host": "0.0.0.0",
"port": 8000,
"debug": False,
"workers": 4,
},
"database": {
"url": "sqlite:///app.db",
"pool_size": 5,
"timeout": 30,
},
"cache": {
"backend": "memory",
"ttl": 300,
},
"logging": {
"level": "INFO",
"format": "json",
},
}
def __init__(self, config_path: str | Path) -> None:
self.config_path = Path(config_path)
self._config = copy.deepcopy(self.DEFAULTS)
self._load()
def _load(self) -> None:
if not self.config_path.exists():
return
with self.config_path.open("r", encoding="utf-8") as f:
user_config = json.load(f)
self._deep_merge(self._config, user_config)
@staticmethod
def _deep_merge(base: dict, override: dict) -> None:
"""Recursively merges override values into base."""
for key, value in override.items():
if key in base and isinstance(base[key], dict) and isinstance(value, dict):
ConfigManager._deep_merge(base[key], value)
else:
base[key] = value
def get(self, *keys: str, default: Any = None) -> Any:
"""Get a value by dot-notation path: get("server", "port")"""
node = self._config
for key in keys:
if not isinstance(node, dict) or key not in node:
return default
node = node[key]
return node
def save(self) -> None:
with self.config_path.open("w", encoding="utf-8") as f:
json.dump(self._config, f, ensure_ascii=False, indent=2)
# Usage: values come from DEFAULTS unless config.json exists and overrides them.
config = ConfigManager("config.json")
port = config.get("server", "port") # 8000 (default)
debug = config.get("server", "debug") # False (default)
Practical Example — API Response Handling
import json
from dataclasses import dataclass, asdict
from datetime import datetime
@dataclass
class User:
    """A user record that can be converted to and from a JSON string."""

    id: int
    name: str
    email: str
    created_at: datetime

    def to_json(self) -> str:
        """Return this user as a JSON object string.

        ``created_at`` is written as ISO 8601 text; non-ASCII characters are
        kept as-is rather than escaped.
        """
        payload = {**asdict(self), "created_at": self.created_at.isoformat()}
        return json.dumps(payload, ensure_ascii=False)

    @classmethod
    def from_json(cls, json_str: str) -> "User":
        """Build a user from a JSON object string produced by ``to_json``."""
        payload = json.loads(json_str)
        # Turn the ISO 8601 text back into a datetime before constructing.
        payload.update(created_at=datetime.fromisoformat(payload["created_at"]))
        return cls(**payload)
def parse_api_response(response_body: str) -> list[User]:
    """Parse an API JSON response containing an array of users.

    Items that cannot be converted to a ``User`` are skipped with a printed
    warning, so one bad record does not discard the rest of the response.

    Args:
        response_body: Raw JSON text; the top-level value must be an array.

    Returns:
        The users built from the valid items, in their original order.

    Raises:
        ValueError: If the body is not valid JSON or is not a JSON array.
    """
    try:
        data = json.loads(response_body)
    except json.JSONDecodeError as e:
        raise ValueError(f"JSON parse failure (position {e.pos}): {e.msg}") from e
    if not isinstance(data, list):
        raise ValueError(f"Array response required. Got: {type(data).__name__}")
    users = []
    for i, item in enumerate(data):
        try:
            user = User(
                id=int(item["id"]),
                name=str(item["name"]),
                email=str(item["email"]),
                created_at=datetime.fromisoformat(item["created_at"]),
            )
        except (KeyError, TypeError, ValueError) as e:
            # TypeError covers items that are not objects, a null id, and a
            # non-string timestamp; these are skipped like any other
            # malformed record.
            print(f"[Warning] Skipping item {i}: {e}")
        else:
            users.append(user)
    return users
Expert Tips
Tip 1 — Position information from json.JSONDecodeError
import json
bad_json = '{"key": "value",}'  # Invalid: JSON does not allow a trailing comma
try:
    json.loads(bad_json)
except json.JSONDecodeError as e:
    # Printed in order, one per line:
    #   msg    - description of the problem (wording varies by Python version)
    #   lineno - line where the error occurred
    #   colno  - column where the error occurred
    #   pos    - character position in the input
    for field in ("msg", "lineno", "colno", "pos"):
        print(getattr(e, field))
Tip 2 — Streaming JSON parsing
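json.load() reads the whole document into memory before returning. For large JSON files, use the ijson package (pip install ijson), which parses the input as a stream and yields items one at a time.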
# pip install ijson
import ijson
# Binary mode: the file is handed to ijson as raw bytes.
with open("large.json", "rb") as stream:
    # The "item" prefix selects each element of the top-level array.
    for element in ijson.items(stream, "item"):
        process(element)
Tip 3 — JSON5 / JSONC (config files with comments)
JSON does not support comments. For config files that need comments, either parse JSON5/JSONC with the json5 package, or switch the file format to TOML and read it with tomllib (Python 3.11+).
# pip install json5
import json5
# Pass the encoding explicitly: without it, open() falls back to the
# platform's locale encoding and may mis-decode non-ASCII text.
with open("config.jsonc", "r", encoding="utf-8") as f:
    config = json5.load(f)  # Supports // comments
Tip 4 — Serializing __slots__ classes to JSON
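Instances of classes that define __slots__ normally have no __dict__ (unless a base class provides one), so a default function that relies on obj.__dict__ cannot serialize them; they need special handling.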
def encode_slots(obj):
    """Serialize an instance of a ``__slots__`` class as a dict.

    Intended for use as the ``default`` hook of ``json.dumps``.

    Slots are collected from every class in the instance's MRO, base classes
    first, so inherited slots are included. A ``__slots__`` declared as a
    single string is treated as one slot name. Slots that have not been
    assigned a value are omitted. Name-mangled private slots (``__x``) are
    not resolved and are therefore skipped.

    Args:
        obj: The value the JSON encoder could not serialize on its own.

    Returns:
        A dict mapping slot name to the current value.

    Raises:
        TypeError: If ``obj`` does not come from a ``__slots__`` class.
    """
    if not hasattr(obj, "__slots__"):
        raise TypeError(f"Not serializable: {type(obj).__name__}")
    fields = {}
    for klass in reversed(type(obj).__mro__):
        # Read the class's own declaration; plain attribute access would
        # only ever return the most-derived class's slots.
        declared = klass.__dict__.get("__slots__", ())
        if isinstance(declared, str):
            # __slots__ = "name" declares one slot, not one per character.
            declared = (declared,)
        for slot in declared:
            if slot in ("__dict__", "__weakref__"):
                continue  # bookkeeping slots, not data fields
            if hasattr(obj, slot):  # unset slots raise AttributeError
                fields[slot] = getattr(obj, slot)
    return fields
# `obj` stands for an instance of a class that defines __slots__.
json.dumps(obj, default=encode_slots)