Skip to main content
Advertisement

JSON Processing — json Module, Custom Serialization, Config File Patterns

JSON (JavaScript Object Notation) is the standard data format for web APIs and configuration files. Python's json module is built into the standard library and can flexibly serialize everything from basic types to custom objects.


Basics — dumps / loads

import json

# Python object → JSON string (serialization)
data = {
    "name": "Alice",
    "age": 30,
    "active": True,  # written as JSON true
    "scores": [95, 87, 92],
    "address": None,  # written as JSON null
}
json_str = json.dumps(data)
# '{"name": "Alice", "age": 30, "active": true, "scores": [95, 87, 92], "address": null}'

# JSON string → Python object (deserialization)
parsed = json.loads(json_str)
print(parsed["name"])  # prints: Alice
print(type(parsed))  # prints: <class 'dict'>

Python ↔ JSON Type Mapping

| Python | JSON |
|---|---|
| dict | object {} |
| list, tuple | array [] |
| str | string |
| int, float | number |
| True / False | true / false |
| None | null |

Direct File Processing — dump / load

import json
from pathlib import Path

# Serialize to file
data = {"key": "value", "count": 42}
# ensure_ascii=False writes non-ASCII characters unescaped, so the file is
# opened with an explicit UTF-8 encoding.
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# Deserialize from file
with open("data.json", "r", encoding="utf-8") as f:
    loaded = json.load(f)

# Using with pathlib.Path
# NOTE: this snippet assumes config.json already exists in the working directory.
path = Path("config.json")
with path.open("r", encoding="utf-8") as f:
    config = json.load(f)

indent and sort_keys Options

import json

data = {"z_key": 3, "a_key": 1, "m_key": 2}

# No indentation (default — suitable for network transmission)
json.dumps(data)
# '{"z_key": 3, "a_key": 1, "m_key": 2}'

# 4-space indentation (one key per line, insertion order kept)
json.dumps(data, indent=4)
# '{\n    "z_key": 3,\n    "a_key": 1,\n    "m_key": 2\n}'

# Sort keys alphabetically
json.dumps(data, sort_keys=True)
# '{"a_key": 1, "m_key": 2, "z_key": 3}'

# Preserve non-ASCII characters (default escapes to \uXXXX)
json.dumps({"name": "José"}, ensure_ascii=False)
# '{"name": "José"}'

# Customize separators (minimize whitespace)
# The tuple is (item_separator, key_separator).
json.dumps(data, separators=(",", ":"))
# '{"z_key":3,"a_key":1,"m_key":2}'

Custom Serialization — default Function

json.dumps() only serializes dict, list, tuple, str, int, float, bool, and None by default. Handle other types with a default function or a JSONEncoder subclass.

default Function Approach

import json
from datetime import datetime, date
from decimal import Decimal
from pathlib import Path
from uuid import UUID

def json_default(obj):
    """Convert objects the json module cannot encode into JSON-friendly values.

    Pass this as the ``default`` parameter of ``json.dumps`` / ``json.dump``.

    Args:
        obj: The object the encoder could not serialize on its own.

    Returns:
        An ISO 8601 string for ``datetime`` / ``date``, a ``float`` for
        ``Decimal``, a string for ``Path`` / ``UUID``, a list for
        ``set`` / ``frozenset``, or the attribute dictionary of any other
        object that has a ``__dict__``.

    Raises:
        TypeError: If ``obj`` is none of the supported kinds.
    """
    # datetime is a subclass of date; both serialize through isoformat():
    # "2024-01-15T10:30:00" and "2024-01-15" respectively.
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)  # or str(obj) to preserve precision
    if isinstance(obj, (Path, UUID)):
        return str(obj)
    if isinstance(obj, (set, frozenset)):
        try:
            return sorted(obj)  # set → sorted list
        except TypeError:
            # Elements of different types (e.g. int and str) cannot be
            # compared with each other; order by type name, then repr, so
            # the output is still deterministic.
            return sorted(obj, key=lambda item: (type(item).__name__, repr(item)))
    if hasattr(obj, "__dict__"):
        return obj.__dict__  # attribute dictionary of general objects
    raise TypeError(f"Not serializable: {type(obj).__name__}")

# Usage
data = {
    "created_at": datetime.now(),  # emitted as an ISO 8601 string
    "price": Decimal("19.99"),  # emitted as a float
    "tags": {"python", "tutorial"},  # emitted as a sorted list
    "path": Path("/home/user/data.txt"),  # emitted as str(path)
}
print(json.dumps(data, default=json_default, ensure_ascii=False, indent=2))

Custom JSONEncoder Class

import json
from datetime import datetime
from decimal import Decimal
from uuid import UUID

class AppJSONEncoder(json.JSONEncoder):
    """Encoder that wraps datetime, Decimal and UUID values in tagged objects.

    Each supported value becomes ``{"__type__": <tag>, "value": <text>}`` so
    that a matching decoder can restore the original type.
    """

    def default(self, obj):
        """Return the tagged representation of ``obj``.

        Types not handled here are passed to ``json.JSONEncoder.default``,
        which raises ``TypeError``.
        """
        if isinstance(obj, datetime):
            tag, text = "datetime", obj.isoformat()
        elif isinstance(obj, Decimal):
            tag, text = "decimal", str(obj)
        elif isinstance(obj, UUID):
            tag, text = "uuid", str(obj)
        else:
            return super().default(obj)
        return {"__type__": tag, "value": text}


class AppJSONDecoder(json.JSONDecoder):
    """Decoder paired with AppJSONEncoder.

    Rebuilds ``datetime``, ``Decimal`` and ``UUID`` values from objects of
    the form ``{"__type__": <tag>, "value": <text>}``.
    """

    def __init__(self, **kwargs):
        """Create the decoder; ``kwargs`` are forwarded to ``json.JSONDecoder``."""
        super().__init__(object_hook=self._object_hook, **kwargs)

    @staticmethod
    def _object_hook(d: dict):
        """Turn a tagged object back into its Python value.

        Args:
            d: A JSON object that has just been decoded into a dict.

        Returns:
            The restored ``datetime`` / ``Decimal`` / ``UUID`` when ``d`` has
            a recognized ``__type__`` tag and a ``value`` entry; otherwise
            ``d`` unchanged.
        """
        if "value" not in d:
            # An ordinary object that merely happens to contain "__type__"
            # is not a tagged value; pass it through instead of raising
            # KeyError.
            return d
        type_tag = d.get("__type__")
        if type_tag == "datetime":
            return datetime.fromisoformat(d["value"])
        if type_tag == "decimal":
            return Decimal(d["value"])
        if type_tag == "uuid":
            return UUID(d["value"])
        return d


# Usage
from decimal import Decimal
from uuid import uuid4

original = {
    "id": uuid4(),
    "price": Decimal("99.99"),
    "created": datetime.now(),
}

# Round trip: the encoder tags each special value, the decoder restores it.
encoded = json.dumps(original, cls=AppJSONEncoder, ensure_ascii=False)
decoded = json.loads(encoded, cls=AppJSONDecoder)

print(type(decoded["price"]))  # <class 'decimal.Decimal'>
print(type(decoded["created"]))  # <class 'datetime.datetime'>

datetime and Decimal Serialization Patterns

Recommended patterns for the two types most frequently encountered in production.

import json
from datetime import datetime
from decimal import Decimal

# Decimal → str (preserves precision, recommended)
def decimal_to_str(obj):
    """Serialize a ``Decimal`` as its exact string form (a ``default=`` hook).

    Args:
        obj: The object json could not encode by itself.

    Returns:
        ``str(obj)``, e.g. ``"19.99"``.

    Raises:
        TypeError: If ``obj`` is not a ``Decimal``.
    """
    if not isinstance(obj, Decimal):
        raise TypeError
    # str() keeps every digit; converting to float would lose precision.
    return str(obj)

# datetime → ISO 8601
def datetime_to_iso(obj):
    """Serialize a ``datetime`` as an ISO 8601 string (a ``default=`` hook).

    Args:
        obj: The object json could not encode by itself.

    Returns:
        ``obj.isoformat()``.

    Raises:
        TypeError: If ``obj`` is not a ``datetime``.
    """
    if not isinstance(obj, datetime):
        raise TypeError
    return obj.isoformat()

# Combined encoder
def encode(obj):
if isinstance(obj, datetime):
return obj.isoformat()
if isinstance(obj, Decimal):
return str(obj)
raise TypeError(f"{type(obj)}")

# Deserialization: ISO 8601 → datetime
from datetime import datetime
dt = datetime.fromisoformat("2024-01-15T10:30:00")
# dt == datetime(2024, 1, 15, 10, 30); it is naive because the string has no UTC offset

Config File Pattern

import json
from pathlib import Path
from typing import Any
import copy

class ConfigManager:
"""
JSON-based configuration file manager.
Merges defaults with user configuration.
"""

DEFAULTS: dict[str, Any] = {
"server": {
"host": "0.0.0.0",
"port": 8000,
"debug": False,
"workers": 4,
},
"database": {
"url": "sqlite:///app.db",
"pool_size": 5,
"timeout": 30,
},
"cache": {
"backend": "memory",
"ttl": 300,
},
"logging": {
"level": "INFO",
"format": "json",
},
}

def __init__(self, config_path: str | Path) -> None:
self.config_path = Path(config_path)
self._config = copy.deepcopy(self.DEFAULTS)
self._load()

def _load(self) -> None:
if not self.config_path.exists():
return
with self.config_path.open("r", encoding="utf-8") as f:
user_config = json.load(f)
self._deep_merge(self._config, user_config)

@staticmethod
def _deep_merge(base: dict, override: dict) -> None:
"""Recursively merges override values into base."""
for key, value in override.items():
if key in base and isinstance(base[key], dict) and isinstance(value, dict):
ConfigManager._deep_merge(base[key], value)
else:
base[key] = value

def get(self, *keys: str, default: Any = None) -> Any:
"""Get a value by dot-notation path: get("server", "port")"""
node = self._config
for key in keys:
if not isinstance(node, dict) or key not in node:
return default
node = node[key]
return node

def save(self) -> None:
with self.config_path.open("w", encoding="utf-8") as f:
json.dump(self._config, f, ensure_ascii=False, indent=2)

# Usage
config = ConfigManager("config.json")
port = config.get("server", "port")  # 8000 unless config.json overrides server.port
debug = config.get("server", "debug")  # False unless config.json overrides server.debug

Practical Example — API Response Handling

import json
from dataclasses import dataclass, asdict
from datetime import datetime

@dataclass
class User:
    """A user record that converts to and from a JSON string."""

    id: int
    name: str
    email: str
    created_at: datetime

    def to_json(self) -> str:
        """Return this user as a JSON object string.

        ``created_at`` is written as an ISO 8601 string.
        """
        payload = {**asdict(self), "created_at": self.created_at.isoformat()}
        return json.dumps(payload, ensure_ascii=False)

    @classmethod
    def from_json(cls, json_str: str) -> "User":
        """Build a ``User`` from a string produced by ``to_json``."""
        raw = json.loads(json_str)
        created = datetime.fromisoformat(raw["created_at"])
        return cls(**{**raw, "created_at": created})


def parse_api_response(response_body: str) -> list[User]:
    """Parse a JSON array of user objects into ``User`` instances.

    Malformed items (not an object, a missing field, or a value that cannot
    be converted) are reported on stdout and skipped; the remaining items
    are still returned.

    Args:
        response_body: The raw JSON text of the API response.

    Returns:
        The successfully parsed users, in response order.

    Raises:
        ValueError: If the body is not valid JSON or its top-level value is
            not an array.
    """
    try:
        data = json.loads(response_body)
    except json.JSONDecodeError as e:
        raise ValueError(f"JSON parse failure (position {e.pos}): {e.msg}") from e

    if not isinstance(data, list):
        raise ValueError(f"Array response required. Got: {type(data).__name__}")

    users = []
    for i, item in enumerate(data):
        try:
            user = User(
                id=int(item["id"]),
                name=str(item["name"]),
                email=str(item["email"]),
                created_at=datetime.fromisoformat(item["created_at"]),
            )
        except (KeyError, TypeError, ValueError) as e:
            # TypeError covers items that are not objects (e.g. a bare
            # number) and null fields such as int(None) or fromisoformat(None).
            print(f"[Warning] Skipping item {i}: {e}")
        else:
            users.append(user)
    return users

Expert Tips

Tip 1 — Position information from json.JSONDecodeError

import json

bad_json = '{"key": "value",}'  # Trailing comma
try:
    json.loads(bad_json)
except json.JSONDecodeError as e:
    # e.g. "Expecting property name enclosed in double quotes"; the exact
    # wording may differ between Python versions.
    print(e.msg)
    print(e.lineno)  # Line where error occurred
    print(e.colno)  # Column where error occurred
    print(e.pos)  # Character position

Tip 2 — Streaming JSON parsing

Use the ijson package (pip install ijson) for streaming parsing of large JSON files.

# pip install ijson
import ijson

# The file is opened in binary mode and handed to ijson as a stream.
with open("large.json", "rb") as f:
    for item in ijson.items(f, "item"):  # Each element of the top-level array
        process(item)  # process() is a placeholder; it is not defined in this snippet

Tip 3 — JSON5 / JSONC (config files with comments)

JSON does not support comments. For config files that need comments, use the json5 package, or switch to TOML, which the standard library can read with tomllib (Python 3.11+).

# pip install json5
import json5
# Pass the encoding explicitly so the result does not depend on the
# platform's default locale encoding.
with open("config.jsonc", "r", encoding="utf-8") as f:
    config = json5.load(f)  # Supports // comments

Tip 4 — Serializing __slots__ classes to JSON

Classes using __slots__ have no __dict__, so they need special handling.

def encode_slots(obj):
    """Serialize an instance of a ``__slots__`` class as a plain dict.

    Intended for use as the ``default`` parameter of ``json.dumps``.

    Slots declared on every class in the instance's MRO are collected, so
    inherited slots are included. A ``__slots__`` given as a single string is
    treated as one slot name. Slots that have not been assigned, and the
    special ``__dict__`` / ``__weakref__`` entries, are left out.

    Args:
        obj: The object json could not encode by itself.

    Returns:
        A dict mapping each declared slot name to its current value, base
        class slots first.

    Raises:
        TypeError: If ``obj`` does not come from a class with ``__slots__``.
    """
    if not hasattr(obj, "__slots__"):
        raise TypeError(f"Not serializable: {type(obj).__name__}")

    unset = object()  # sentinel: distinguishes "never assigned" from None
    state = {}
    # obj.__slots__ only shows the most-derived declaration, so walk the MRO
    # (base classes first) to pick up inherited slots as well.
    for klass in reversed(type(obj).__mro__):
        declared = vars(klass).get("__slots__", ())
        if isinstance(declared, str):
            declared = (declared,)  # a bare string declares exactly one slot
        for slot in declared:
            if slot in ("__dict__", "__weakref__"):
                continue
            attr = slot
            if slot.startswith("__") and not slot.endswith("__"):
                # Private names are stored under their mangled form.
                owner = klass.__name__.lstrip("_")
                if owner:
                    attr = f"_{owner}{slot}"
            value = getattr(obj, attr, unset)
            if value is not unset:
                state[slot] = value
    return state

# obj is a placeholder for an instance of a class that defines __slots__.
json.dumps(obj, default=encode_slots)
Advertisement