pathlib.Path — Modern File Path Handling Mastery
Introduced in Python 3.4, the pathlib module treats file paths as objects rather than strings. It replaces os.path functions with methods and lets you combine paths intuitively using the / operator.
pathlib.Path Basics
from pathlib import Path
# Current directory
p = Path(".")
p = Path.cwd() # Current directory as absolute path
# Home directory
home = Path.home() # /home/user or C:\Users\user
# Absolute path
p = Path("/etc/hosts") # Unix
p = Path("C:/Users/user/doc") # Windows (forward slashes work)
# Relative path
p = Path("data/input.csv")
Path Construction — Path() and / Operator
Use the / operator to join path components.
from pathlib import Path
base = Path("/home/user")
data_dir = base / "projects" / "myapp" / "data"
# PosixPath('/home/user/projects/myapp/data')
config = base / "config" / "settings.json"
# PosixPath('/home/user/config/settings.json')
# Mix with strings
filename = "report.csv"
output = data_dir / filename
Use Path.resolve() to get the absolute path with symlinks and .. resolved.
p = Path("../sibling/file.txt")
print(p.resolve()) # /home/user/sibling/file.txt
Path Attributes
p = Path("/home/user/projects/app/main.py")
p.name # 'main.py' — filename with extension
p.stem # 'main' — filename without extension
p.suffix # '.py' — extension
p.suffixes # ['.py'] — multiple extensions (e.g. ['.tar', '.gz'])
p.parent # /home/user/projects/app
p.parents # sequence of all parent paths
p.parts # ('/', 'home', 'user', 'projects', 'app', 'main.py')
p.root # '/'
p.drive # '' (Unix) or 'C:' (Windows)
# Status checks
p.exists() # Whether it exists
p.is_file() # Whether it's a file
p.is_dir() # Whether it's a directory
p.is_symlink() # Whether it's a symbolic link
p.is_absolute() # Whether it's an absolute path
Reading and Writing Files
from pathlib import Path
p = Path("data.txt")
# Text read/write
content = p.read_text(encoding="utf-8")
p.write_text("New content\n", encoding="utf-8")
# Binary read/write
data = p.read_bytes()
p.write_bytes(b"\x89PNG\r\n")
# Usable just like open()
with p.open("r", encoding="utf-8") as f:
for line in f:
print(line.rstrip())
Directory Traversal — iterdir, glob, rglob
iterdir — Direct Children
from pathlib import Path
p = Path(".")
for child in p.iterdir():
kind = "DIR " if child.is_dir() else "FILE"
print(f"{kind} {child.name}")
glob — Pattern Matching (Single Directory Level, or Recursive with **)
from pathlib import Path
p = Path("src")
# .py files directly inside src (not recursive)
for f in p.glob("*.py"):
print(f)
# .py files in all subdirectories (** recursive)
for f in p.glob("**/*.py"):
print(f)
rglob — Recursive glob
# Same as p.glob("**/*.py")
for f in Path("src").rglob("*.py"):
print(f)
# Directories only
for d in Path(".").rglob("*"):
if d.is_dir():
print(d)
File Operations — mkdir, rename, unlink, copy
from pathlib import Path
import shutil
p = Path("new_dir")
# Create directory
p.mkdir() # Simple creation: raises FileExistsError if it exists, FileNotFoundError if the parent is missing
p.mkdir(parents=True, exist_ok=True) # Including parents, OK if already exists
# Rename / move
# NOTE: a relative target is interpreted relative to the current working
# directory, not relative to p's own parent directory.
# NOTE: rename() and replace() return the new Path (Python 3.8+); p itself is
# not updated and still refers to "new_dir" afterwards, so in real code use the
# return value for any follow-up operation.
# On Unix, rename() silently replaces an existing file; on Windows it raises
# FileExistsError. replace() overwrites an existing file or empty directory on
# every platform.
p.rename("renamed_dir") # Same directory here (both paths are relative to the cwd)
p.replace("other/dir") # Move to different location (overwrites)
# Delete file
Path("temp.txt").unlink() # Raises FileNotFoundError if the file is missing
Path("temp.txt").unlink(missing_ok=True) # No error if missing (Python 3.8+)
# Delete directory (only when empty)
Path("empty_dir").rmdir()
# Delete entire directory (requires shutil) — recursive and irreversible
shutil.rmtree(Path("full_dir"))
# Copy file (using shutil) — copy2 also tries to preserve metadata such as mtime
shutil.copy2(Path("src.txt"), Path("dst.txt"))
File Metadata
from pathlib import Path
import datetime
p = Path("data.txt")
stat = p.stat()
print(stat.st_size) # File size (bytes)
print(stat.st_mtime) # Modification time (Unix timestamp)
# Human-readable format
mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
print(mtime.strftime("%Y-%m-%d %H:%M:%S"))
# Size in KB/MB
size_kb = stat.st_size / 1024
size_mb = stat.st_size / 1024 / 1024
os.path vs pathlib Comparison
| Operation | os.path (old-style) | pathlib (modern) |
|---|---|---|
| Path join | os.path.join(a, b) | a / b |
| Filename | os.path.basename(p) | p.name |
| Directory | os.path.dirname(p) | p.parent |
| Extension | os.path.splitext(p)[1] | p.suffix |
| Exists | os.path.exists(p) | p.exists() |
| Absolute path | os.path.abspath(p) | p.resolve() (also resolves symlinks, like os.path.realpath(p)) |
| Is file | os.path.isfile(p) | p.is_file() |
| Is directory | os.path.isdir(p) | p.is_dir() |
Practical Example — File Organization Script
from pathlib import Path
from datetime import datetime, timedelta
import shutil
def _move_without_overwrite(src: Path, dest: Path) -> None:
    """Move ``src`` to ``dest``, refusing to clobber an existing destination.

    Raises:
        FileExistsError: If ``dest`` already exists (including a dangling symlink).
        OSError: If the underlying move fails.
    """
    # Path.rename() silently replaces an existing file on POSIX but raises on
    # Windows; checking first gives the same non-destructive result everywhere.
    if dest.exists() or dest.is_symlink():
        raise FileExistsError(f"destination already exists: {dest}")
    # shutil.move() falls back to copy-and-delete when src and dest live on
    # different filesystems, where a plain rename would fail.
    shutil.move(src, dest)


def organize_downloads(
    downloads: Path,
    archive: Path,
    days_old: int = 30
) -> dict[str, int]:
    """
    Categorizes files in the downloads folder by extension
    and moves old files to the archive.

    Only regular files directly inside ``downloads`` are considered. A file
    whose modification time is older than ``days_old`` days is moved to
    ``archive``; any other file with a known extension is moved to a category
    subfolder of ``downloads`` (``images``, ``documents``, ``videos``,
    ``archives``). Files with an unknown extension are left in place.

    An existing destination is never overwritten: the file stays where it is
    and the case is counted as an error. Failures are printed and counted
    instead of aborting the run.

    Args:
        downloads: Directory to organize.
        archive: Directory that receives old files; created if missing.
        days_old: Age threshold in days for archiving.

    Returns:
        Counters under the keys ``"moved"`` (archived), ``"organized"``
        (categorized) and ``"errors"`` (failed operations).
    """
    archive.mkdir(parents=True, exist_ok=True)
    cutoff = datetime.now() - timedelta(days=days_old)
    stats = {"moved": 0, "organized": 0, "errors": 0}

    # Categorization rules by extension
    categories = {
        "images": {".jpg", ".jpeg", ".png", ".gif", ".webp"},
        "documents": {".pdf", ".docx", ".xlsx", ".pptx", ".txt"},
        "videos": {".mp4", ".mkv", ".avi", ".mov"},
        "archives": {".zip", ".tar", ".gz", ".7z"},
    }
    # Inverted once so each file needs a single dictionary lookup.
    category_by_suffix = {
        extension: category
        for category, extensions in categories.items()
        for extension in extensions
    }

    # Snapshot the listing first: the loop creates category folders inside
    # ``downloads`` and moves entries out of it, and changing a directory
    # while iterating over it gives unspecified results.
    for file in list(downloads.iterdir()):
        if not file.is_file():
            continue

        try:
            mtime = datetime.fromtimestamp(file.stat().st_mtime)
        except OSError as e:
            # The file may have vanished or become unreadable since listing.
            print(f"Stat failed {file.name}: {e}")
            stats["errors"] += 1
            continue

        # Old file → move to archive (takes precedence over categorization)
        if mtime < cutoff:
            try:
                _move_without_overwrite(file, archive / file.name)
            except OSError as e:
                print(f"Move failed {file.name}: {e}")
                stats["errors"] += 1
            else:
                stats["moved"] += 1
            continue

        # Categorize by extension (case-insensitive)
        category = category_by_suffix.get(file.suffix.lower())
        if category is None:
            continue

        dest_dir = downloads / category
        try:
            # mkdir is inside the try: it raises FileExistsError when a
            # regular file already uses the category name.
            dest_dir.mkdir(exist_ok=True)
            _move_without_overwrite(file, dest_dir / file.name)
        except OSError as e:
            print(f"Move failed {file.name}: {e}")
            stats["errors"] += 1
        else:
            stats["organized"] += 1

    return stats
def find_duplicates(directory: Path) -> dict[int, list[Path]]:
    """
    Groups every file below ``directory`` (recursively) by its size in bytes
    and returns only the groups with at least two members.

    Equal size is a cheap pre-filter, not proof of identical content: the
    returned files are duplicate *candidates*.
    """
    by_size: dict[int, list[Path]] = {}
    for entry in directory.rglob("*"):
        if entry.is_file():
            n_bytes = entry.stat().st_size
            if n_bytes in by_size:
                by_size[n_bytes].append(entry)
            else:
                by_size[n_bytes] = [entry]

    # A size seen only once cannot have a duplicate, so drop it.
    candidates: dict[int, list[Path]] = {}
    for n_bytes, group in by_size.items():
        if len(group) >= 2:
            candidates[n_bytes] = group
    return candidates
Practical Example 2 — Project Directory Analysis
from pathlib import Path
from collections import Counter
from dataclasses import dataclass
@dataclass
class ProjectStats:
    """Aggregate statistics returned by analyze_project()."""

    total_files: int  # number of files counted
    total_size_bytes: int  # sum of the counted files' sizes in bytes
    extension_counts: Counter  # file suffix (or "(no ext)") -> number of files
    largest_files: list[tuple[Path, int]]  # (path, size in bytes), largest first


def analyze_project(root: Path, top_n: int = 10) -> ProjectStats:
    """Analyzes project directory structure.

    Walks ``root`` recursively and counts every regular file that is not
    located under (or itself named) one of the ignored names: ``.git``,
    ``__pycache__``, ``node_modules``, ``.venv``, ``venv``.

    Args:
        root: Project directory to scan.
        top_n: How many of the largest files to report.

    Returns:
        A ProjectStats with the file count, total size, per-extension counts
        and the ``top_n`` largest files in descending size order.
    """
    ignored = frozenset({".git", "__pycache__", "node_modules", ".venv", "venv"})
    ext_counter: Counter = Counter()
    file_sizes: list[tuple[Path, int]] = []

    for file in root.rglob("*"):
        # Only components *below* root decide whether to skip. Testing the
        # full path would discard the whole project whenever root itself sits
        # inside a directory with an ignored name (e.g. ~/venv/myproject).
        # Checked before is_file() so ignored entries cost no stat call.
        if not ignored.isdisjoint(file.relative_to(root).parts):
            continue
        if not file.is_file():
            continue
        size = file.stat().st_size
        ext_counter[file.suffix or "(no ext)"] += 1
        file_sizes.append((file, size))

    file_sizes.sort(key=lambda item: item[1], reverse=True)
    return ProjectStats(
        total_files=len(file_sizes),
        total_size_bytes=sum(size for _, size in file_sizes),
        extension_counts=ext_counter,
        largest_files=file_sizes[:top_n],
    )
Expert Tips
Tip 1 — with_suffix() / with_name() / with_stem()
p = Path("report.csv")
p.with_suffix(".xlsx") # Path('report.xlsx')
p.with_name("output.csv") # Path('output.csv')
p.with_stem("summary") # Path('summary.csv') — Python 3.9+
Tip 2 — When you need to pass a path as a string
import os
p = Path("data/file.txt")
str(p) # 'data/file.txt'
p.as_posix() # 'data/file.txt' (Unix-style slashes)
os.fspath(p) # Same as str — official protocol
Tip 3 — Path objects work directly with most standard library functions
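Since Python 3.6 (PEP 519, os.PathLike), open() and the os / os.path functions accept Path objects directly, and most shutil functions do as well (shutil.move gained full Path support in Python 3.9). You rarely need to convert explicitly with str().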
Tip 4 — Computing relative paths
base = Path("/home/user/projects")
target = Path("/home/user/projects/app/main.py")
relative = target.relative_to(base)
# PosixPath('app/main.py')