Skip to main content
Advertisement

pathlib.Path — Modern File Path Handling Mastery

Introduced in Python 3.4, the pathlib module treats file paths as objects rather than strings. It replaces os.path functions with methods and lets you combine paths intuitively using the / operator.


pathlib.Path Basics

from pathlib import Path

# Current directory
p = Path(".")
p = Path.cwd() # Current directory as absolute path

# Home directory
home = Path.home() # /home/user or C:\Users\user

# Absolute path
p = Path("/etc/hosts") # Unix
p = Path("C:/Users/user/doc") # Windows (forward slashes work)

# Relative path
p = Path("data/input.csv")

Path Construction — Path() and / Operator

Use the / operator to join path components.

from pathlib import Path

base = Path("/home/user")
data_dir = base / "projects" / "myapp" / "data"
# PosixPath('/home/user/projects/myapp/data')

config = base / "config" / "settings.json"
# PosixPath('/home/user/config/settings.json')

# Mix with strings
filename = "report.csv"
output = data_dir / filename

Use Path.resolve() to get the absolute path with symlinks and .. resolved.

p = Path("../sibling/file.txt")
print(p.resolve()) # /home/user/sibling/file.txt

Path Attributes

p = Path("/home/user/projects/app/main.py")

p.name # 'main.py' — filename with extension
p.stem # 'main' — filename without extension
p.suffix # '.py' — extension
p.suffixes # ['.py'] — multiple extensions (e.g. ['.tar', '.gz'])
p.parent # /home/user/projects/app
p.parents # sequence of all parent paths
p.parts # ('/', 'home', 'user', 'projects', 'app', 'main.py')
p.root # '/'
p.drive # '' (Unix) or 'C:' (Windows)

# Status checks
p.exists() # Whether it exists
p.is_file() # Whether it's a file
p.is_dir() # Whether it's a directory
p.is_symlink() # Whether it's a symbolic link
p.is_absolute() # Whether it's an absolute path

Reading and Writing Files

from pathlib import Path

p = Path("data.txt")

# Text write/read — write first so the read below has something to load
p.write_text("New content\n", encoding="utf-8")
content = p.read_text(encoding="utf-8")

# Binary write/read — kept in a separate file so data.txt stays valid UTF-8
# for the text-mode loop below
blob = Path("data.bin")
blob.write_bytes(b"\x89PNG\r\n")
data = blob.read_bytes()

# Usable just like open()
with p.open("r", encoding="utf-8") as f:
    for line in f:
        print(line.rstrip())

Directory Traversal — iterdir, glob, rglob

iterdir — Direct Children

from pathlib import Path

p = Path(".")
# iterdir() yields only the direct children; it does not descend into them
for child in p.iterdir():
    kind = "DIR " if child.is_dir() else "FILE"
    print(f"{kind} {child.name}")

glob — Pattern Matching (Single Level or Recursive)

from pathlib import Path

p = Path("src")

# .py files directly inside src (no recursion)
for f in p.glob("*.py"):
    print(f)

# .py files in src itself and in all of its subdirectories (** is recursive)
for f in p.glob("**/*.py"):
    print(f)

rglob — Recursive glob

# Same as Path("src").glob("**/*.py")
for f in Path("src").rglob("*.py"):
    print(f)

# Directories only
for d in Path(".").rglob("*"):
    if d.is_dir():
        print(d)

from pathlib import Path
import shutil

p = Path("new_dir")

# Create directory
p.mkdir() # Simple creation (raises FileExistsError if it already exists)
p.mkdir(parents=True, exist_ok=True) # Including parents, OK if already exists

# Rename / move
# Both methods return the new Path (Python 3.8+). Rebind p: the old object
# keeps pointing at the location that no longer exists, so calling further
# methods on it would raise FileNotFoundError.
# A relative target is resolved against the current working directory,
# not against p's own parent directory.
p = p.rename("renamed_dir") # New name, relative to the working directory
p = p.replace("other/dir") # Move to different location (overwrites); "other" must already exist

# Delete file
Path("temp.txt").unlink() # Raises FileNotFoundError if missing
Path("temp.txt").unlink(missing_ok=True) # No error if missing (Python 3.8+)

# Delete directory (only when empty)
Path("empty_dir").rmdir()

# Delete entire directory (requires shutil)
shutil.rmtree(Path("full_dir"))

# Copy file (using shutil)
shutil.copy2(Path("src.txt"), Path("dst.txt"))

File Metadata

from pathlib import Path
import datetime

p = Path("data.txt")
stat = p.stat()

print(stat.st_size) # File size (bytes)
print(stat.st_mtime) # Modification time (Unix timestamp)

# Human-readable format
mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
print(mtime.strftime("%Y-%m-%d %H:%M:%S"))

# Size in KB/MB
size_kb = stat.st_size / 1024
size_mb = stat.st_size / 1024 / 1024

os.path vs pathlib Comparison

| Operation | os.path (old-style) | pathlib (modern) |
| --- | --- | --- |
| Path join | os.path.join(a, b) | a / b |
| Filename | os.path.basename(p) | p.name |
| Directory | os.path.dirname(p) | p.parent |
| Extension | os.path.splitext(p)[1] | p.suffix |
| Exists | os.path.exists(p) | p.exists() |
| Absolute path | os.path.abspath(p) | p.resolve() |
| Is file | os.path.isfile(p) | p.is_file() |
| Is directory | os.path.isdir(p) | p.is_dir() |

Practical Example — File Organization Script

from pathlib import Path
from datetime import datetime, timedelta
import shutil

def _unique_destination(dest: Path) -> Path:
    """Return ``dest`` if it is free, else the first free ``stem_N.ext`` sibling."""
    candidate = dest
    counter = 0
    while candidate.exists():
        counter += 1
        candidate = dest.with_name(f"{dest.stem}_{counter}{dest.suffix}")
    return candidate


def _move_file(src: Path, dest: Path) -> bool:
    """Move ``src`` to ``dest`` without overwriting; return True on success."""
    try:
        # Source and destination are already the same file: nothing to do.
        if dest.exists() and dest.samefile(src):
            return True
        # shutil.move falls back to copy + delete when the destination is on
        # another filesystem, where a plain rename raises OSError.
        shutil.move(src, _unique_destination(dest))
    except OSError as e:
        print(f"Move failed {src.name}: {e}")
        return False
    return True


def organize_downloads(
    downloads: Path,
    archive: Path,
    days_old: int = 30
) -> dict[str, int]:
    """
    Categorizes files in the downloads folder by extension
    and moves old files to the archive.

    Only regular files directly inside ``downloads`` are considered.
    Files last modified more than ``days_old`` days ago are moved to
    ``archive`` (created if missing). Newer files with a known extension
    are moved to a category subfolder of ``downloads``; all other files
    stay where they are. An existing file at the destination is never
    overwritten: the incoming file gets a ``_1``, ``_2``, ... suffix.

    Args:
        downloads: Directory whose direct children are organized.
        archive: Directory that receives the old files.
        days_old: Age threshold in days, based on modification time.

    Returns:
        Counters with the keys ``"moved"`` (archived), ``"organized"``
        (categorized) and ``"errors"`` (files that could not be handled).
    """
    archive.mkdir(parents=True, exist_ok=True)
    cutoff = datetime.now() - timedelta(days=days_old)
    stats = {"moved": 0, "organized": 0, "errors": 0}

    # Categorization rules by extension
    categories = {
        "images": {".jpg", ".jpeg", ".png", ".gif", ".webp"},
        "documents": {".pdf", ".docx", ".xlsx", ".pptx", ".txt"},
        "videos": {".mp4", ".mkv", ".avi", ".mov"},
        "archives": {".zip", ".tar", ".gz", ".7z"},
    }
    # Inverted once so each file needs a single lookup.
    category_by_suffix = {
        extension: category
        for category, extensions in categories.items()
        for extension in extensions
    }

    # Snapshot the listing: the loop creates folders and moves files inside
    # this same directory while it runs.
    for file in list(downloads.iterdir()):
        try:
            if not file.is_file():
                continue
            mtime = datetime.fromtimestamp(file.stat().st_mtime)
        except OSError as e:
            # The file vanished or became unreadable since it was listed.
            print(f"Stat failed {file.name}: {e}")
            stats["errors"] += 1
            continue

        # Old file → move to archive
        if mtime < cutoff:
            if _move_file(file, archive / file.name):
                stats["moved"] += 1
            else:
                stats["errors"] += 1
            continue

        # Categorize by extension
        category = category_by_suffix.get(file.suffix.lower())
        if category is None:
            continue

        dest_dir = downloads / category
        try:
            # Raises FileExistsError if a regular file already uses this name.
            dest_dir.mkdir(exist_ok=True)
        except OSError as e:
            print(f"Move failed {file.name}: {e}")
            stats["errors"] += 1
            continue

        if _move_file(file, dest_dir / file.name):
            stats["organized"] += 1
        else:
            stats["errors"] += 1

    return stats


def find_duplicates(directory: Path) -> dict[int, list[Path]]:
    """
    Finds files with the same size as duplicate candidates.

    The search is recursive. Equal size is only a hint: compare contents
    (for example with a hash) before treating candidates as duplicates.

    Args:
        directory: Root directory of the recursive search.

    Returns:
        Mapping of file size in bytes to the sorted list of files of that
        size, limited to sizes shared by two or more files.
    """
    size_map: dict[int, list[Path]] = {}

    for file in directory.rglob("*"):
        try:
            if not file.is_file():
                continue
            size = file.stat().st_size
        except OSError:
            # The file vanished or is unreadable: it cannot be a candidate,
            # and one bad entry should not abort the whole scan.
            continue
        size_map.setdefault(size, []).append(file)

    # Only cases where 2 or more files have the same size; groups are sorted
    # so the result does not depend on filesystem listing order.
    return {
        size: sorted(files)
        for size, files in size_map.items()
        if len(files) > 1
    }

Practical Example 2 — Project Directory Analysis

from pathlib import Path
from collections import Counter
from dataclasses import dataclass

@dataclass
class ProjectStats:
    """Summary figures returned by analyze_project()."""

    total_files: int  # number of files counted
    total_size_bytes: int  # combined size of the counted files
    extension_counts: Counter  # suffix (or "(no ext)") -> number of files
    largest_files: list[tuple[Path, int]]  # (path, size) pairs, largest first


def analyze_project(root: Path, top_n: int = 10) -> ProjectStats:
    """Analyzes project directory structure.

    Walks ``root`` recursively and counts every regular file that is not
    located under an ignored directory (.git, __pycache__, node_modules,
    .venv, venv).

    Args:
        root: Project directory to analyze.
        top_n: Maximum number of entries in ``largest_files``; values
            below zero are treated as zero.

    Returns:
        A ProjectStats with file count, total size, per-extension counts
        and the ``top_n`` largest files.
    """
    ext_counter: Counter = Counter()
    file_sizes: list[tuple[Path, int]] = []

    # Exclude .git, __pycache__, node_modules and virtual environments
    ignored_names = frozenset(
        {".git", "__pycache__", "node_modules", ".venv", "venv"}
    )

    def should_skip(p: Path) -> bool:
        # Look only at the part of the path below root. Checking the full
        # path would discard every file whenever the project itself lives
        # under a directory such as ".../venv/".
        return any(part in ignored_names for part in p.relative_to(root).parts)

    for file in root.rglob("*"):
        if should_skip(file):
            continue
        try:
            if not file.is_file():
                continue
            size = file.stat().st_size
        except OSError:
            # The file vanished or is unreadable: leave it out of the totals.
            continue
        ext_counter[file.suffix or "(no ext)"] += 1
        file_sizes.append((file, size))

    file_sizes.sort(key=lambda x: x[1], reverse=True)

    return ProjectStats(
        total_files=len(file_sizes),
        total_size_bytes=sum(s for _, s in file_sizes),
        extension_counts=ext_counter,
        # A negative top_n would otherwise slice from the wrong end.
        largest_files=file_sizes[:max(top_n, 0)],
    )

Expert Tips

Tip 1 — with_suffix() / with_name() / with_stem()

p = Path("report.csv")
p.with_suffix(".xlsx") # Path('report.xlsx')
p.with_name("output.csv") # Path('output.csv')
p.with_stem("summary") # Path('summary.csv') — Python 3.9+

Tip 2 — When you need to pass a path as a string

import os
p = Path("data/file.txt")

str(p) # 'data/file.txt'
p.as_posix() # 'data/file.txt' (Unix-style slashes)
os.fspath(p) # Same as str — official protocol

Tip 3 — Path objects work directly with most standard library functions

In Python 3.6+, os.path, open(), and shutil functions accept Path objects directly. No need to explicitly convert with str().

Tip 4 — Computing relative paths

base = Path("/home/user/projects")
target = Path("/home/user/projects/app/main.py")

relative = target.relative_to(base)
# PosixPath('app/main.py')
Advertisement