pathlib.Path — Modern File Path Handling Mastery

Introduced in Python 3.4, the pathlib module treats file paths as objects rather than strings. It replaces os.path functions with methods and lets you combine paths intuitively using the / operator.

pathlib.Path Basics

from pathlib import Path

# Current directory
p = Path(".")
p = Path.cwd()       # Current directory as absolute path

# Home directory
home = Path.home()   # /home/user  or  C:\Users\user

# Absolute path
p = Path("/etc/hosts")           # Unix
p = Path("C:/Users/user/doc")    # Windows (forward slashes work)

# Relative path
p = Path("data/input.csv")

Path Construction — Path() and / Operator

Use the / operator to join path components.

from pathlib import Path

base = Path("/home/user")
data_dir = base / "projects" / "myapp" / "data"
# PosixPath('/home/user/projects/myapp/data')

config = base / "config" / "settings.json"
# PosixPath('/home/user/config/settings.json')

# Mix with strings
filename = "report.csv"
output = data_dir / filename

Use Path.resolve() to get the absolute path with symlinks and .. resolved.

p = Path("../sibling/file.txt")
print(p.resolve())  # /home/user/sibling/file.txt

Path Attributes

p = Path("/home/user/projects/app/main.py")

p.name       # 'main.py'           — filename with extension
p.stem       # 'main'              — filename without extension
p.suffix     # '.py'               — extension
p.suffixes   # ['.py']             — multiple extensions (e.g. ['.tar', '.gz'])
p.parent     # /home/user/projects/app
p.parents    # sequence of all parent paths
p.parts      # ('/', 'home', 'user', 'projects', 'app', 'main.py')
p.root       # '/'
p.drive      # '' (Unix) or 'C:' (Windows)

# Status checks
p.exists()          # Whether it exists
p.is_file()         # Whether it's a file
p.is_dir()          # Whether it's a directory
p.is_symlink()      # Whether it's a symbolic link
p.is_absolute()     # Whether it's an absolute path

Reading and Writing Files

from pathlib import Path

p = Path("data.txt")

# Text read/write
content = p.read_text(encoding="utf-8")
p.write_text("New content\n", encoding="utf-8")

# Binary read/write
data = p.read_bytes()
p.write_bytes(b"\x89PNG\r\n")

# Usable just like open()
with p.open("r", encoding="utf-8") as f:
    for line in f:
        print(line.rstrip())

Directory Traversal — iterdir, glob, rglob

iterdir — Direct Children

from pathlib import Path

p = Path(".")
for child in p.iterdir():
    kind = "DIR " if child.is_dir() else "FILE"
    print(f"{kind} {child.name}")

glob — Pattern Matching (Including Single Depth)

from pathlib import Path

p = Path("src")

# .py files directly inside src (non-recursive: "*" does not cross directories)
for f in p.glob("*.py"):
    print(f)

# .py files in src itself and in every subdirectory below it ("**" recurses)
for f in p.glob("**/*.py"):
    print(f)

rglob — Recursive glob

# Same as p.glob("**/*.py")
for f in Path("src").rglob("*.py"):
    print(f)

# Directories only
for d in Path(".").rglob("*"):
    if d.is_dir():
        print(d)

File Operations — mkdir, rename, unlink, copy

from pathlib import Path
import shutil

p = Path("new_dir")

# Create directory
p.mkdir()                       # Simple creation; raises FileExistsError if it already exists
p.mkdir(parents=True, exist_ok=True)  # Including parents, OK if already exists

# Rename / move
# NOTE: rename() and replace() return the new Path and leave `p` pointing at
# the old location, so the two calls below are independent alternatives, not
# steps to run back to back. A relative target is resolved against the current
# working directory, not against p's parent directory.
p.rename("renamed_dir")                # Rename; whether an existing target is overwritten is platform-dependent
p.replace("other/dir")                 # Move to different location (replaces an existing file or empty directory)

# Delete file
# The first form raises FileNotFoundError when the file does not exist.
Path("temp.txt").unlink()
Path("temp.txt").unlink(missing_ok=True)  # No error if missing (Python 3.8+)

# Delete directory (only when empty)
Path("empty_dir").rmdir()

# Delete entire directory tree, contents included (requires shutil)
shutil.rmtree(Path("full_dir"))

# Copy file (using shutil); copy2 also tries to preserve file metadata
shutil.copy2(Path("src.txt"), Path("dst.txt"))

File Metadata

from pathlib import Path
import datetime

p = Path("data.txt")
stat = p.stat()

print(stat.st_size)   # File size (bytes)
print(stat.st_mtime)  # Modification time (Unix timestamp)

# Human-readable format
mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
print(mtime.strftime("%Y-%m-%d %H:%M:%S"))

# Size in KB/MB
size_kb = stat.st_size / 1024
size_mb = stat.st_size / 1024 / 1024

os.path vs pathlib Comparison

Operation	os.path (old-style)	pathlib (modern)
Path join	`os.path.join(a, b)`	`a / b`
Filename	`os.path.basename(p)`	`p.name`
Directory	`os.path.dirname(p)`	`p.parent`
Extension	`os.path.splitext(p)[1]`	`p.suffix`
Exists	`os.path.exists(p)`	`p.exists()`
Absolute path	`os.path.abspath(p)`	`p.resolve()` (also resolves symlinks, like `os.path.realpath(p)`; `p.absolute()` keeps symlinks and `..`)
Is file	`os.path.isfile(p)`	`p.is_file()`
Is directory	`os.path.isdir(p)`	`p.is_dir()`

Practical Example — File Organization Script

from pathlib import Path
from datetime import datetime, timedelta
import shutil

def _move_no_clobber(src: Path, dest: Path) -> None:
    """Move *src* to *dest*, refusing to overwrite an existing entry.

    shutil.move() is used instead of Path.rename() so the move also works
    when source and destination live on different filesystems.

    Raises:
        FileExistsError: if *dest* already exists (including a dangling symlink).
        OSError: if the underlying move fails.
    """
    if dest.exists() or dest.is_symlink():
        raise FileExistsError(f"destination already exists: {dest}")
    shutil.move(src, dest)


def organize_downloads(
    downloads: Path,
    archive: Path,
    days_old: int = 30
) -> dict[str, int]:
    """
    Categorizes files in the downloads folder by extension
    and moves old files to the archive.

    Only regular files directly inside *downloads* are considered;
    subdirectories are left alone. A file whose modification time is more
    than *days_old* days in the past is moved to *archive*. Any other file
    whose (lower-cased) extension belongs to a known category is moved to
    ``downloads/<category>``. Files with unknown extensions stay in place.

    An existing file at the destination is never overwritten: the collision
    is reported, counted as an error, and the source file is left untouched.

    Args:
        downloads: Directory to organize.
        archive: Directory that receives old files; created if missing.
        days_old: Age threshold in days for archiving.

    Returns:
        Counters with the keys ``"moved"`` (archived files), ``"organized"``
        (categorized files) and ``"errors"`` (failed moves).
    """
    archive.mkdir(parents=True, exist_ok=True)
    cutoff = datetime.now() - timedelta(days=days_old)
    stats = {"moved": 0, "organized": 0, "errors": 0}

    # Categorization rules by extension
    categories = {
        "images": {".jpg", ".jpeg", ".png", ".gif", ".webp"},
        "documents": {".pdf", ".docx", ".xlsx", ".pptx", ".txt"},
        "videos": {".mp4", ".mkv", ".avi", ".mov"},
        "archives": {".zip", ".tar", ".gz", ".7z"},
    }

    # Take a snapshot first: the loop creates directories and moves entries
    # inside the very directory being listed.
    for file in sorted(downloads.iterdir()):
        if not file.is_file():
            continue

        mtime = datetime.fromtimestamp(file.stat().st_mtime)

        # Old file → move to archive
        if mtime < cutoff:
            try:
                _move_no_clobber(file, archive / file.name)
            except OSError as e:
                print(f"Move failed {file.name}: {e}")
                stats["errors"] += 1
            else:
                stats["moved"] += 1
            continue

        # Categorize by extension
        suffix = file.suffix.lower()
        for category, extensions in categories.items():
            if suffix in extensions:
                dest_dir = downloads / category
                try:
                    # mkdir is inside the try: a plain file that happens to
                    # be named like the category must not abort the run.
                    dest_dir.mkdir(exist_ok=True)
                    _move_no_clobber(file, dest_dir / file.name)
                except OSError as e:
                    print(f"Organize failed {file.name}: {e}")
                    stats["errors"] += 1
                else:
                    stats["organized"] += 1
                break

    return stats


def find_duplicates(directory: Path) -> dict[int, list[Path]]:
    """
    Groups every regular file below *directory* (searched recursively) by its
    size in bytes and returns only the groups that contain at least two files.

    Equal size merely makes files duplicate *candidates*; their contents
    are not compared.
    """
    by_size: dict[int, list[Path]] = {}

    regular_files = (entry for entry in directory.rglob("*") if entry.is_file())
    for entry in regular_files:
        n_bytes = entry.stat().st_size
        if n_bytes in by_size:
            by_size[n_bytes].append(entry)
        else:
            by_size[n_bytes] = [entry]

    # Drop sizes that occur only once
    candidates: dict[int, list[Path]] = {}
    for n_bytes, group in by_size.items():
        if len(group) >= 2:
            candidates[n_bytes] = group
    return candidates

Practical Example 2 — Project Directory Analysis

from pathlib import Path
from collections import Counter
from dataclasses import dataclass

@dataclass
class ProjectStats:
    """Summary figures produced by analyze_project()."""

    total_files: int  # number of files counted
    total_size_bytes: int  # combined size of the counted files, in bytes
    extension_counts: Counter  # files per suffix; "(no ext)" when there is none
    largest_files: list[tuple[Path, int]]  # (path, size in bytes), biggest first


def analyze_project(root: Path, top_n: int = 10) -> ProjectStats:
    """Analyzes project directory structure.

    Walks *root* recursively and collects size and extension statistics for
    every regular file, skipping anything located inside a tooling directory
    (.git, __pycache__, node_modules, .venv, venv).

    Args:
        root: Project directory to scan.
        top_n: How many of the largest files to report; negative values
            are treated as 0.

    Returns:
        A ProjectStats with the file count, the total size in bytes, the
        per-extension counts and the *top_n* largest files.
    """
    ext_counter: Counter = Counter()
    file_sizes: list[tuple[Path, int]] = []

    # Exclude version-control, cache, dependency and virtualenv directories
    IGNORE = {".git", "__pycache__", "node_modules", ".venv", "venv"}

    def should_skip(p: Path) -> bool:
        # Only look at the components *below* root. Testing the full path
        # would discard every file whenever root itself lives under a
        # directory with an ignored name (e.g. ~/venv/myproject).
        return not IGNORE.isdisjoint(p.relative_to(root).parts)

    for file in root.rglob("*"):
        if not file.is_file() or should_skip(file):
            continue
        size = file.stat().st_size
        ext_counter[file.suffix or "(no ext)"] += 1
        file_sizes.append((file, size))

    file_sizes.sort(key=lambda x: x[1], reverse=True)

    return ProjectStats(
        total_files=len(file_sizes),
        total_size_bytes=sum(s for _, s in file_sizes),
        extension_counts=ext_counter,
        # Clamp so a negative top_n yields an empty list instead of
        # "everything but the last few".
        largest_files=file_sizes[:max(top_n, 0)],
    )

Expert Tips

Tip 1 — with_suffix() / with_name() / with_stem()

p = Path("report.csv")
p.with_suffix(".xlsx")    # Path('report.xlsx')
p.with_name("output.csv") # Path('output.csv')
p.with_stem("summary")    # Path('summary.csv')  — Python 3.9+

Tip 2 — When you need to pass a path as a string

import os
p = Path("data/file.txt")

str(p)          # 'data/file.txt'
p.as_posix()    # 'data/file.txt' (Unix-style slashes)
os.fspath(p)    # Same as str — official protocol

Tip 3 — Path objects work directly with most standard library functions

In Python 3.6+, os.path, open(), and shutil functions accept Path objects directly. No need to explicitly convert with str().

Tip 4 — Computing relative paths

base = Path("/home/user/projects")
target = Path("/home/user/projects/app/main.py")

relative = target.relative_to(base)
# PosixPath('app/main.py')

pathlib.Path Basics

Path Construction — Path() and / Operator

Path Attributes

Reading and Writing Files

Directory Traversal — iterdir, glob, rglob

iterdir — Direct Children

glob — Pattern Matching (Including Single Depth)

rglob — Recursive glob

File Operations — mkdir, rename, unlink, copy

File Metadata

os.path vs pathlib Comparison

Practical Example — File Organization Script

Practical Example 2 — Project Directory Analysis

Expert Tips