pathlib.Path — 현대적 파일 경로 처리 완전 정복

Python 3.4에서 도입된 pathlib 모듈은 파일 경로를 문자열이 아닌 객체로 다룹니다. os.path의 함수들을 메서드로 대체하고, / 연산자로 경로를 직관적으로 조합할 수 있습니다.

pathlib.Path 기본 사용법

from pathlib import Path

# 현재 디렉터리
p = Path(".")
p = Path.cwd()       # 절대 경로로 현재 디렉터리

# 홈 디렉터리
home = Path.home()   # /home/user  or  C:\Users\user

# 절대 경로
p = Path("/etc/hosts")           # Unix
p = Path("C:/Users/user/doc")    # Windows (슬래시도 작동)

# 상대 경로
p = Path("data/input.csv")

경로 생성 — Path()와 / 연산자

/ 연산자로 경로 구성요소를 이어 붙입니다.

from pathlib import Path

base = Path("/home/user")
data_dir = base / "projects" / "myapp" / "data"
# PosixPath('/home/user/projects/myapp/data')

config = base / "config" / "settings.json"
# PosixPath('/home/user/config/settings.json')

# 문자열과 혼합
filename = "report.csv"
output = data_dir / filename

Path.resolve()로 심볼릭 링크와 `..`을 해소한 절대 경로를 얻습니다.

p = Path("../sibling/file.txt")
print(p.resolve())  # /home/user/sibling/file.txt

경로 속성

p = Path("/home/user/projects/app/main.py")

p.name       # 'main.py'           — 파일명 (확장자 포함)
p.stem       # 'main'              — 파일명 (확장자 제외)
p.suffix     # '.py'               — 확장자
p.suffixes   # ['.py']             — 복수 확장자 (e.g. ['.tar', '.gz'])
p.parent     # /home/user/projects/app
p.parents    # 모든 상위 경로들의 시퀀스
p.parts      # ('/', 'home', 'user', 'projects', 'app', 'main.py')
p.root       # '/'
p.drive      # '' (Unix) or 'C:' (Windows)

# 상태 확인
p.exists()          # 존재 여부
p.is_file()         # 파일인지
p.is_dir()          # 디렉터리인지
p.is_symlink()      # 심볼릭 링크인지
p.is_absolute()     # 절대 경로인지

파일 읽기/쓰기

from pathlib import Path

p = Path("data.txt")

# 텍스트 읽기/쓰기
content = p.read_text(encoding="utf-8")
p.write_text("새 내용\n", encoding="utf-8")

# 바이너리 읽기/쓰기
data = p.read_bytes()
p.write_bytes(b"\x89PNG\r\n")

# open()과 동일하게 사용 가능
with p.open("r", encoding="utf-8") as f:
    for line in f:
        print(line.rstrip())

디렉터리 탐색 — iterdir, glob, rglob

iterdir — 직접 자식 목록

from pathlib import Path

p = Path(".")
for child in p.iterdir():
    kind = "DIR " if child.is_dir() else "FILE"
    print(f"{kind} {child.name}")

glob — 패턴 매칭 (단일 깊이 포함)

from pathlib import Path

p = Path("src")

# 현재 디렉터리의 .py 파일
for f in p.glob("*.py"):
    print(f)

# 모든 하위 디렉터리의 .py 파일 (**재귀)
for f in p.glob("**/*.py"):
    print(f)

rglob — 재귀 glob

# p.glob("**/*.py")와 동일
for f in Path("src").rglob("*.py"):
    print(f)

# 디렉터리만
for d in Path(".").rglob("*"):
    if d.is_dir():
        print(d)

파일 조작 — mkdir, rename, unlink, copy

from pathlib import Path
import shutil

p = Path("new_dir")

# 디렉터리 생성
p.mkdir()                       # 단순 생성
p.mkdir(parents=True, exist_ok=True)  # 상위 포함, 이미 있어도 OK

# 이름 변경 / 이동
p.rename("renamed_dir")                # 같은 디렉터리
p.replace("other/dir")                 # 다른 위치 이동 (덮어씀 허용)

# 파일 삭제
Path("temp.txt").unlink()
Path("temp.txt").unlink(missing_ok=True)  # 없어도 오류 없음

# 디렉터리 삭제 (비어있을 때만)
Path("empty_dir").rmdir()

# 디렉터리 전체 삭제 (shutil 필요)
shutil.rmtree(Path("full_dir"))

# 파일 복사 (shutil 사용)
shutil.copy2(Path("src.txt"), Path("dst.txt"))

파일 메타데이터

from pathlib import Path
import datetime

p = Path("data.txt")
stat = p.stat()

print(stat.st_size)   # 파일 크기 (바이트)
print(stat.st_mtime)  # 수정 시각 (Unix timestamp)

# 사람이 읽기 좋은 형식
mtime = datetime.datetime.fromtimestamp(stat.st_mtime)
print(mtime.strftime("%Y-%m-%d %H:%M:%S"))

# 크기를 KB/MB로
size_kb = stat.st_size / 1024
size_mb = stat.st_size / 1024 / 1024

os.path vs pathlib 비교

작업	os.path (구식)	pathlib (현대식)
경로 결합	`os.path.join(a, b)`	`a / b`
파일명	`os.path.basename(p)`	`p.name`
디렉터리	`os.path.dirname(p)`	`p.parent`
확장자	`os.path.splitext(p)[1]`	`p.suffix`
존재 여부	`os.path.exists(p)`	`p.exists()`
절대 경로	`os.path.abspath(p)`	`p.resolve()` (심볼릭 링크까지 해소 — 엄밀히는 `os.path.realpath(p)`에 대응)
파일인지	`os.path.isfile(p)`	`p.is_file()`
디렉터리인지	`os.path.isdir(p)`	`p.is_dir()`

실전 예제 — 파일 정리 스크립트

from pathlib import Path
from datetime import datetime, timedelta
import shutil

def _unique_destination(dest: Path) -> Path:
    """Return *dest*, or a numbered sibling of it when *dest* already exists.

    ``report.pdf`` becomes ``report (1).pdf``, ``report (2).pdf``, ... so that
    a move never silently replaces a file already sitting at the destination.
    """
    if not dest.exists():
        return dest
    counter = 1
    while True:
        candidate = dest.with_name(f"{dest.stem} ({counter}){dest.suffix}")
        if not candidate.exists():
            return candidate
        counter += 1


def organize_downloads(
    downloads: Path,
    archive: Path,
    days_old: int = 30
) -> dict[str, int]:
    """Sort the files of a downloads folder and archive the stale ones.

    Only regular files directly inside *downloads* are considered. A file
    whose modification time is more than *days_old* days in the past is moved
    into *archive*; any other file whose (case-insensitive) suffix belongs to
    a known category is moved into ``downloads/<category>``. Files with an
    unknown suffix stay where they are.

    Args:
        downloads: Directory whose direct children are processed.
        archive: Directory that receives stale files; created if missing.
        days_old: Age threshold in days for archiving.

    Returns:
        Counters keyed by ``"moved"`` (archived), ``"organized"``
        (categorized) and ``"errors"`` (files that could not be processed).
    """
    archive.mkdir(parents=True, exist_ok=True)
    cutoff = datetime.now() - timedelta(days=days_old)
    stats = {"moved": 0, "organized": 0, "errors": 0}

    # Classification rules: category folder name -> suffixes it collects.
    categories = {
        "images": {".jpg", ".jpeg", ".png", ".gif", ".webp"},
        "documents": {".pdf", ".docx", ".xlsx", ".pptx", ".txt"},
        "videos": {".mp4", ".mkv", ".avi", ".mov"},
        "archives": {".zip", ".tar", ".gz", ".7z"},
    }
    # Inverted once so each file needs a single dict lookup.
    category_of = {
        ext: name for name, extensions in categories.items() for ext in extensions
    }

    # Snapshot the listing: the loop creates sub-directories and moves files
    # inside the very directory being iterated.
    for file in list(downloads.iterdir()):
        if not file.is_file():
            continue

        try:
            mtime = datetime.fromtimestamp(file.stat().st_mtime)
        except OSError as e:
            # The file may have vanished between is_file() and stat().
            print(f"상태 확인 실패 {file.name}: {e}")
            stats["errors"] += 1
            continue

        # Stale file -> archive.
        if mtime < cutoff:
            try:
                # shutil.move also works across filesystems, where
                # Path.rename() fails; the unique name prevents overwriting
                # an earlier archived file with the same name.
                shutil.move(file, _unique_destination(archive / file.name))
            except OSError as e:
                print(f"이동 실패 {file.name}: {e}")
                stats["errors"] += 1
            else:
                stats["moved"] += 1
            continue

        # Recent file -> category folder, if its suffix is known.
        category = category_of.get(file.suffix.lower())
        if category is None:
            continue

        dest_dir = downloads / category
        try:
            # mkdir is inside the try: a regular file named like the category
            # raises FileExistsError, which must not abort the whole run.
            dest_dir.mkdir(exist_ok=True)
            shutil.move(file, _unique_destination(dest_dir / file.name))
        except OSError as e:
            print(f"분류 실패 {file.name}: {e}")
            stats["errors"] += 1
        else:
            stats["organized"] += 1

    return stats


def find_duplicates(directory: Path) -> dict[int, list[Path]]:
    """Group the files under *directory* by size and keep the shared sizes.

    The whole tree is searched recursively. Equal size is only a hint that
    files might be duplicates; contents are not compared.

    Returns:
        A mapping from a size in bytes to the files having that size,
        restricted to sizes shared by at least two files.
    """
    by_size: dict[int, list[Path]] = {}

    regular_files = (entry for entry in directory.rglob("*") if entry.is_file())
    for entry in regular_files:
        nbytes = entry.stat().st_size
        if nbytes in by_size:
            by_size[nbytes].append(entry)
        else:
            by_size[nbytes] = [entry]

    # Drop every size that occurs only once.
    candidates: dict[int, list[Path]] = {}
    for nbytes, group in by_size.items():
        if len(group) >= 2:
            candidates[nbytes] = group
    return candidates

실전 예제 2 — 프로젝트 디렉터리 분석

from pathlib import Path
from collections import Counter
from dataclasses import dataclass

@dataclass
class ProjectStats:
    """Summary of a project tree as computed by ``analyze_project``."""

    # Number of regular files counted (files in ignored directories excluded).
    total_files: int
    # Sum of the sizes of the counted files, in bytes.
    total_size_bytes: int
    # File count per suffix; files without a suffix are keyed "(no ext)".
    extension_counts: Counter
    # (path, size in bytes) pairs, largest first, at most ``top_n`` entries.
    largest_files: list[tuple[Path, int]]

def analyze_project(root: Path, top_n: int = 10) -> ProjectStats:
    """Collect size and extension statistics for the files under *root*.

    The tree is searched recursively. Any file whose path below *root*
    contains a component named ``.git``, ``__pycache__``, ``node_modules``,
    ``.venv`` or ``venv`` is left out.

    Args:
        root: Top directory of the project to analyze.
        top_n: How many of the largest files to report.

    Returns:
        A ``ProjectStats`` with totals, per-suffix counts and the largest
        files in descending size order.
    """
    ignored = {".git", "__pycache__", "node_modules", ".venv", "venv"}
    ext_counter: Counter = Counter()
    file_sizes: list[tuple[Path, int]] = []

    for file in root.rglob("*"):
        if not file.is_file():
            continue
        # Match only the components *below* root. Checking the full path
        # would skip every file of a project that itself lives under a
        # directory such as ".../venv/myproject".
        if not ignored.isdisjoint(file.relative_to(root).parts):
            continue
        size = file.stat().st_size
        ext_counter[file.suffix or "(no ext)"] += 1
        file_sizes.append((file, size))

    file_sizes.sort(key=lambda x: x[1], reverse=True)

    return ProjectStats(
        total_files=len(file_sizes),
        total_size_bytes=sum(s for _, s in file_sizes),
        extension_counts=ext_counter,
        largest_files=file_sizes[:top_n],
    )

고수 팁

팁 1 — with_suffix() / with_name() / with_stem()

p = Path("report.csv")
p.with_suffix(".xlsx")   # Path('report.xlsx')
p.with_name("output.csv")  # Path('output.csv')
p.with_stem("summary")   # Path('summary.csv')  — Python 3.9+

팁 2 — 경로를 문자열로 전달해야 할 때

import os
p = Path("data/file.txt")

str(p)          # 'data/file.txt'
p.as_posix()    # 'data/file.txt' (Unix 스타일 슬래시)
os.fspath(p)    # str과 동일 — 공식 프로토콜

팁 3 — Path 객체는 대부분의 표준 라이브러리 함수에서 직접 사용 가능

Python 3.6+에서 os.path, open(), shutil 함수들은 Path 객체를 직접 받습니다. 명시적으로 str()로 변환할 필요가 없습니다.

팁 4 — 상대 경로 계산

base = Path("/home/user/projects")
target = Path("/home/user/projects/app/main.py")

relative = target.relative_to(base)
# PosixPath('app/main.py')

pathlib.Path 기본 사용법​

경로 생성 — Path()와 / 연산자​

경로 속성​

파일 읽기/쓰기​

디렉터리 탐색 — iterdir, glob, rglob​

iterdir — 직접 자식 목록​

glob — 패턴 매칭 (단일 깊이 포함)​

rglob — 재귀 glob​

파일 조작 — mkdir, rename, unlink, copy​

파일 메타데이터​

os.path vs pathlib 비교​

실전 예제 — 파일 정리 스크립트​

실전 예제 2 — 프로젝트 디렉터리 분석​

고수 팁​