파일 읽기/쓰기 — open(), 모드, 인코딩, with 문 패턴

파일은 데이터를 영속적으로 저장하는 가장 기본적인 수단입니다. Python의 open() 함수는 텍스트와 바이너리 파일 모두를 다룰 수 있으며, with 문과 결합하면 안전하고 간결한 파일 처리 코드를 작성할 수 있습니다.

open() 함수 — 모드 완전 정리

open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None)

모드	의미	파일 없으면
`'r'`	읽기 (기본값)	FileNotFoundError
`'w'`	쓰기 (덮어씀)	새로 생성
`'a'`	추가 (append)	새로 생성
`'x'`	배타적 생성	새로 생성; 이미 있으면 FileExistsError
`'b'`	바이너리 모드 (r/w/a와 조합)	—
`'+'`	읽기+쓰기 (r/w/a와 조합)	—

# 자주 쓰이는 조합
open("file.txt", "r")        # 텍스트 읽기
open("file.txt", "w")        # 텍스트 쓰기 (덮어씀)
open("file.txt", "a")        # 텍스트 추가
open("file.bin", "rb")       # 바이너리 읽기
open("file.bin", "wb")       # 바이너리 쓰기
open("file.txt", "r+")       # 텍스트 읽기+쓰기
open("file.txt", "x")        # 새 파일만 생성 (기존 파일 있으면 에러)

인코딩 — utf-8과 cp949

텍스트 파일은 반드시 올바른 인코딩을 지정해야 합니다.

# UTF-8 (Linux, macOS, 대부분의 현대 시스템)
with open("data.txt", "r", encoding="utf-8") as f:
    content = f.read()

# CP949 / EUC-KR (Windows 한국어 레거시)
with open("legacy.txt", "r", encoding="cp949") as f:
    content = f.read()

# Encoding error-handling options
open("file.txt", "r", encoding="utf-8", errors="ignore")   # silently drop undecodable bytes
open("file.txt", "r", encoding="utf-8", errors="replace")  # substitute U+FFFD (�) when decoding ("?" is used only when encoding)
open("file.txt", "r", encoding="utf-8", errors="strict")   # raise an exception on error (default)

권장 사항: 새 파일은 항상 encoding="utf-8"을 명시하십시오. encoding을 지정하지 않으면 시스템 로캘의 기본 인코딩(한국어 Windows는 CP949, Linux와 macOS는 대개 UTF-8)이 사용되어 플랫폼 간 이식성이 떨어집니다.

with 문으로 파일 안전 처리

# with 없이 — 예외 발생 시 file.close() 누락 위험
file = open("data.txt", "r")
content = file.read()
file.close()  # 예외 발생 시 이 줄이 실행되지 않음

# with 문 사용 — 예외 발생 시에도 자동으로 파일 닫힘
with open("data.txt", "r", encoding="utf-8") as f:
    content = f.read()
# 이 지점에서 f는 자동으로 닫힘

# 여러 파일 동시에 열기 (Python 3.10+ 괄호 문법)
with (
    open("input.txt", "r", encoding="utf-8") as fin,
    open("output.txt", "w", encoding="utf-8") as fout,
):
    fout.write(fin.read())

읽기 메서드

with open("data.txt", "r", encoding="utf-8") as f:
    # read(): 전체 내용을 문자열로
    content = f.read()

    # read(n): n바이트/글자 읽기
    f.seek(0)              # 처음으로 되돌리기
    chunk = f.read(100)    # 100글자 읽기

    # readline(): 한 줄씩 읽기 (줄바꿈 포함)
    f.seek(0)
    line = f.readline()    # "첫 번째 줄\n"

    # readlines(): 모든 줄을 리스트로
    f.seek(0)
    lines = f.readlines()  # ["첫 번째 줄\n", "두 번째 줄\n", ...]

대용량 파일은 반복자(iterator)를 사용하십시오. read()와 readlines()는 전체 파일을 한꺼번에 메모리에 올립니다.

# 메모리 효율적인 줄 단위 읽기
with open("large.log", "r", encoding="utf-8") as f:
    for line in f:              # 파일 객체 자체가 이터레이터
        line = line.rstrip("\n")
        process(line)

쓰기 메서드

lines = ["첫 번째 줄", "두 번째 줄", "세 번째 줄"]

with open("output.txt", "w", encoding="utf-8") as f:
    # write(): 문자열 쓰기 (줄바꿈 포함 X — 직접 추가)
    f.write("Hello, World!\n")

    # writelines(): 리스트 내 문자열을 차례로 쓰기 (줄바꿈 포함 X)
    f.writelines(line + "\n" for line in lines)

# 추가 모드
with open("log.txt", "a", encoding="utf-8") as f:
    f.write("새 로그 항목\n")

tell() / seek() — 파일 포인터

# Text mode: tell() returns an opaque position value, and seek() accepts only
# 0 or a value previously returned by tell().
with open("data.txt", "r", encoding="utf-8") as f:
    print(f.tell())    # 0 — start of file

    f.read(10)         # reads 10 characters (not 10 bytes)
    pos = f.tell()     # opaque position value; only meaningful when passed back to seek()
    print(pos)

    f.seek(0)          # back to the start
    f.seek(0, 2)       # jump to the end (2 = SEEK_END); text mode allows only offset 0 here
    print(f.tell())    # position at the end of the file

    f.seek(pos, 0)     # return to the saved position (0 = SEEK_SET)
    f.seek(0, 1)       # 1 = SEEK_CUR; text mode allows only offset 0 (a no-op)
    # f.seek(5, 1) would raise io.UnsupportedOperation in text mode

# Binary mode: offsets are plain byte counts, so arbitrary and relative seeks work.
with open("data.txt", "rb") as f:
    f.seek(5, 0)       # 5 bytes from the start (0 = SEEK_SET)
    f.seek(5, 1)       # 5 bytes forward from the current position (1 = SEEK_CUR)
    print(f.tell())    # 10 — byte offset

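주의: 텍스트 모드에서 tell()이 반환하는 값은 단순한 글자 수가 아니라 불투명한(opaque) 위치 값입니다. 따라서 텍스트 모드의 seek()에는 0 또는 tell()이 반환한 값만 전달해야 안전하며, 현재 위치 기준(whence=1)이나 파일 끝 기준(whence=2) 이동은 오프셋 0만 허용됩니다(그 외에는 io.UnsupportedOperation 발생). 임의의 바이트 오프셋으로 이동하려면 바이너리 모드('rb')로 여십시오.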

실전 예제 1 — 로그 파일 처리

from pathlib import Path
from datetime import datetime
import re

def parse_access_log(log_path: str) -> list[dict]:
    """
    Apache/Nginx 액세스 로그를 파싱합니다.
    각 줄: IP - - [날짜] "METHOD /path HTTP/1.1" STATUS SIZE
    """
    pattern = re.compile(
        r'(?P<ip>\S+) \S+ \S+ \[(?P<time>[^\]]+)\] '
        r'"(?P<method>\S+) (?P<path>\S+) \S+" '
        r'(?P<status>\d+) (?P<size>\S+)'
    )
    records = []

    with open(log_path, "r", encoding="utf-8", errors="replace") as f:
        for lineno, line in enumerate(f, 1):
            m = pattern.match(line.strip())
            if m:
                records.append({
                    "ip": m.group("ip"),
                    "time": m.group("time"),
                    "method": m.group("method"),
                    "path": m.group("path"),
                    "status": int(m.group("status")),
                    "size": m.group("size"),
                })
            else:
                print(f"[경고] {lineno}행 파싱 실패: {line[:60]!r}")

    return records

실전 예제 2 — 설정 파일 읽기 (INI 스타일)

import configparser
from pathlib import Path

def load_config(config_path: str | Path) -> dict:
    """
    INI 형식 설정 파일을 읽어 딕셔너리로 반환합니다.
    파일이 없으면 기본값을 반환합니다.
    """
    defaults = {
        "database": {"host": "localhost", "port": "5432"},
        "cache": {"ttl": "300", "max_size": "1000"},
    }

    config = configparser.ConfigParser()
    try:
        files_read = config.read(config_path, encoding="utf-8")
        if not files_read:
            print(f"설정 파일 없음. 기본값 사용: {config_path}")
            return defaults
    except configparser.Error as e:
        print(f"설정 파일 파싱 실패: {e}")
        return defaults

    return {
        section: dict(config[section])
        for section in config.sections()
    }

실전 예제 3 — 원자적 파일 쓰기

파일 쓰기 도중 오류가 발생하면 기존 파일이 손상될 수 있습니다. 임시 파일에 쓴 뒤 교체하는 방식으로 원자성을 보장합니다.

import os
import tempfile
from pathlib import Path

def atomic_write(path: str | Path, content: str, encoding: str = "utf-8") -> None:
    """
    임시 파일에 쓴 뒤 원자적으로 대상 파일과 교체합니다.
    쓰기 도중 오류가 발생해도 원본 파일은 손상되지 않습니다.
    """
    path = Path(path)
    dir_path = path.parent

    # 같은 디렉터리에 임시 파일 생성 (같은 파일시스템)
    fd, tmp_path = tempfile.mkstemp(dir=dir_path, suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding=encoding) as f:
            f.write(content)
        # 쓰기 완료 후 원자적 교체
        os.replace(tmp_path, path)
    except Exception:
        # 실패 시 임시 파일 정리
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        raise

고수 팁

팁 1 — print() 함수로 파일에 쓰기

with open("output.txt", "w", encoding="utf-8") as f:
    print("안녕하세요", file=f)       # print() appends "\n" automatically
    print("이름:", "Alice", file=f)   # multiple arguments are joined with a space
    print("---", file=f, flush=True)  # flushes Python's buffer to the OS right away (this is not an fsync to disk)

팁 2 — 바이너리 파일 처리

# 이미지 파일 복사
with open("src.jpg", "rb") as src, open("dst.jpg", "wb") as dst:
    while chunk := src.read(8192):  # Walrus 연산자 + 청크 읽기
        dst.write(chunk)

팁 3 — io.StringIO — 메모리 파일

import io

# 문자열을 파일처럼 다루기 (테스트, 데이터 변환)
buf = io.StringIO()
buf.write("Hello\n")
buf.write("World\n")
content = buf.getvalue()   # "Hello\nWorld\n"

# CSV 파싱 테스트에 유용
import csv
csv_data = "name,age\nAlice,30\nBob,25"
reader = csv.DictReader(io.StringIO(csv_data))
for row in reader:
    print(row)

팁 4 — 파일 존재 여부와 권한 확인

import os
from pathlib import Path

p = Path("data.txt")
p.exists()        # whether the path exists
p.is_file()       # whether it is a regular file
p.is_dir()        # whether it is a directory
os.access(p, os.R_OK)  # read permission (needs `import os`)
os.access(p, os.W_OK)  # write permission

open() 함수 — 모드 완전 정리​

인코딩 — utf-8과 cp949​

with 문으로 파일 안전 처리​

읽기 메서드​

쓰기 메서드​

tell() / seek() — 파일 포인터​

실전 예제 1 — 로그 파일 처리​

실전 예제 2 — 설정 파일 읽기 (INI 스타일)​

실전 예제 3 — 원자적 파일 쓰기​

고수 팁​