File Read/Write — open(), Modes, Encoding, with Statement Patterns
Files are the most basic means of persistently storing data. Python's open() function can handle both text and binary files, and when combined with the with statement, you can write safe and concise file processing code.
open() Function — Complete Mode Reference
open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
| Mode | Meaning | If file doesn't exist |
|---|---|---|
| 'r' | Read (default) | FileNotFoundError |
| 'w' | Write (overwrites) | Creates new |
| 'a' | Append | Creates new |
| 'x' | Exclusive creation | Creates new; FileExistsError if exists |
| 'b' | Binary mode (combine with r/w/a/x) | — |
| '+' | Read+write (combine with r/w/a/x) | — |
# Common combinations (each call returns a file object that should be closed)
open("file.txt", "r") # Text read; the file must already exist
open("file.txt", "w") # Text write (truncates an existing file, creates a missing one)
open("file.txt", "a") # Text append (writes go to the end; creates a missing file)
open("file.bin", "rb") # Binary read (returns bytes, no decoding)
open("file.bin", "wb") # Binary write (takes bytes; truncates an existing file)
open("file.txt", "r+") # Text read+write (file must exist; it is not truncated)
open("file.txt", "x") # Create new file only (FileExistsError if it exists)
Encoding — utf-8 and cp949
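Always open a text file with the encoding it was actually written in; decoding with the wrong one either raises UnicodeDecodeError or silently produces garbled text.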
# UTF-8 (Linux, macOS, most modern systems)
with open("data.txt", "r", encoding="utf-8") as f:
    content = f.read()

# CP949 / EUC-KR (Windows Korean legacy)
with open("legacy.txt", "r", encoding="cp949") as f:
    content = f.read()

# Encoding error handling options (shown here for reading, i.e. decoding)
open("file.txt", "r", encoding="utf-8", errors="ignore")  # Silently drop undecodable bytes
open("file.txt", "r", encoding="utf-8", errors="replace")  # Replace with U+FFFD; "?" is used only when writing
open("file.txt", "r", encoding="utf-8", errors="strict")  # Raise UnicodeDecodeError (default)
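Recommendation: Always explicitly specify encoding="utf-8" for new files. When encoding is omitted, Python falls back to the locale's preferred encoding (for example CP949 on Korean-language Windows, usually UTF-8 on Linux and macOS), so the same code can read or write different bytes on different machines, which reduces cross-platform portability.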
Safe File Handling with the with Statement
# Without with — risk of missing file.close() on exception
file = open("data.txt", "r")
content = file.read()
file.close()  # This line won't execute if read() raises, so the handle leaks

# Using with — file is automatically closed even on exception
with open("data.txt", "r", encoding="utf-8") as f:
    content = f.read()
# f is automatically closed at this point

# Opening multiple files at once (Python 3.10+ parenthesis syntax)
with (
    open("input.txt", "r", encoding="utf-8") as fin,
    open("output.txt", "w", encoding="utf-8") as fout,
):
    fout.write(fin.read())
Read Methods
with open("data.txt", "r", encoding="utf-8") as f:
    # read(): entire content as a string
    content = f.read()

    # read(n): read up to n characters in text mode (n bytes in binary mode)
    f.seek(0)  # Return to start
    chunk = f.read(100)  # Read up to 100 characters

    # readline(): read one line at a time (includes newline)
    f.seek(0)
    line = f.readline()  # "first line\n"

    # readlines(): all lines as a list
    f.seek(0)
    lines = f.readlines()  # ["first line\n", "second line\n", ...]
Use an iterator for large files. readlines() loads the entire file into memory.
# Memory-efficient line-by-line reading
with open("large.log", "r", encoding="utf-8") as f:
    for line in f:  # The file object itself is an iterator
        line = line.rstrip("\n")  # Drop only the trailing newline, keep other whitespace
        process(line)  # process() is a placeholder for your own per-line handler
Write Methods
lines = ["first line", "second line", "third line"]

with open("output.txt", "w", encoding="utf-8") as f:
    # write(): write string (no automatic newline)
    f.write("Hello, World!\n")

    # writelines(): write strings in list sequentially (no automatic newline)
    f.writelines(line + "\n" for line in lines)

# Append mode
with open("log.txt", "a", encoding="utf-8") as f:
    f.write("New log entry\n")
tell() / seek() — File Pointer
# Text mode: positions are opaque cookies, so only seek to 0 or to a tell() value
with open("data.txt", "r", encoding="utf-8") as f:
    print(f.tell())  # 0 — start of file
    f.read(10)  # Read 10 characters
    pos = f.tell()  # Current position (opaque value, not a character count)
    print(pos)
    f.seek(0)  # Return to start
    f.seek(pos)  # Jump back to a position previously returned by tell()
    f.seek(0, 2)  # Move to end of file (2 = SEEK_END); only offset 0 is allowed here

# Binary mode: offsets are plain byte counts, so relative seeks are allowed
with open("data.txt", "rb") as f:
    f.seek(0, 2)  # Move to end of file (2 = SEEK_END)
    print(f.tell())  # File size (bytes)
    f.seek(5, 0)  # 5 bytes from start (0 = SEEK_SET)
    f.seek(5, 1)  # 5 bytes from current position (1 = SEEK_CUR)
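Warning: In text mode, tell() returns an opaque number rather than a plain character or byte count, so only 0 or a value previously returned by tell() should be passed to seek(). Seeking relative to the current position or the end with a non-zero offset (for example f.seek(5, 1)) raises io.UnsupportedOperation in text mode. Open the file in binary mode ("rb") when you need byte-level seeking; this matters most for multi-byte encodings (UTF-8 Korean, etc.), where characters and bytes do not line up.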
Practical Example 1 — Log File Processing
from pathlib import Path
from datetime import datetime
import re
def parse_access_log(log_path: str) -> list[dict]:
"""
Parses Apache/Nginx access logs.
Each line: IP - - [date] "METHOD /path HTTP/1.1" STATUS SIZE
"""
pattern = re.compile(
r'(?P<ip>\S+) \S+ \S+ \[(?P<time>[^\]]+)\] '
r'"(?P<method>\S+) (?P<path>\S+) \S+" '
r'(?P<status>\d+) (?P<size>\S+)'
)
records = []
with open(log_path, "r", encoding="utf-8", errors="replace") as f:
for lineno, line in enumerate(f, 1):
m = pattern.match(line.strip())
if m:
records.append({
"ip": m.group("ip"),
"time": m.group("time"),
"method": m.group("method"),
"path": m.group("path"),
"status": int(m.group("status")),
"size": m.group("size"),
})
else:
print(f"[Warning] Line {lineno} parse failure: {line[:60]!r}")
return records
Practical Example 2 — Reading Config Files (INI Style)
import configparser
from pathlib import Path
def load_config(config_path: str | Path) -> dict:
"""
Reads an INI format config file and returns it as a dictionary.
Returns defaults if file doesn't exist.
"""
defaults = {
"database": {"host": "localhost", "port": "5432"},
"cache": {"ttl": "300", "max_size": "1000"},
}
config = configparser.ConfigParser()
try:
files_read = config.read(config_path, encoding="utf-8")
if not files_read:
print(f"No config file. Using defaults: {config_path}")
return defaults
except configparser.Error as e:
print(f"Config file parse failure: {e}")
return defaults
return {
section: dict(config[section])
for section in config.sections()
}
Practical Example 3 — Atomic File Write
If an error occurs during file writing, the original file may become corrupted. Writing to a temporary file and then replacing guarantees atomicity.
import os
import tempfile
from pathlib import Path
def atomic_write(path: str | Path, content: str, encoding: str = "utf-8") -> None:
"""
Writes to a temporary file and then atomically replaces the target file.
The original file is not corrupted even if writing fails.
"""
path = Path(path)
dir_path = path.parent
# Create temporary file in the same directory (same filesystem)
fd, tmp_path = tempfile.mkstemp(dir=dir_path, suffix=".tmp")
try:
with os.fdopen(fd, "w", encoding=encoding) as f:
f.write(content)
# Atomically replace after write completes
os.replace(tmp_path, path)
except Exception:
# Clean up temp file on failure
try:
os.unlink(tmp_path)
except OSError:
pass
raise
Expert Tips
Tip 1 — Writing to a file with print()
with open("output.txt", "w", encoding="utf-8") as f:
    print("Hello", file=f)  # Automatic \n
    print("Name:", "Alice", file=f)  # Arguments are joined with a single space
    print("---", file=f, flush=True)  # Flush Python's buffer to the OS right away
Tip 2 — Binary file processing
# Copy an image file
with open("src.jpg", "rb") as src, open("dst.jpg", "wb") as dst:
    while chunk := src.read(8192):  # Walrus operator + chunk reading; b"" at EOF ends the loop
        dst.write(chunk)
Tip 3 — io.StringIO — In-memory file
import io

# Use string as a file object (testing, data transformation)
buf = io.StringIO()
buf.write("Hello\n")
buf.write("World\n")
content = buf.getvalue()  # "Hello\nWorld\n"

# Useful for CSV parsing tests
import csv

csv_data = "name,age\nAlice,30\nBob,25"
reader = csv.DictReader(io.StringIO(csv_data))
for row in reader:
    print(row)  # One dict per data row, keyed by the header line
Tip 4 — Checking file existence and permissions
import os  # Required for os.access() below
from pathlib import Path

p = Path("data.txt")
p.exists()  # Whether it exists
p.is_file()  # Whether it's a file
p.is_dir()  # Whether it's a directory
# These checks are only a snapshot: the file can change before you open it,
# so still handle the exception from open() itself
os.access(p, os.R_OK)  # Read permission
os.access(p, os.W_OK)  # Write permission