Ch 3.3 문자열 고급 활용

파이썬의 문자열은 단순한 텍스트 컨테이너를 넘어 강력한 포매팅, 조작 기능을 제공합니다. 현대 파이썬 개발에서 f-string은 필수입니다.

1. f-string 완전 정복 (Python 3.6+)

f-string은 f"..." 또는 f'...' 형식으로 문자열 앞에 f를 붙입니다. 중괄호 {} 안에 모든 파이썬 표현식을 넣을 수 있습니다.

기본 사용법

name = "Alice"
age = 30
score = 95.6789

# 기본 변수 삽입
print(f"이름: {name}, 나이: {age}")
# 이름: Alice, 나이: 30

# 표현식 지원
print(f"내년 나이: {age + 1}")
print(f"합격 여부: {'합격' if score >= 60 else '불합격'}")
print(f"이름 길이: {len(name)}")
print(f"대문자: {name.upper()}")

포매팅 스펙

pi = 3.14159265358979
price = 29900
text = "python"
number = 42

# 소수점 자리수
print(f"{pi:.2f}")          # 3.14 (소수점 2자리)
print(f"{pi:.5f}")          # 3.14159
print(f"{pi:10.2f}")        # '      3.14' (전체 10자리)

# 천 단위 구분자
print(f"{price:,}")         # 29,900
print(f"{price:,.2f}")      # 29,900.00

# 정렬
print(f"{text:<10}")        # 'python    ' (왼쪽 정렬, 10자리)
print(f"{text:>10}")        # '    python' (오른쪽 정렬)
print(f"{text:^10}")        # '  python  ' (가운데 정렬)
print(f"{text:*^10}")       # '**python**' (채움 문자 *)

# 정수 포매팅
print(f"{number:d}")        # 42    (10진수)
print(f"{number:b}")        # 101010 (2진수)
print(f"{number:o}")        # 52    (8진수)
print(f"{number:x}")        # 2a    (16진수)
print(f"{number:#x}")       # 0x2a  (접두사 포함)
print(f"{number:08b}")      # 00101010 (8자리 2진수)

# 부호 표시
print(f"{42:+d}")           # +42
print(f"{-42:+d}")          # -42
print(f"{42: d}")           # ' 42' (양수에 공백)

f-string 디버깅 (`f"{x=}"`) Python 3.8+

x = 10
y = 20
result = x + y

# 변수명과 값을 함께 출력 — 디버깅에 매우 유용!
print(f"{x=}")              # x=10
print(f"{y=}")              # y=20
print(f"{result=}")         # result=30
print(f"{x+y=}")            # x+y=30
print(f"{x * 2 + 1=}")      # x * 2 + 1=21

# 포매팅과 함께 사용
pi = 3.14159
print(f"{pi=:.3f}")         # pi=3.142

# 실전: 함수 디버깅
def calculate(a: float, b: float) -> float:
    """Return ``a * b + a / b`` and print a debug line with the inputs and result.

    The debug line uses the f-string ``=`` specifier, so each value is
    printed together with its expression text; the result is shown with
    four decimal places.

    Raises:
        ZeroDivisionError: if ``b`` is zero.
    """
    product, quotient = a * b, a / b
    result = product + quotient
    print(f"DEBUG: {a=}, {b=}, {result=:.4f}")
    return result

calculate(3.0, 4.0)
# DEBUG: a=3.0, b=4.0, result=12.7500

f-string 중첩

width = 10
fill_char = "*"
text = "hello"

# 중괄호 안에 변수 사용
print(f"{text:{fill_char}^{width}}")   # **hello***

# 동적 포매팅
for i in range(1, 4):
    decimals = i
    print(f"{3.14159:.{decimals}f}")
# 3.1
# 3.14
# 3.142

2. `str.format()`과 `%` 포매팅 (레거시)

현대 코드에서는 f-string을 권장하지만, 기존 코드를 읽을 때 필요합니다.

# str.format()
name, age = "Bob", 25
print("이름: {}, 나이: {}".format(name, age))
print("이름: {0}, 나이: {1}, 이름 반복: {0}".format(name, age))
print("이름: {name}, 나이: {age}".format(name=name, age=age))
print("π = {:.4f}".format(3.14159))
print("{:>10}".format("right"))   # '     right'

# % 포매팅 (가장 오래된 방식)
print("이름: %s, 나이: %d" % (name, age))
print("π = %.4f" % 3.14159)
print("%10s" % "right")           # '     right'

# 비교
value = 42
pi = 3.14159

# f-string (Python 3.6+) — 권장
print(f"{value:05d} | {pi:.3f}")    # 00042 | 3.142

# str.format()
print("{:05d} | {:.3f}".format(value, pi))

# % 포매팅
print("%05d | %.3f" % (value, pi))

3. 주요 문자열 메서드

text = "  Hello, Python World!  "

# Case conversion
print(text.upper())          # '  HELLO, PYTHON WORLD!  '
print(text.lower())          # '  hello, python world!  '
print(text.title())          # '  Hello, Python World!  '
print(text.capitalize())     # '  hello, python world!  ' (the first character is a space, so nothing is capitalized; the rest is lowercased)
print(text.swapcase())       # '  hELLO, pYTHON wORLD!  ' (case of every letter swapped)

# 공백 제거
print(text.strip())          # 'Hello, Python World!'
print(text.lstrip())         # 'Hello, Python World!  '
print(text.rstrip())         # '  Hello, Python World!'
print(text.strip("! "))      # 'Hello, Python World'

# 분리와 결합
sentence = "Hello,World,Python"
words = sentence.split(",")
print(words)                 # ['Hello', 'World', 'Python']

lines = "줄1\n줄2\n줄3"
print(lines.splitlines())    # ['줄1', '줄2', '줄3']

# split의 최대 분리 횟수
print("a-b-c-d".split("-", 2))  # ['a', 'b', 'c-d']

# join — 리스트를 문자열로 결합
parts = ["사과", "바나나", "딸기"]
print(", ".join(parts))      # '사과, 바나나, 딸기'
print("-".join(["2024", "01", "15"]))   # '2024-01-15'
print("".join(["H", "e", "l", "l", "o"]))  # 'Hello'

# 검색
s = "Hello, World!"
print(s.find("World"))       # 7  (인덱스, 없으면 -1)
print(s.find("Java"))        # -1
print(s.index("World"))      # 7  (없으면 ValueError!)
print(s.count("l"))          # 3  (개수 세기)

# 확인
print(s.startswith("Hello")) # True
print(s.endswith("!"))       # True
print("12345".isdigit())     # True
print("hello".isalpha())     # True
print("hello123".isalnum())  # True
print("   ".isspace())       # True

# 치환
print(s.replace("World", "Python"))   # 'Hello, Python!'
print("aaa".replace("a", "b", 2))    # 'bba' (최대 2회)

# 채우기
print("42".zfill(5))         # '00042' (0으로 채우기)
print("hi".center(10, "-"))  # '----hi----'
print("hi".ljust(10, "."))   # 'hi........'
print("hi".rjust(10, "."))   # '........hi'

4. 문자열 슬라이싱 심화

s = "Hello, Python!"

# 기본 슬라이싱 [start:stop:step]
print(s[0:5])       # 'Hello'
print(s[7:])        # 'Python!'
print(s[:5])        # 'Hello'
print(s[-7:])       # 'Python!'
print(s[:-1])       # 'Hello, Python'

# step 활용
print(s[::2])       # 'Hlo yhn'  (2칸씩 건너뜀)
print(s[1::2])      # 'el,Pto!'  (1부터 2칸씩)
print(s[::-1])      # '!nohtyP ,olleH' (역순)

# 회문(Palindrome) 검사
def is_palindrome(text: str) -> bool:
    """Return True if *text* reads the same forwards and backwards.

    Only alphanumeric characters are considered, and the comparison is
    case-insensitive (characters are lowercased first), so spaces and
    punctuation are ignored.
    """
    normalized = "".join(map(str.lower, filter(str.isalnum, text)))
    mirrored = normalized[::-1]  # negative step walks the string backwards
    return normalized == mirrored

print(is_palindrome("racecar"))          # True
print(is_palindrome("A man a plan a canal Panama"))  # True
print(is_palindrome("hello"))            # False

# 슬라이싱으로 부분 문자열 추출
log_line = "2024-01-15 10:30:45 INFO User logged in"
date = log_line[:10]
time = log_line[11:19]
level = log_line[20:24]
message = log_line[25:]

print(f"날짜: {date}, 시간: {time}, 레벨: {level}, 메시지: {message}")

5. 문자열 불변성 — 왜 변경 불가인가?

text = "hello"
# text[0] = "H"  # TypeError: 'str' object does not support item assignment

# 문자열 수정은 항상 새 문자열 생성
text = "H" + text[1:]    # "Hello"

# 불변성의 장점: 딕셔너리 키, 집합 원소로 사용 가능
word_count = {"hello": 5, "world": 3}
word_set = {"apple", "banana", "apple"}   # 중복 제거됨

# 성능 시사점: 많은 수정이 필요할 때는 list로 변환 후 처리
def reverse_string(s: str) -> str:
    """Return *s* with its characters in reverse order.

    Strings are immutable, so the characters are first copied into a
    (mutable) list and joined back into a new string afterwards.
    """
    mutable_chars = list(s)
    return "".join(reversed(mutable_chars))

# 또는 단순히
def reverse_string_v2(s: str) -> str:
    """Return *s* reversed, using a slice with a step of -1."""
    return s[::-1]

6. `str.encode()` / `bytes.decode()` — 인코딩 기초

# 문자열 → bytes (인코딩)
text = "안녕하세요, Python!"

utf8_bytes = text.encode("utf-8")
print(utf8_bytes)
print(type(utf8_bytes))      # <class 'bytes'>
print(len(utf8_bytes))       # 바이트 길이 (한글은 3바이트)

# bytes → 문자열 (디코딩)
decoded = utf8_bytes.decode("utf-8")
print(decoded)               # 안녕하세요, Python!

# 다양한 인코딩
euc_kr_bytes = text.encode("euc-kr")    # 한국어 EUC-KR
cp949_bytes = text.encode("cp949")      # Windows 한국어

# 파일 읽기/쓰기에서의 인코딩
# open("file.txt", "r", encoding="utf-8")  # 인코딩 명시 권장

# bytes 리터럴
raw_bytes = b"Hello, World!"
print(raw_bytes[0])           # 72 (H의 ASCII 코드)
print(raw_bytes.decode())     # 'Hello, World!'

# Handling decode errors
# A bytes literal may contain only ASCII characters, so the non-ASCII text is
# encoded explicitly and appended to the invalid UTF-8 bytes \xff\xfe.
faulty = b"\xff\xfe " + "테스트".encode("utf-8")
print(faulty.decode("utf-8", errors="ignore"))   # ' 테스트' (invalid bytes are dropped)
print(faulty.decode("utf-8", errors="replace"))  # '�� 테스트' (each invalid byte becomes U+FFFD)

고수 팁: str.join() vs + 성능, textwrap.dedent, 문자열 인터닝

join() vs + 연결 성능 차이:

import timeit

words = ["python"] * 10000

# + operator: worst-case O(n²) — each += may build a brand-new string.
# (CPython can sometimes extend the string in place, but that optimization is
# an implementation detail and is not guaranteed.)
def concat_plus(words):
    """Concatenate *words* with repeated ``+=``, appending a space after each word.

    Intentionally naive: this is the slow baseline of the timing comparison.
    The returned string ends with a trailing space.
    """
    result = ""
    for word in words:
        result += word + " "   # a new string may be created on every iteration
    return result

# join(): O(n) — the result is assembled in a single call
def concat_join(words):
    """Join *words* with single spaces using ``str.join``.

    Unlike the ``+=`` loop in ``concat_plus``, no trailing space is appended.
    """
    return " ".join(words)

time_plus = timeit.timeit(lambda: concat_plus(words), number=100)
time_join = timeit.timeit(lambda: concat_join(words), number=100)

print(f"+  연산: {time_plus:.4f}초")
print(f"join(): {time_join:.4f}초")
# join()이 더 빠름 — 배율은 인터프리터와 입력 크기에 따라 달라짐 (CPython은 += 최적화가 있어 차이가 수 배 수준에 그칠 수 있음)

textwrap.dedent 로 들여쓰기 제거:

import textwrap

def get_sql_query() -> str:
    """Return the sample SQL query without the source-code indentation.

    The literal is indented to match the surrounding code; ``textwrap.dedent``
    removes the common leading whitespace and ``strip`` drops the blank
    first and last lines.
    """
    raw_sql = """
        SELECT user_id, user_name, email
        FROM users
        WHERE is_active = true
          AND created_at > '2024-01-01'
        ORDER BY created_at DESC
        LIMIT 100
    """
    dedented = textwrap.dedent(raw_sql)
    return dedented.strip()

print(get_sql_query())
# SELECT user_id, user_name, email
# FROM users
# ...  (들여쓰기 제거됨)

# textwrap.fill — 긴 텍스트를 지정한 폭으로 줄바꿈한 하나의 문자열로 반환 (textwrap.wrap은 줄 리스트를 반환)
long_text = "파이썬은 1991년 귀도 반 로섬이 발표한 고급 프로그래밍 언어입니다. 읽기 쉽고 간결한 문법으로 다양한 분야에서 활용됩니다."
wrapped = textwrap.fill(long_text, width=40)
print(wrapped)

문자열 인터닝(Interning):

import sys

# CPython automatically interns short, identifier-like string literals
a = "hello"
b = "hello"
print(a is b)          # True on CPython (same object) — an implementation detail, do not rely on it

# Strings containing spaces are not guaranteed to be interned automatically
c = "hello world"
d = "hello world"
print(c is d)          # Not guaranteed: often False in the REPL, but can be True when
                       # both literals are compiled together (e.g. run as a script)

# Force interning with sys.intern()
e = sys.intern("hello world")
f = sys.intern("hello world")
print(e is f)          # True

# 딕셔너리 키가 많은 경우 인터닝으로 메모리와 비교 속도 향상
# (수십만 개의 반복 문자열 처리 시 유용)

파이썬 문자열의 강력한 기능을 모두 살펴보았습니다. 다음 챕터에서는 조건 표현식과 Walrus 연산자를 통해 더욱 간결한 파이썬 코드 작성법을 알아보겠습니다.

1. f-string 완전 정복 (Python 3.6+)​

기본 사용법​

포매팅 스펙​

f-string 디버깅 (f"{x=}") Python 3.8+​

f-string 중첩​

2. str.format()과 % 포매팅 (레거시)​

3. 주요 문자열 메서드​

4. 문자열 슬라이싱 심화​

5. 문자열 불변성 — 왜 변경 불가인가?​

6. str.encode() / bytes.decode() — 인코딩 기초​