Ch 2.3 컬렉션 타입 심화

파이썬은 강력한 내장 컬렉션 타입을 제공합니다. 올바른 자료구조 선택이 코드의 성능과 가독성을 결정합니다.

1. list — 가변 순서 컬렉션

리스트는 순서가 있고 변경 가능(Mutable)한 컬렉션입니다. 가장 자주 사용되는 자료구조입니다.

# 리스트 생성
fruits = ["apple", "banana", "cherry"]
numbers = [1, 2, 3, 4, 5]
mixed = [1, "hello", True, 3.14, None]   # 여러 타입 혼합 가능
empty = []
from_range = list(range(5))              # [0, 1, 2, 3, 4]

# 인덱싱
print(fruits[0])     # apple (첫 번째)
print(fruits[-1])    # cherry (마지막)

# 슬라이싱 [start:stop:step]
print(numbers[1:4])   # [2, 3, 4]
print(numbers[::2])   # [1, 3, 5] (2칸씩 건너뜀)
print(numbers[::-1])  # [5, 4, 3, 2, 1] (역순)

주요 메서드

fruits = ["apple", "banana"]

# Adding elements
fruits.append("cherry")                # append one item to the end
fruits.insert(1, "blueberry")          # insert before index 1
fruits.extend(["date", "elderberry"])  # append every item of another iterable
# fruits is now ['apple', 'blueberry', 'banana', 'cherry', 'date', 'elderberry']

# Removing elements
fruits.remove("banana")                # remove by value (first match only)
popped = fruits.pop()                  # remove and return the last item -> 'elderberry'
popped_at = fruits.pop(0)              # remove and return the item at index 0 -> 'apple'
# fruits is now ['blueberry', 'cherry', 'date']

# Searching
print("apple" in fruits)               # False: 'apple' was removed by pop(0) above
print(fruits.index("cherry"))          # 1 (index of the first 'cherry')
print(fruits.count("apple"))           # 0 (number of occurrences of 'apple')

# Sorting
nums = [3, 1, 4, 1, 5, 9, 2, 6]
nums.sort()                            # in place, ascending -> [1, 1, 2, 3, 4, 5, 6, 9]
nums.sort(reverse=True)                # in place, descending -> [9, 6, 5, 4, 3, 2, 1, 1]
sorted_nums = sorted(nums)             # returns a new ascending list; nums is unchanged

# Custom sort key
words = ["banana", "apple", "cherry", "date"]
words.sort(key=len)                    # order by length; the sort is stable, so ties keep their order
print(words)  # ['date', 'apple', 'banana', 'cherry']

# Miscellaneous
fruits.reverse()                       # reverse in place -> ['date', 'cherry', 'blueberry']
print(len(fruits))                     # 3
copy = fruits.copy()                   # shallow copy, taken BEFORE clear() so it keeps the items
fruits.clear()                         # remove every item; fruits is now []

리스트 컴프리헨션

# 기본 컴프리헨션
squares = [x**2 for x in range(10)]
print(squares)   # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# 조건부 컴프리헨션
evens = [x for x in range(20) if x % 2 == 0]
print(evens)     # [0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

# 중첩 컴프리헨션
matrix = [[i * j for j in range(1, 4)] for i in range(1, 4)]
print(matrix)    # [[1, 2, 3], [2, 4, 6], [3, 6, 9]]

# 실전 예시: 단어 길이 필터링
words = ["python", "is", "a", "great", "language"]
long_words = [w.upper() for w in words if len(w) > 3]
print(long_words)  # ['PYTHON', 'GREAT', 'LANGUAGE']

2. tuple — 불변 순서 컬렉션

튜플은 순서가 있고 변경 불가(Immutable)한 컬렉션입니다. 한 번 만들면 수정할 수 없습니다.

# 튜플 생성
point = (3, 5)
rgb = (255, 128, 0)
single = (42,)          # 요소 하나짜리 — 반드시 쉼표 필요!
empty = ()

# 괄호 없어도 튜플
coordinates = 10, 20    # (10, 20)으로 자동 패킹

# 불변성 확인
# point[0] = 10  # TypeError: 'tuple' object does not support item assignment

# 언패킹
x, y = point
print(f"x={x}, y={y}")   # x=3, y=5

# 함수에서 여러 값 반환 시 자주 사용
def get_min_max(numbers):
    """Return the smallest and largest items of ``numbers`` as a ``(min, max)`` tuple.

    Raises ValueError (from ``min``) when ``numbers`` is empty.
    """
    lowest = min(numbers)
    highest = max(numbers)
    # The comma builds a tuple, so callers can unpack both values at once.
    return lowest, highest


# Unpack the returned pair straight into two names.
minimum, maximum = get_min_max([3, 1, 4, 1, 5, 9])
print(f"최솟값: {minimum}, 최댓값: {maximum}")

네임드 튜플 (namedtuple)

from collections import namedtuple

# 네임드 튜플 정의
Point = namedtuple("Point", ["x", "y"])
Color = namedtuple("Color", ["red", "green", "blue"])

# 생성
p = Point(3, 5)
orange = Color(255, 165, 0)

# 이름으로 접근 (가독성 향상)
print(p.x, p.y)                    # 3 5
print(orange.red, orange.green)    # 255 165

# 인덱스로도 접근 가능
print(p[0], p[1])                  # 3 5

# dict로 변환
print(p._asdict())                 # {'x': 3, 'y': 5}

# Python 3.6+ typing.NamedTuple (더 현대적)
from typing import NamedTuple

class Employee(NamedTuple):
    """Employee record declared with typing.NamedTuple: named, annotated fields."""
    # Required fields (no default value).
    name: str
    department: str
    # Optional field: 50000.0 is used when no salary is passed.
    salary: float = 50000.0

emp = Employee("Alice", "Engineering", 80000)
print(emp.name)      # Alice
print(emp.salary)    # 80000

3. set — 중복 없는 집합

셋은 순서가 없고 중복 없는 컬렉션입니다.

# 집합 생성
fruits = {"apple", "banana", "cherry"}
numbers = {1, 2, 3, 3, 3}         # 중복 자동 제거
print(numbers)                     # {1, 2, 3}

# 리스트의 중복 제거에 자주 사용
data = [1, 2, 2, 3, 3, 3, 4]
unique = list(set(data))
print(sorted(unique))              # [1, 2, 3, 4]

# 빈 집합 — {} 는 딕셔너리! set()을 사용
empty_set = set()
print(type(empty_set))             # <class 'set'>
print(type({}))                    # <class 'dict'>

집합 연산

a = {1, 2, 3, 4, 5}
b = {4, 5, 6, 7, 8}

# 합집합
print(a | b)            # {1, 2, 3, 4, 5, 6, 7, 8}
print(a.union(b))       # 동일

# 교집합
print(a & b)            # {4, 5}
print(a.intersection(b))

# 차집합
print(a - b)            # {1, 2, 3}
print(a.difference(b))

# 대칭 차집합 (합집합 - 교집합)
print(a ^ b)            # {1, 2, 3, 6, 7, 8}
print(a.symmetric_difference(b))

# 부분집합 확인
print({1, 2}.issubset(a))    # True
print(a.issuperset({1, 2}))  # True

frozenset — 불변 집합

# frozenset: 변경 불가능한 집합, 딕셔너리 키로 사용 가능
fs = frozenset([1, 2, 3])
# fs.add(4)  # AttributeError: 수정 불가

# 딕셔너리 키로 활용
cache = {frozenset([1, 2]): "pair_12"}
print(cache[frozenset([1, 2])])    # pair_12

4. dict — 키-값 쌍

딕셔너리는 키-값(Key-Value) 쌍으로 데이터를 저장하는 컬렉션입니다. Python 3.7+부터 삽입 순서 유지가 보장됩니다.

# 딕셔너리 생성
person = {"name": "Alice", "age": 30, "city": "Seoul"}
empty = {}
from_keys = dict.fromkeys(["a", "b", "c"], 0)   # {'a': 0, 'b': 0, 'c': 0}

# 접근
print(person["name"])              # Alice
print(person.get("email"))         # None (KeyError 없음)
print(person.get("email", "N/A"))  # N/A (기본값 지정)

# 수정 / 추가
person["age"] = 31                 # 기존 키 수정
person["email"] = "alice@example.com"  # 새 키 추가

# 삭제
del person["city"]
removed = person.pop("email")     # 제거 후 값 반환

딕셔너리 주요 메서드

config = {"host": "localhost", "port": 5432, "db": "mydb"}

# 키, 값, 아이템 조회
print(list(config.keys()))         # ['host', 'port', 'db']
print(list(config.values()))       # ['localhost', 5432, 'mydb']
print(list(config.items()))        # [('host', 'localhost'), ...]

# 순회
for key, value in config.items():
    print(f"{key}: {value}")

# update — 다른 딕셔너리로 업데이트
config.update({"port": 3306, "charset": "utf8"})

# setdefault — 키가 없을 때만 기본값 설정
config.setdefault("timeout", 30)
print(config["timeout"])           # 30
config.setdefault("timeout", 60)   # 이미 있으면 무시
print(config["timeout"])           # 30 (변경 안 됨)

# Python 3.9+ 딕셔너리 병합
defaults = {"timeout": 30, "retry": 3}
custom = {"timeout": 60, "host": "example.com"}

merged = defaults | custom         # custom이 우선
print(merged)
# {'timeout': 60, 'retry': 3, 'host': 'example.com'}

딕셔너리 컴프리헨션

# 기본 딕셔너리 컴프리헨션
squares = {x: x**2 for x in range(1, 6)}
print(squares)   # {1: 1, 2: 4, 3: 9, 4: 16, 5: 25}

# 조건부
even_squares = {x: x**2 for x in range(10) if x % 2 == 0}

# 딕셔너리 반전 (키-값 교환)
original = {"a": 1, "b": 2, "c": 3}
inverted = {v: k for k, v in original.items()}
print(inverted)  # {1: 'a', 2: 'b', 3: 'c'}

# Practical example: word frequencies.
# dict.fromkeys(words) de-duplicates while keeping first-seen order, so the
# key order of the result is deterministic. Iterating set(words) is not:
# string hashes are randomized per process, so the printed order could vary.
# NOTE: words.count() rescans the list once per distinct word; for large
# inputs collections.Counter(words) does the same job in a single pass.
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
frequency = {word: words.count(word) for word in dict.fromkeys(words)}
print(frequency)  # {'apple': 3, 'banana': 2, 'cherry': 1}

5. 컬렉션 선택 가이드

상황	권장 자료구조
순서 있는 수정 가능 데이터	`list`
변경 없는 고정 데이터, 딕셔너리 키	`tuple`
중복 제거, 멤버십 테스트	`set`
키로 빠른 조회	`dict`
양쪽 끝 삽입/삭제 빠름	`collections.deque`
빈도 계산	`collections.Counter`
키 없을 때 기본값	`collections.defaultdict`

6. `collections` 모듈

from collections import Counter, defaultdict, OrderedDict, deque

# Counter — 요소 빈도 계산
scores = [85, 92, 85, 78, 92, 85, 90]
counter = Counter(scores)
print(counter)                         # Counter({85: 3, 92: 2, 78: 1, 90: 1})
print(counter.most_common(2))         # [(85, 3), (92, 2)]

text = "hello world"
char_count = Counter(text)
print(char_count['l'])                 # 3

# defaultdict — 없는 키 접근 시 기본값 자동 생성
word_list = ["apple", "banana", "apple", "cherry", "banana", "apple"]

# 일반 dict에서 키 없으면 KeyError
# 하지만 defaultdict은 기본값 생성
word_freq = defaultdict(int)
for word in word_list:
    word_freq[word] += 1
print(dict(word_freq))   # {'apple': 3, 'banana': 2, 'cherry': 1}

graph = defaultdict(list)
graph["A"].append("B")
graph["A"].append("C")
print(dict(graph))       # {'A': ['B', 'C']}

# deque — 양방향 큐 (앞/뒤 O(1) 삽입/삭제)
dq = deque([1, 2, 3])
dq.appendleft(0)         # 앞에 추가
dq.append(4)             # 뒤에 추가
print(dq)                # deque([0, 1, 2, 3, 4])

dq.popleft()             # 앞에서 제거
dq.rotate(1)             # 오른쪽으로 1칸 회전
print(dq)                # deque([4, 1, 2, 3])

# maxlen 지정 시 슬라이딩 윈도우로 활용
recent = deque(maxlen=3)
for x in range(7):
    recent.append(x)
    print(list(recent))
# [0]
# [0, 1]
# [0, 1, 2]
# [1, 2, 3]  ← maxlen 초과 시 가장 오래된 것 자동 제거
# ...

고수 팁: 자료구조 성능 비교

in 연산자 성능 차이:

import time

large_list = list(range(1_000_000))
large_set = set(range(1_000_000))
large_dict = {x: x for x in range(1_000_000)}

# 리스트 검색 O(n) — 느림
start = time.perf_counter()
999_999 in large_list
print(f"list: {time.perf_counter() - start:.6f}초")   # ~0.01초

# 셋 검색 O(1) — 빠름 (해시 기반)
start = time.perf_counter()
999_999 in large_set
print(f"set: {time.perf_counter() - start:.6f}초")    # ~0.000001초

# 딕셔너리 키 검색 O(1) — 빠름
start = time.perf_counter()
999_999 in large_dict
print(f"dict: {time.perf_counter() - start:.6f}초")   # ~0.000001초

리스트 vs 튜플 메모리:

# Compare the shallow size of a list and a tuple holding the same five ints.
import sys
lst = [1, 2, 3, 4, 5]
tpl = (1, 2, 3, 4, 5)
# sys.getsizeof is shallow (it measures the container only), and the exact
# numbers depend on the interpreter version and platform; the values below
# are typical for 64-bit CPython.
print(sys.getsizeof(lst))  # e.g. 104 bytes
print(sys.getsizeof(tpl))  # e.g. 80 bytes — the tuple is smaller

# Building a tuple literal is also faster.
# NOTE(review): a tuple made only of constants is presumably constant-folded
# by CPython, so this mostly compares "load a constant" with "build a list";
# confirm with dis. Timings are machine-dependent.
import timeit
print(timeit.timeit("[1, 2, 3]", number=10_000_000))  # ~0.8 s
print(timeit.timeit("(1, 2, 3)", number=10_000_000))  # ~0.2 s

결론: 변경이 필요 없는 데이터는 튜플을, 멤버십 테스트가 많은 경우 셋을 사용하세요.

컬렉션 타입을 마스터했습니다. 다음 챕터에서는 타입 간 형변환(Type Casting)을 다루겠습니다.

1. list — 가변 순서 컬렉션

주요 메서드

리스트 컴프리헨션

2. tuple — 불변 순서 컬렉션

네임드 튜플 (namedtuple)

3. set — 중복 없는 집합

집합 연산

frozenset — 불변 집합

4. dict — 키-값 쌍

딕셔너리 주요 메서드

딕셔너리 컴프리헨션

5. 컬렉션 선택 가이드

6. collections 모듈