딥러닝 입문 — PyTorch 기초

PyTorch는 Meta(Facebook)가 개발한 딥러닝 프레임워크입니다. 동적 계산 그래프와 직관적인 Python 스타일로 연구와 실무 모두에서 널리 사용됩니다.

설치

# CPU 버전
pip install torch torchvision torchaudio

# GPU 버전 (CUDA 12.1)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

텐서 (Tensor)

import torch
import numpy as np

# 텐서 생성
t1 = torch.tensor([1, 2, 3, 4, 5])
t2 = torch.tensor([[1.0, 2.0], [3.0, 4.0]])

print(t2.shape)   # torch.Size([2, 2])
print(t2.dtype)   # torch.float32
print(t2.device)  # cpu

# 특수 텐서
torch.zeros(3, 4)           # 0으로 채움
torch.ones(2, 3)            # 1로 채움
torch.eye(4)                # 단위행렬
torch.rand(3, 3)            # 균등분포 [0, 1)
torch.randn(3, 3)           # 표준정규분포
torch.arange(0, 10, 2)      # [0, 2, 4, 6, 8]

# NumPy ↔ Tensor 변환
arr = np.array([1, 2, 3])
t = torch.from_numpy(arr)   # 메모리 공유
arr2 = t.numpy()            # 다시 NumPy로

# GPU로 이동
device = "cuda" if torch.cuda.is_available() else "cpu"
t_gpu = t2.to(device)

텐서 연산

a = torch.tensor([[1., 2.], [3., 4.]])
b = torch.tensor([[5., 6.], [7., 8.]])

# 원소별 연산
print(a + b)
print(a * b)

# 행렬 곱
print(a @ b)           # matmul
print(torch.mm(a, b))  # 동일

# 통계
print(a.sum())         # 10.0
print(a.mean())        # 2.5
print(a.max())         # 4.0
print(a.argmax())      # 3 (전체 인덱스)

# Reshaping
c = torch.arange(12).reshape(3, 4)
print(c.shape)   # torch.Size([3, 4])
print(c.T.shape) # torch.Size([4, 3])
print(c.view(-1).shape)   # torch.Size([12]) — view: shares memory and never copies; needs a compatible (e.g. contiguous) layout
print(c.flatten().shape)  # torch.Size([12]) — returns a view when possible and copies only when it must (not "always copy")

# 차원 조작
x = torch.randn(3, 4)
print(x.unsqueeze(0).shape)  # [1, 3, 4] 차원 추가
print(x.unsqueeze(1).shape)  # [3, 1, 4]
y = torch.randn(1, 3, 4)
print(y.squeeze(0).shape)    # [3, 4] 차원 제거

자동 미분 (Autograd)

# requires_grad=True: 기울기 추적
x = torch.tensor(2.0, requires_grad=True)
y = x **3 + 2 * x + 1   # y = x³ + 2x + 1

y.backward()              # 역전파
print(x.grad)             # dy/dx = 3x² + 2 = 14

# 기울기 초기화 (반드시 필요)
x.grad.zero_()

# 기울기 추적 비활성화 (추론 시)
with torch.no_grad():
    pred = model(x)  # 메모리 효율적

신경망 구현 — nn.Module

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader


# 모델 정의
class MLPClassifier(nn.Module):
    """Feed-forward classifier with two hidden layers that outputs raw logits.

    Layer stack, in order:
    Linear -> BatchNorm1d -> ReLU -> Dropout -> Linear -> ReLU -> Dropout -> Linear.
    The second hidden layer is half as wide as the first (``hidden_dim // 2``).

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of the first hidden layer.
        output_dim: Number of output units (one logit per class).
        dropout: Probability passed to both ``nn.Dropout`` layers.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.3):
        super().__init__()
        half_dim = hidden_dim // 2
        # Layers are created in forward order so that parameter initialisation
        # and the ``network.<index>`` state-dict keys follow the same sequence.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, half_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(half_dim, output_dim),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by passing ``x`` through the layer stack."""
        logits = self.network(x)
        return logits


# 모델 생성
model = MLPClassifier(input_dim=30, hidden_dim=128, output_dim=2)
print(model)

# 파라미터 수 확인
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"전체 파라미터: {total_params:,}")
print(f"학습 가능 파라미터: {trainable_params:,}")

학습 루프

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 데이터 준비
X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# Tensor 변환
X_train_t = torch.FloatTensor(X_train_s)
y_train_t = torch.LongTensor(y_train)
X_test_t = torch.FloatTensor(X_test_s)
y_test_t = torch.LongTensor(y_test)

# DataLoader
train_ds = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True)

# 모델, 손실함수, 옵티마이저
model = MLPClassifier(input_dim=30, hidden_dim=64, output_dim=2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5)

# 학습 루프
def train_epoch(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader`` and return epoch metrics.

    Args:
        model: Module to optimise; it is switched to training mode first.
        loader: Iterable yielding ``(inputs, targets)`` batches and exposing a
            ``dataset`` attribute whose length is the total sample count.
        criterion: Callable mapping ``(outputs, targets)`` to a scalar loss.
        optimizer: Optimizer whose gradients are reset before, and stepped
            after, every batch.

    Returns:
        Tuple ``(loss, accuracy)``: the summed batch-size-weighted loss and the
        number of correct argmax predictions, each divided by
        ``len(loader.dataset)``.
    """
    model.train()
    loss_sum = 0.0
    hits = 0
    for inputs, targets in loader:
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by batch size so a shorter final batch does not
        # skew the epoch figure (assumes the criterion averages over the batch).
        loss_sum += batch_loss.item() * len(inputs)
        hits += (logits.argmax(1) == targets).sum().item()

    n_samples = len(loader.dataset)
    return loss_sum / n_samples, hits / n_samples


def evaluate(model, X, y, criterion):
    """Score ``model`` on one full batch without tracking gradients.

    Args:
        model: Module to evaluate; it is switched to evaluation mode and left
            in that mode.
        X: Input tensor passed to the model in a single forward call.
        y: Target class indices compared against the argmax over dimension 1.
        criterion: Callable mapping ``(outputs, targets)`` to a scalar loss.

    Returns:
        Tuple ``(loss, accuracy)`` of Python floats.
    """
    model.eval()
    with torch.no_grad():
        logits = model(X)
        loss_value = criterion(logits, y).item()
        matches = logits.argmax(1) == y
        accuracy = matches.float().mean().item()
    return loss_value, accuracy


# 학습 (주의: 예제를 단순하게 하기 위해 테스트 세트를 검증용으로 함께 사용합니다. 실제 프로젝트에서는 학습 데이터에서 검증 세트를 따로 분리해 모델을 선택하세요.)
best_val_acc = 0
for epoch in range(100):
    train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer)
    val_loss, val_acc = evaluate(model, X_test_t, y_test_t, criterion)
    scheduler.step(val_loss)

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), "best_model.pt")

    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch+1:3d} | 학습 손실: {train_loss:.4f} 정확도: {train_acc:.4f} | "
              f"검증 손실: {val_loss:.4f} 정확도: {val_acc:.4f}")

print(f"\n최고 검증 정확도: {best_val_acc:.4f}")

모델 저장과 불러오기

# 저장
torch.save(model.state_dict(), "model_weights.pt")    # 가중치만 (권장)
torch.save(model, "model_full.pt")                    # 전체 모델

# Load: rebuild the same architecture, then restore the saved weights into it.
model_loaded = MLPClassifier(input_dim=30, hidden_dim=64, output_dim=2)
# weights_only=True restricts unpickling to tensors and plain containers, so a
# tampered checkpoint file cannot run arbitrary code while it is being loaded.
model_loaded.load_state_dict(
    torch.load("model_weights.pt", map_location=device, weights_only=True)
)
model_loaded.eval()  # evaluation mode before running inference

# 추론
with torch.no_grad():
    predictions = model_loaded(X_test_t)
    probs = torch.softmax(predictions, dim=1)
    predicted_classes = predictions.argmax(1)

활성화 함수와 손실 함수

# 활성화 함수
nn.ReLU()       # max(0, x) — 은닉층 표준
nn.GELU()       # Transformer에서 사용
nn.Sigmoid()    # 이진 분류 출력
nn.Softmax(dim=1)  # 다중 분류 출력

# 손실 함수
nn.CrossEntropyLoss()    # 다중 분류 (Softmax 내장)
nn.BCEWithLogitsLoss()   # 이진 분류 (Sigmoid 내장)
nn.MSELoss()             # 회귀
nn.L1Loss()              # MAE 회귀

# 옵티마이저
optim.Adam(params, lr=0.001)         # 대부분의 경우 기본값
optim.AdamW(params, lr=0.001, weight_decay=0.01)  # Transformer
optim.SGD(params, lr=0.01, momentum=0.9)

정리

개념	설명
Tensor	GPU 가속 배열 (`requires_grad`로 자동 미분)
`nn.Module`	신경망 기본 클래스
`nn.Sequential`	레이어 순차 연결
`forward()`	순전파 정의
`loss.backward()`	역전파 (기울기 계산)
`optimizer.step()`	파라미터 업데이트
`model.eval()` + `no_grad()`	추론 모드 (Dropout 비활성화, BatchNorm은 학습 중 누적된 통계 사용, 기울기 추적 끔)

딥러닝의 핵심 흐름: 순전파 → 손실 계산 → 역전파 → 파라미터 업데이트

설치

텐서 (Tensor)

텐서 연산

자동 미분 (Autograd)

신경망 구현 — nn.Module

학습 루프

모델 저장과 불러오기

활성화 함수와 손실 함수

정리

설치

텐서 (Tensor)

텐서 연산

자동 미분 (Autograd)

신경망 구현 — nn.Module

학습 루프

모델 저장과 불러오기

활성화 함수와 손실 함수

정리