실전 패턴과 커리큘럼 정리

신경망에서의 텐서 흐름 추적

간단한 분류 모델을 통해 데이터가 텐서로 어떻게 변환되고 흘러가는지 추적합니다.

import torch
import torch.nn as nn

# A simple two-layer classification network
class SimpleClassifier(nn.Module):
    """Two-layer fully connected classifier that prints tensor shapes.

    The forward pass prints the shape of the tensor after every stage so
    the flow of data through the network can be followed.

    Args:
        in_features: Size of each input sample.
        hidden: Width of the hidden layer.
        n_classes: Number of output scores per sample.
    """

    def __init__(self, in_features, hidden, n_classes):
        super().__init__()
        # Layers are created in the order fc1 -> relu -> fc2 so that the
        # parameter names and their initialization order stay fixed.
        self.fc1 = nn.Linear(in_features, hidden)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden, n_classes)

    def forward(self, x):
        """Run the forward pass, printing the shape after each stage.

        Args:
            x: Input tensor whose last dimension is ``in_features``
                (the tutorial feeds a (batch, in_features) tensor).

        Returns:
            The output of ``fc2``: one score per class for each sample.
        """
        print(f"  입력:    {x.shape}")

        # (label printed for the stage, layer applied in that stage)
        stages = (
            ("  선형 1:  ", self.fc1),   # -> (batch, hidden)
            ("  ReLU:    ", self.relu),  # -> (batch, hidden), shape unchanged
            ("  출력:    ", self.fc2),   # -> (batch, n_classes)
        )
        for label, layer in stages:
            x = layer(x)
            print(f"{label}{x.shape}")
        return x

# 784 input features -> 256 hidden units -> 10 classes.
model = SimpleClassifier(in_features=784, hidden=256, n_classes=10)

# One batch of 32 samples with 784 features each (a flattened MNIST image).
x = torch.randn(32, 784)

print("순전파 shape 추적:")
out = model(x)
# Expected output of the forward pass:
#   입력:    torch.Size([32, 784])
#   선형 1:  torch.Size([32, 256])
#   ReLU:    torch.Size([32, 256])
#   출력:    torch.Size([32, 10])

완전한 학습 루프 패턴

실제 학습 루프에서 텐서가 어떻게 사용되는지 단계별로 확인합니다.

import torch
import torch.nn as nn
import torch.optim as optim
from torch.cuda.amp import autocast, GradScaler

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = SimpleClassifier(784, 256, 10).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
# Gradient scaler for mixed precision. Loss scaling only applies to CUDA
# float16 training, so it is disabled explicitly on CPU instead of letting
# GradScaler warn and disable itself. A disabled scaler makes scale(), step()
# and update() plain pass-throughs, so the training code does not change.
scaler = GradScaler(enabled=(device.type == 'cuda'))

# Dummy data (a real project would draw batches from a DataLoader)
def get_batch(batch_size=64):
    """Return one random ``(inputs, labels)`` batch.

    No device is passed, so both tensors are created on PyTorch's default
    device; callers move them where they are needed.

    Args:
        batch_size: Number of samples in the batch.

    Returns:
        A tuple ``(inputs, labels)``: ``inputs`` is a standard-normal float
        tensor of shape (batch_size, 784) and ``labels`` is an integer
        tensor of shape (batch_size,) with values in [0, 10).
    """
    # Inputs are drawn before labels; the order fixes the RNG stream.
    features = torch.randn(batch_size, 784)
    labels = torch.randint(low=0, high=10, size=(batch_size,))
    return features, labels

# torch.cuda.amp.autocast only has an effect on CUDA. On a CPU-only machine
# an unconditional autocast() emits a warning and disables itself, so the
# context is switched off explicitly when the model is not on a GPU.
use_amp = device.type == 'cuda'

print("=== 학습 루프 ===")
for epoch in range(3):
    model.train()   # training mode (enables dropout and similar layers)

    # Step 1: clear gradients left over from the previous iteration
    optimizer.zero_grad()

    # Step 2: fetch a batch and move it to the model's device
    x, y = get_batch()
    x = x.to(device)   # (64, 784) inputs
    y = y.to(device)   # (64,) labels must live on the same device

    # Step 3: forward pass (mixed precision when running on CUDA)
    with autocast(enabled=use_amp):
        logits = model(x)            # (64, 10) per-class scores
        loss = criterion(logits, y)  # scalar cross-entropy loss

    # Step 4: backward pass on the (possibly scaled) loss
    scaler.scale(loss).backward()

    # Step 5: parameter update; the scaler runs the optimizer step and then
    # refreshes its scale factor
    scaler.step(optimizer)
    scaler.update()

    print(f"에폭 {epoch+1}: loss = {loss.item():.4f}")

검증 루프 패턴

검증 시에는 파라미터를 업데이트하지 않아 그래디언트가 필요 없으므로, torch.no_grad() 또는 torch.inference_mode()를 사용합니다.

def validate(model, device, n_batches=10):
    """Measure classification accuracy over ``n_batches`` batches.

    The model is switched to eval mode and is left in eval mode on return;
    call ``model.train()`` again before resuming training.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        device: Device the batches are moved to before the forward pass.
        n_batches: Number of batches drawn from ``get_batch()``.

    Returns:
        Fraction of correctly classified samples as a float, or ``0.0``
        when no samples were evaluated (for example ``n_batches=0``).
    """
    model.eval()   # evaluation mode (disables dropout and similar layers)
    total_correct = 0
    total_samples = 0

    with torch.no_grad():   # no gradient tracking during evaluation
        for _ in range(n_batches):
            x, y = get_batch()
            x = x.to(device)
            y = y.to(device)

            logits = model(x)                    # (batch, n_classes)
            preds = logits.argmax(dim=1)         # index of the highest score
            correct = (preds == y).sum().item()  # number of correct predictions

            total_correct += correct
            total_samples += y.size(0)

    # Accuracy is undefined without samples; report 0.0 instead of raising
    # ZeroDivisionError.
    if total_samples == 0:
        return 0.0
    return total_correct / total_samples

# Evaluate with the default number of batches and print the accuracy as a
# percentage with two decimals.
acc = validate(model=model, device=device)
print(f"검증 정확도: {acc:.2%}")

torch.inference_mode() — 최적화된 추론 모드

torch.no_grad()보다 더 엄격한 버전입니다. 그래디언트 추적뿐 아니라 뷰·버전 카운터 추적까지 생략하므로 추론 전용 코드에서 더 빠르지만, 이 모드에서 만들어진 텐서는 이후 autograd가 기록하는 연산에 사용할 수 없습니다.

# Compare no_grad with inference_mode on the same input.
x = torch.randn(32, 784)
x = x.to(device)

# no_grad: only gradient tracking is turned off.
with torch.no_grad():
    out1 = model(x)
print(out1.requires_grad)  # False

# inference_mode: gradient tracking and version tracking are both turned off
# (faster).
with torch.inference_mode():
    out2 = model(x)
print(out2.requires_grad)   # False
# out2 is fully detached: using it in an operation that involves
# requires_grad=True tensors raises an error.

# inference_mode can also be applied as a decorator
@torch.inference_mode()
def predict(model, x):
    """Return the predicted class index for every row of ``model(x)``.

    The whole call runs under ``torch.inference_mode()``.

    Args:
        model: Callable producing per-class scores along dimension 1.
        x: Input batch passed to ``model``.

    Returns:
        Tensor holding, for each sample, the index of its highest score.
    """
    scores = model(x)
    return torch.argmax(scores, dim=1)

# One predicted class index per input sample.
preds = predict(model=model, x=x)
print(preds.shape)  # torch.Size([32])

모델 저장과 로드

state_dict 방식 — 권장

# Save: only the parameters are stored (the architecture is defined in code)
save_path = '/tmp/model_weights.pth'
torch.save(model.state_dict(), save_path)
print("모델 저장 완료")

# Load: apply the parameters to a freshly built model of the same structure.
# weights_only=True restricts unpickling to tensors and plain containers, so
# a tampered file cannot execute arbitrary code; a state_dict needs nothing
# more than that.
loaded_model = SimpleClassifier(784, 256, 10)
loaded_model.load_state_dict(
    torch.load(save_path, map_location='cpu', weights_only=True)
)
loaded_model.to(device)
loaded_model.eval()
print("모델 로드 완료")

# Compare: both models must produce the same outputs for the same input
with torch.inference_mode():
    x_test = torch.randn(8, 784).to(device)
    out_orig = model(x_test)
    out_load = loaded_model(x_test)
    print(torch.allclose(out_orig, out_load))  # True

체크포인트 저장 — 학습 재개용

# Checkpoint that also carries the epoch counter and the optimizer state
checkpoint = {
    'epoch': 3,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss.item(),
}
torch.save(checkpoint, '/tmp/checkpoint.pth')

# Resume training from the checkpoint.
# Everything stored above is a tensor, a number or a plain container, so the
# restricted weights_only=True loader is sufficient and avoids running
# arbitrary pickled code from the file.
ckpt = torch.load('/tmp/checkpoint.pth', map_location=device, weights_only=True)
model.load_state_dict(ckpt['model_state_dict'])
optimizer.load_state_dict(ckpt['optimizer_state_dict'])
start_epoch = ckpt['epoch']
print(f"에폭 {start_epoch}부터 재개")

전체 커리큘럼 정리

이 강좌에서 배운 내용을 한눈에 정리합니다.

섹션	주제	핵심 개념
01	텐서 이해하기	스칼라/벡터/행렬/고차원 텐서
02	텐서 생성	zeros, ones, rand, arange
03	속성과 dtype	shape, device, dtype, 형변환
04	인덱싱과 슬라이싱	기본/고급/불리언 인덱싱
05	텐서 연산	수학/행렬/집계 연산
06	형태 변환	reshape, view, squeeze, permute
07	브로드캐스팅	3가지 규칙, 실전 패턴
08	Autograd와 GPU	자동 미분, CUDA, 최적화

다음 학습 가이드

PyTorch 텐서의 기초를 마쳤다면 다음 주제로 나아갈 수 있습니다.

단기 (1~2주):

torch.nn 모듈 심화: Conv2d, BatchNorm, Dropout, Attention
데이터 파이프라인: Dataset, DataLoader, 커스텀 변환

중기 (1~2개월):

컴퓨터 비전: torchvision, ResNet, EfficientNet 직접 구현
자연어 처리: 트랜스포머, 어텐션 메커니즘 구현
강화학습: 정책 그래디언트, DQN

장기 (3개월+):

분산 학습: DDP, FSDP, DeepSpeed
모델 최적화: 양자화(Quantization), 프루닝(Pruning), 지식 증류
커스텀 CUDA 커널 작성

퀴즈를 불러오는 중...