섹션 요약: 변환 패턴과 실전 레시피

expand() vs repeat()

두 함수 모두 텐서를 더 큰 크기로 확장하지만, 메모리 처리 방식이 다릅니다.

expand(): 메모리 공유 확장

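expand()는 크기 1인 차원만 확장할 수 있으며, 새 메모리를 할당하지 않습니다. 확장된 값은 원본 데이터를 가리킵니다. 여러 원소가 같은 메모리 위치를 공유하므로, 확장된 텐서에 in-place 연산으로 값을 쓰면 결과가 잘못될 수 있습니다. 값을 수정해야 한다면 먼저 clone()으로 복사하세요.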

import torch

t = torch.tensor([[1.0],
                  [2.0],
                  [3.0]])   # shape: (3, 1)

# (3, 1) -> (3, 4): expand dim 1 to size 4
expanded = t.expand(3, 4)
print(expanded.shape)   # torch.Size([3, 4])
print(expanded)
# tensor([[1., 1., 1., 1.],
#         [2., 2., 2., 2.],
#         [3., 3., 3., 3.]])

# Passing -1 keeps that dimension's size unchanged
expanded2 = t.expand(-1, 5)   # (3, 1) -> (3, 5)
print(expanded2.shape)         # torch.Size([3, 5])

# Verify memory sharing without the deprecated Tensor.storage() API:
# the expanded view starts at the same address as the original, and the
# expanded dimension has stride 0, so every column re-reads the same
# three elements instead of owning a copy.
print(expanded.data_ptr() == t.data_ptr())   # True  (no new memory)
print(t.stride())                            # (1, 1)
print(expanded.stride())                     # (1, 0)

repeat(): 실제 데이터 복사

repeat()는 텐서를 지정한 횟수만큼 실제로 복사해 새 텐서를 만듭니다. 크기 1 제약이 없습니다.

t = torch.tensor([1, 2, 3])   # shape: (3,)

# Tile the whole vector twice along its only dimension
tiled_1d = t.repeat(2)
print(tiled_1d)                # tensor([1, 2, 3, 1, 2, 3])   shape: (6,)

# 2-D repeat: (row repeats, column repeats)
tiled_2d = t.repeat(2, 3)
print(tiled_2d)
# tensor([[1, 2, 3, 1, 2, 3, 1, 2, 3],
#         [1, 2, 3, 1, 2, 3, 1, 2, 3]])
# shape: (2, 9)

expand vs repeat 선택 기준

상황	권장
크기 1 차원을 확장, 메모리 절약 중요	`expand()`
브로드캐스팅 전 명시적 확장	`expand()`
크기 1이 아닌 차원도 반복 필요	`repeat()`
실제 복사본이 필요한 경우	`repeat()`

# Pre-broadcast expansion pattern
bias = torch.tensor([0.1, 0.2, 0.3])   # shape: (3,)
batch = torch.randn(8, 3)              # shape: (8, 3)

# Memory-efficient expansion: add a leading size-1 dim, then expand it
bias_row = bias.unsqueeze(0)               # shape: (1, 3)
bias_expanded = bias_row.expand(8, -1)     # shape: (8, 3)
result = batch + bias_expanded

# Or, more simply, rely on automatic broadcasting
result = batch + bias

텐서 패딩: F.pad()

torch.nn.functional.pad()는 텐서의 가장자리에 값을 채웁니다. NLP의 시퀀스 패딩, 이미지의 테두리 추가 등에 활용합니다.

pad 인자는 마지막 차원부터 거꾸로 지정하며, 차원마다 (앞쪽, 뒤쪽) 한 쌍씩 입력합니다. 예를 들어 2D 텐서라면 (왼쪽, 오른쪽, 위, 아래) 순서가 됩니다.

import torch.nn.functional as F

t = torch.tensor([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])   # shape: (2, 3)

# Pad the last dim (columns): 1 on the left, 2 on the right
col_pad = (1, 2)
padded = F.pad(t, col_pad)
print(padded.shape)   # torch.Size([2, 6])
print(padded)
# tensor([[0., 1., 2., 3., 0., 0.],
#         [0., 4., 5., 6., 0., 0.]])

# Columns get (1, 1); rows get (2, 0)
col_row_pad = (1, 1, 2, 0)
padded2 = F.pad(t, col_row_pad)
print(padded2.shape)   # torch.Size([4, 5])
print(padded2)
# tensor([[0., 0., 0., 0., 0.],
#         [0., 0., 0., 0., 0.],
#         [0., 1., 2., 3., 0.],
#         [0., 4., 5., 6., 0.]])

패딩 모드

모드	설명	사용처
`'constant'` (기본값)	상수값으로 채움 (기본 0)	일반 패딩
`'reflect'`	가장자리를 거울 반사	이미지 처리
`'replicate'`	가장자리 값 반복	이미지 처리
`'circular'`	순환 패딩	주기적 데이터

t = torch.tensor([[1.0, 2.0, 3.0]])   # shape: (1, 3)
one_each_side = (1, 1)

constant_padded = F.pad(t, one_each_side, mode='constant', value=0)
print(constant_padded)
# tensor([[0., 1., 2., 3., 0.]])

reflect_padded = F.pad(t, one_each_side, mode='reflect')
print(reflect_padded)
# tensor([[2., 1., 2., 3., 2.]])

replicate_padded = F.pad(t, one_each_side, mode='replicate')
print(replicate_padded)
# tensor([[1., 1., 2., 3., 3.]])

실전 변환 레시피 모음

레시피 1: CNN 출력을 FC 레이어 입력으로

# CNN output: (batch, channels, H, W)
cnn_out = torch.randn(32, 64, 7, 7)
batch_size = cnn_out.size(0)

# Option A: reshape, letting -1 infer the flattened feature size
fc_input = cnn_out.reshape(batch_size, -1)   # (32, 3136)

# Option B: flatten (clearer)
fc_input = cnn_out.flatten(1)                # (32, 3136)

레시피 2: 시퀀스 배치 패딩

# Pad sequences of different lengths to a common length
sequences = [torch.randn(5, 64), torch.randn(3, 64), torch.randn(7, 64)]
max_len = max(seq.size(0) for seq in sequences)

padded = []
for seq in sequences:
    missing = max_len - seq.size(0)
    # (0, 0) is the pair for the last dim; (0, missing) pads the end of dim 0
    padded.append(F.pad(seq, (0, 0, 0, missing)))
batch = torch.stack(padded, dim=0)
print(batch.shape)   # torch.Size([3, 7, 64])

레시피 3: 어텐션 마스크 생성

# Actual sequence length of each batch item
lengths = torch.tensor([5, 3, 7])
max_len = 7

# Build a (batch, max_len) mask by comparing position indices to lengths
positions = torch.arange(max_len)[None, :]   # (1, 7)
mask = positions < lengths[:, None]          # (3, 7)
print(mask)
# tensor([[ True,  True,  True,  True,  True, False, False],
#         [ True,  True,  True, False, False, False, False],
#         [ True,  True,  True,  True,  True,  True,  True]])

레시피 4: 다중 헤드 어텐션 shape 변환

batch = 2
seq_len = 10
d_model = 64
n_heads = 8
head_dim = d_model // n_heads   # 8

# (batch, seq, d_model) -> (batch, n_heads, seq, head_dim)
x = torch.randn(batch, seq_len, d_model)
# Split d_model into (n_heads, head_dim), then swap the seq and head axes
x = x.reshape(batch, seq_len, n_heads, head_dim).permute(0, 2, 1, 3)
print(x.shape)   # torch.Size([2, 8, 10, 8])

형태 변환 디버깅 팁

팁 1: 매 단계 shape 출력

x = torch.randn(32, 3, 28, 28)
print(f"입력: {x.shape}")

# Collapse every dim after the batch dim: (32, 3, 28, 28) -> (32, 2352)
x = x.flatten(start_dim=1)
print(f"flatten 후: {x.shape}")

# Insert a size-1 dim at position 1: (32, 2352) -> (32, 1, 2352)
x = x.unsqueeze(dim=1)
print(f"unsqueeze 후: {x.shape}")

팁 2: 원소 수 검증

t = torch.randn(2, 3, 4)

# The element count is always the same before and after a reshape
for candidate in (t, t.reshape(6, 4), t.reshape(24)):
    print(candidate.numel())   # 24 each time

팁 3: 흔한 shape 오류 패턴

# Error 1: element count mismatch
t = torch.randn(2, 3)
# t.reshape(4, 2)   # RuntimeError: shape '[4, 2]' is invalid for input of size 6

# Error 2: calling view on a non-contiguous tensor
t = torch.randn(3, 4).transpose(0, 1)
# t.view(12)   # RuntimeError: view size is not compatible with input tensor's
#              # size and stride ... Use .reshape(...) instead.
flat = t.reshape(12)  # works

# Error 3: expanding a dimension whose size is not 1
t = torch.randn(3, 2)
# t.expand(3, 5)   # RuntimeError: The expanded size of the tensor (5) must match
#                  # the existing size (2) at non-singleton dimension 1.

팁 4: 디버깅용 헬퍼

def shape_info(t, name="tensor"):
    """Print a one-line summary of a tensor for shape debugging.

    Args:
        t: Object exposing ``shape``, ``dtype``, ``is_contiguous()`` and
            ``numel()`` (e.g. a ``torch.Tensor``).
        name: Label printed in front of the summary.
    """
    fields = (
        f"shape={t.shape}",
        f"dtype={t.dtype}",
        f"contiguous={t.is_contiguous()}",
        f"numel={t.numel()}",
    )
    print(f"{name}: " + ", ".join(fields))

# A transposed 2-D tensor, used here as a non-contiguous example
x = torch.randn(3, 4).transpose(0, 1)
shape_info(x, name="전치 후")
# Output: 전치 후: shape=torch.Size([4, 3]), dtype=torch.float32, contiguous=False, numel=12

섹션 퀴즈

퀴즈를 불러오는 중...