통합과 테스트

멀티모델 아키텍처: 5가지 역할

단일 모델로 모든 작업을 처리하는 대신, 각 역할에 최적화된 모델을 배치합니다. 이 패턴은 비용을 낮추면서도 품질을 높일 수 있습니다.

역할	용도	권장 모델	특징
normal	일반 작업, 코드 생성	claude-sonnet-4-5	속도와 품질의 균형
thinking	복잡한 추론, 계획 수립	claude-opus-4-5 (extended thinking)	깊은 사고, 높은 비용
self-critique	자체 출력 검토	claude-sonnet-4-5	별도 컨텍스트에서 실행
vision	이미지·스크린샷 분석	claude-sonnet-4-5	멀티모달 입력 처리
fallback	오류 복구, 재시도	claude-haiku-4-5	빠른 응답, 낮은 비용

/**
 * One entry per model role used by the multi-model setup.
 *
 * The keys are what matter to the code in this file: `keyof ModelRoles` is the
 * set of valid role names. The string values are presumably model identifiers
 * (e.g. the names in the "recommended model" column of the role table) —
 * TODO confirm against the configuration loader.
 */
interface ModelRoles {
  /** General work and code generation; the default role. */
  normal: string;
  /** Complex reasoning and planning. */
  thinking: string;
  /** Review of the agent's own output. */
  selfCritique: string;
  /** Image and screenshot analysis. */
  vision: string;
  /** Error recovery; used when another role fails with a retryable error. */
  fallback: string;
}

/**
 * Routes completion requests to a role-specific LLM client and falls back to
 * the `fallback` client when the chosen role fails with an overload-type error.
 */
class MultiModelClient {
  constructor(
    private clients: Record<keyof ModelRoles, LLMClient>,
  ) {}

  /**
   * Sends `request` to the client registered for `role`.
   *
   * If that call fails with a retryable error (see `isRetryable`) and `role`
   * is not already `'fallback'`, the same request is sent once to the fallback
   * client. Any other failure is rethrown unchanged.
   *
   * @param request - The completion request, forwarded as-is.
   * @param role - Which client to use; defaults to `'normal'`.
   * @returns The response from the role's client, or from the fallback client.
   */
  async complete(
    request: CompletionRequest,
    role: keyof ModelRoles = 'normal',
  ): Promise<CompletionResponse> {
    try {
      return await this.clients[role].complete(request);
    } catch (error) {
      // Guard clause: only overload-type failures on a non-fallback role are rerouted.
      const shouldFallBack = role !== 'fallback' && this.isRetryable(error);
      if (!shouldFallBack) {
        throw error;
      }

      console.warn(`[MultiModel] ${role} 실패, fallback으로 전환:`, error);
      return this.clients.fallback.complete(request);
    }
  }

  /** True only for `APIError`s with status 503 (service unavailable) or 529 (overloaded). */
  private isRetryable(error: unknown): boolean {
    return error instanceof APIError && (error.status === 529 || error.status === 503);
  }
}

근사적 출력 처리 (Approximate Output Handling)

LLM의 출력이 항상 예상한 형식을 따르는 것은 아닙니다. 특히 JSON 파싱, 코드 블록 추출, 구조화된 응답 파싱에서 실패가 잦습니다. 방어적 파싱이 필수입니다.

/**
 * Parses JSON out of a raw LLM reply, tolerating common formatting noise.
 *
 * Strategies are tried in order and the first successful parse wins:
 *  1. the whole string as JSON;
 *  2. the contents of the first Markdown code fence (with or without a `json` tag);
 *  3. a balanced `{ ... }` object embedded in surrounding text;
 *  4. a balanced `[ ... ]` array embedded in surrounding text.
 *
 * Steps 3 and 4 match brackets while ignoring any that occur inside JSON
 * string literals, so trailing prose containing stray braces does not break
 * extraction. Objects are tried before arrays.
 *
 * @param raw - Raw model output.
 * @returns The parsed value. It is `unknown`; callers must validate its shape.
 * @throws Error when no strategy yields valid JSON. The message includes the
 *   first 200 characters of `raw`.
 */
function parseJsonResponse(raw: string): unknown {
  // Attempt 1: the reply is already pure JSON.
  try {
    return JSON.parse(raw);
  } catch { /* fall through */ }

  // Attempt 2: strip a Markdown code fence.
  const codeBlockMatch = raw.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (codeBlockMatch) {
    try {
      return JSON.parse(codeBlockMatch[1].trim());
    } catch { /* fall through */ }
  }

  // Attempts 3 and 4: find an embedded, balanced object (then array).
  const bracketPairs: ReadonlyArray<readonly [string, string]> = [['{', '}'], ['[', ']']];
  for (const [open, close] of bracketPairs) {
    for (let start = raw.indexOf(open); start !== -1; start = raw.indexOf(open, start + 1)) {
      const end = findBalancedJsonEnd(raw, start, open, close);
      if (end === -1) continue;
      try {
        return JSON.parse(raw.slice(start, end + 1));
      } catch { /* try the next candidate */ }
    }
  }

  throw new Error(`JSON 파싱 실패. 원본: ${raw.slice(0, 200)}`);
}

/**
 * Returns the index of the bracket that closes the one at `start`, or -1 if
 * it is never closed. Brackets inside double-quoted strings are ignored.
 */
function findBalancedJsonEnd(text: string, start: number, open: string, close: string): number {
  let depth = 0;
  let inString = false;

  for (let i = start; i < text.length; i++) {
    const ch = text[i];
    if (inString) {
      if (ch === '\\') {
        i++; // skip the escaped character so an escaped quote does not end the string
      } else if (ch === '"') {
        inString = false;
      }
    } else if (ch === '"') {
      inString = true;
    } else if (ch === open) {
      depth++;
    } else if (ch === close) {
      depth--;
      if (depth === 0) return i;
    }
  }

  return -1;
}

에러 처리 패턴

/**
 * Runs `fn`, retrying retryable failures with exponential backoff.
 *
 * After the k-th failed attempt (1-based) the function waits
 * `baseDelayMs * 2^(k-1)` milliseconds before trying again. Whether an error
 * is retryable is decided by `isRetryableError`; any other error, and the
 * error from the final attempt, is rethrown unchanged.
 *
 * @param fn - The async operation to run.
 * @param maxAttempts - Total number of attempts, including the first. Must be >= 1.
 * @param baseDelayMs - Delay before the second attempt, in milliseconds.
 * @returns The value `fn` resolves to.
 * @throws RangeError if `maxAttempts` is less than 1 or NaN. Previously this
 *   case rejected with `undefined` without ever calling `fn`.
 */
async function withRetry<T>(
  fn: () => Promise<T>,
  maxAttempts = 3,
  baseDelayMs = 1000,
): Promise<T> {
  // Written as a negated >= so that NaN is rejected as well.
  if (!(maxAttempts >= 1)) {
    throw new RangeError(`maxAttempts must be >= 1 (received ${maxAttempts})`);
  }

  let lastError: unknown;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      return await fn();
    } catch (error) {
      lastError = error;

      if (!isRetryableError(error) || attempt === maxAttempts) throw error;

      const delay = baseDelayMs * 2 ** (attempt - 1);
      console.warn(`[Retry] 시도 ${attempt}/${maxAttempts} 실패. ${delay}ms 후 재시도`);
      await sleep(delay);
    }
  }

  // Reached only when maxAttempts is fractional, so the loop ends without
  // `attempt === maxAttempts` ever being true.
  throw lastError;
}

/**
 * Decides whether a failure is worth retrying.
 *
 * - `APIError`: retryable only for status 429 (rate limit), 503 (service
 *   unavailable) or 529 (overloaded).
 * - Anything else: retryable only if it is a `TypeError`, which this code
 *   treats as a network error.
 */
function isRetryableError(error: unknown): boolean {
  if (!(error instanceof APIError)) {
    return error instanceof TypeError;
  }

  switch (error.status) {
    case 429:
    case 503:
    case 529:
      return true;
    default:
      return false;
  }
}

엔드투엔드 통합 테스트

실제 LLM 호출 없이 전체 에이전트 흐름을 테스트하기 위해 MockLLMClient를 사용합니다.

import { describe, it, expect, vi } from 'vitest';

/**
 * Test double for `LLMClient` that replays a fixed script of responses.
 *
 * Each call to `complete` returns the next scripted response and ignores the
 * request. When the script is exhausted it wraps around to the first response,
 * so a single-entry script repeats forever.
 */
class MockLLMClient implements LLMClient {
  private responses: CompletionResponse[];
  private callIndex = 0;

  /**
   * @param responses - Responses to replay, in call order. The array is stored
   *   by reference, not copied.
   */
  constructor(responses: CompletionResponse[]) {
    this.responses = responses;
  }

  /**
   * Returns the next scripted response.
   *
   * @throws Error if the script is empty. Without this guard the index would
   *   be `callIndex % 0` (NaN) and the method would resolve to `undefined`
   *   while claiming to return a `CompletionResponse`.
   */
  async complete(_request: CompletionRequest): Promise<CompletionResponse> {
    if (this.responses.length === 0) {
      throw new Error('MockLLMClient: no scripted responses configured');
    }

    const response = this.responses[this.callIndex % this.responses.length];
    this.callIndex++;
    return response;
  }
}

// Integration tests for MainAgent. A scripted MockLLMClient stands in for the
// real model, so the agent loop runs without any real LLM call.
describe('MainAgent 통합 테스트', () => {
  // Happy path: the model asks for a tool, the tool runs, and the model's
  // second reply is the final answer.
  it('툴 호출 후 결과를 반영해 최종 답변을 생성한다', async () => {
    const mockLLM = new MockLLMClient([
      // 1st response: the model requests a tool call
      {
        stopReason: 'tool_use',
        content: '',
        toolCalls: [{ id: 'call_1', name: 'read_file', input: { path: '/test.txt' } }],
      },
      // 2nd response: the final answer, incorporating the tool result
      {
        stopReason: 'end_turn',
        content: '파일 내용을 확인했습니다: Hello World',
        toolCalls: [],
      },
    ]);

    // Register a stub `read_file` tool that always returns a fixed string.
    const registry = new ToolRegistry();
    registry.register({
      name: 'read_file',
      description: 'test',
      inputSchema: {},
      execute: async () => 'Hello World',
    });

    // Wire the agent with in-memory / no-op collaborators.
    const agent = new MainAgent({
      llmClient: mockLLM,
      toolRegistry: registry,
      sessionStore: new InMemorySessionStore(),
      promptComposer: new PromptComposer(),
      approvalManager: new NoOpApprovalManager(),
      // NOTE(review): presumably safe because this scenario never spawns a
      // sub-agent — confirm, since `null as any` bypasses type checking.
      subAgentFactory: null as any,
    });

    const result = await agent.run('/test.txt 파일을 읽어주세요');

    expect(result.success).toBe(true);
    expect(result.output).toContain('Hello World');
    // One turn per scripted LLM response: the tool request, then the final answer.
    expect(result.turns).toBe(2);
  });

  // Boundary case: a one-entry script makes MockLLMClient return the same
  // tool_use response on every call, so the model never ends its turn.
  it('maxTurns 초과 시 실패를 반환한다', async () => {
    const infiniteToolCall = new MockLLMClient([
      { stopReason: 'tool_use', content: '', toolCalls: [{ id: 'c', name: 'read_file', input: {} }] },
    ]);
    // ... (the remainder of this test is elided in the source)
  });
});

테스트 체크리스트

에이전트 harness를 배포하기 전 다음 항목을 확인합니다.

범주	확인 항목	방법
기능	툴 호출 → 결과 반영 루프 동작	MockLLMClient 통합 테스트
기능	SubAgentSpec allowedTools 필터링	단위 테스트
기능	세션 자동저장 및 재개	파일 I/O 테스트
기능	설정 우선순위 해석	ConfigResolver 단위 테스트
안정성	maxTurns 초과 처리	경계값 테스트
안정성	LLM API 오류 시 fallback 전환	에러 주입 테스트
안정성	컨텍스트 압축 후 히스토리 일관성	스냅샷 테스트
안전성	승인 거부 시 툴 실행 차단	ApprovalManager 테스트
성능	100턴 실행 후 메모리 사용량	부하 테스트

디버깅 전략

/**
 * Debug wrapper around another `LLMClient`: logs a summary of every request
 * and response at debug level, then returns the inner client's response
 * unchanged. Only sizes and names are logged, not message contents.
 */
class DebugLLMClient implements LLMClient {
  constructor(private inner: LLMClient, private logger: Logger) {}

  /**
   * Forwards `request` to the wrapped client, logging before and after.
   *
   * If the inner call rejects, the rejection propagates and no response
   * entry is logged.
   */
  async complete(request: CompletionRequest): Promise<CompletionResponse> {
    const { messages, system, tools } = request;
    this.logger.debug('LLM 요청', {
      messageCount: messages.length,
      systemLength: system?.length,
      tools: tools?.map((tool) => tool.name),
    });

    // Wall-clock time spent in the wrapped call.
    const startedAt = Date.now();
    const response = await this.inner.complete(request);
    const elapsedMs = Date.now() - startedAt;

    const { stopReason, content, toolCalls } = response;
    this.logger.debug('LLM 응답', {
      stopReason,
      contentLength: content?.length,
      toolCalls: toolCalls?.map((call) => call.name),
      elapsedMs,
    });

    return response;
  }
}