Skip to content

Commit 51c73f3

Browse files
jinsoo
authored and committed
feat: distill dockerfile in pod
1 parent fab6dfc commit 51c73f3

File tree

8 files changed

+88
-74
lines changed

8 files changed

+88
-74
lines changed

.env.example

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
OPENAI_API_KEY=""
22
HF_API=""
3+
GEMINI_API_KEY=""
34
# OpenAI 429 시 vLLM(Qwen2.5-7B 등) 폴백. true + vllm 설치 시 429 발생하면 vLLM으로 재시도. 자세한 내용: etc_md/OPENAI_RATE_LIMIT_VLLM_FALLBACK.md
45
# ENABLE_VLLM_FALLBACK_ON_RATE_LIMIT=false
56
# vLLM 사용 시(429 폴백·기동 시 1차) RunPod Serverless GPU 사용. true면 요청 시 GPU 기동·유휴 시 스케일다운(비용 절감). 대상: RunPod. RUNPOD_API_KEY 필요.

Dockerfile.labeling-llm

Lines changed: 22 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,29 @@
1-
# 라벨링 전용 이미지 (label_for_distill.py)
2-
# CPU 환경. LLM은 OpenAI API 또는 RunPod vLLM(VLLM_POD_BASE_URL) HTTP 호출.
1+
# RunPod Pod 전용 vLLM 이미지 (네트워크 볼륨 /workspace, OpenAI 호환 API)
2+
# - 이미지 내 /opt/llm-models 에 Qwen/Qwen2.5-7B-Instruct 포함.
3+
# - 엔트리포인트: /workspace에 모델 있으면 바로 vLLM / 없으면 이미지→/workspace 복사 후 vLLM / 복사 실패 시 다운로드(재시도).
34
#
4-
# 빌드: docker build -f Dockerfile.labeling-llm -t tasteam-labeling-llm .
5-
# 실행: docker run -v $(pwd)/distill_pipeline_output:/app/distill_pipeline_output \
6-
# -v $(pwd)/tasteam_app_all_review_data.json:/app/tasteam_app_all_review_data.json \
7-
# --env-file .env tasteam-labeling-llm
5+
# 빌드: docker build -f Dockerfile.runpod-pod-vllm -t tasteam-runpod-pod-vllm:latest .
6+
# (빌드 시 HuggingFace에서 모델 다운로드하므로 시간·용량 큼. HF_TOKEN 있으면 private 허용)
87
#
9-
# 환경변수: OPENAI_API_KEY, VLLM_POD_BASE_URL(teacher), LLM_PROVIDER 등
10-
11-
FROM python:3.11-slim
12-
13-
ENV PYTHONUNBUFFERED=1
14-
ENV PYTHONDONTWRITEBYTECODE=1
15-
16-
WORKDIR /app
17-
18-
# PyTorch CPU (LLMUtils import용, 실제 추론은 HTTP)
19-
RUN pip install --no-cache-dir --upgrade pip && \
20-
pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
8+
# 실행 예 (로컬 테스트):
9+
# docker run --gpus all -p 8000:8000 --ipc=host \
10+
# -e HF_TOKEN=$HF_TOKEN -v /path/to/volume:/workspace \
11+
# tasteam-runpod-pod-vllm:latest
12+
#
13+
FROM vllm/vllm-openai:v0.11.0
2114

22-
COPY requirements.labeling-llm.txt /app/
23-
RUN pip install --no-cache-dir -r requirements.labeling-llm.txt
15+
ENV MODEL_NAME=/workspace/llm-models/Qwen/Qwen2.5-7B-Instruct
16+
ENV HF_MODEL_ID=Qwen/Qwen2.5-7B-Instruct
17+
# 이미지 내 모델 경로 (빌드 시 채워짐. 기동 시 /workspace 없으면 여기서 복사)
18+
ENV MODEL_IMAGE_PATH=/opt/llm-models/Qwen/Qwen2.5-7B-Instruct
2419

25-
COPY src /app/src
26-
COPY scripts/label_for_distill.py scripts/data_augmentation.py /app/scripts/
20+
RUN pip install -q huggingface_hub && \
21+
python3 -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen2.5-7B-Instruct', local_dir='/opt/llm-models/Qwen/Qwen2.5-7B-Instruct', local_dir_use_symlinks=False)"
2722

28-
ENV PYTHONPATH=/app
23+
COPY scripts/runpod_pod_entrypoint.sh /entrypoint.sh
24+
RUN chmod +x /entrypoint.sh
2925

30-
# 실행: --train-path, --output-dir 등 인자 전달 필요
31-
# 예: docker run ... tasteam-labeling-llm --train-path /app/datasets/xxx/train.json --output-dir /app/labeled/xxx --openai-cap 500
32-
ENTRYPOINT ["python", "scripts/label_for_distill.py"]
33-
CMD ["--help"]
26+
ENTRYPOINT ["/entrypoint.sh"]
27+
CMD ["--model", "/workspace/llm-models/Qwen/Qwen2.5-7B-Instruct", "--max-model-len", "4096", "--tensor-parallel-size", "1", "--gpu-memory-utilization", "0.90"]
3428

35-
# docker build -f Dockerfile.labeling-llm -t tasteam-labeling-llm .
36-
# docker run -v $(pwd)/distill_pipeline_output:/app/distill_pipeline_output --env-file .env tasteam-labeling-llm \
37-
# --train-path /app/distill_pipeline_output/datasets/xxx/train.json \
38-
# --output-dir /app/distill_pipeline_output/labeled/xxx --openai-cap 500
29+
EXPOSE 8000

Dockerfile.train-llm

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,38 @@
11
# 학습 전용 이미지 (scripts/train_qlora.py)
22
# GPU 필수. QLoRA SFT, wandb.
3+
# 모델: 빌드 시 /opt에 다운로드, 기동 시 /workspace 없으면 복사 후 train_qlora 실행.
34
#
45
# 빌드: docker build -f Dockerfile.train-llm -t tasteam-train-llm .
5-
# 실행: docker run --gpus all \
6-
# -v $(pwd)/distill_pipeline_output:/app/distill_pipeline_output \
7-
# --env-file .env tasteam-train-llm --labeled-path /app/distill_pipeline_output/labeled/xxx/train_labeled.json --output-dir /app/distill_pipeline_output
86
#
9-
# 환경변수: WANDB_API_KEY, HF_HOME 등
7+
# 로컬 실행:
8+
# docker run --gpus all \
9+
# -v $(pwd)/distill_pipeline_output:/app/distill_pipeline_output \
10+
# --env-file .env tasteam-train-llm \
11+
# --labeled-path /app/distill_pipeline_output/labeled/YYYYMMDD_HHMMSS/train_labeled.json \
12+
# --output-dir /app/distill_pipeline_output
13+
#
14+
# RunPod Pod (Network Volume, 학습 결과 저장):
15+
# -v /path/to/network/volume:/workspace \
16+
# -e HF_HOME=/workspace/hf-cache -e HF_HUB_CACHE=/workspace/hf-cache \
17+
# tasteam-train-llm \
18+
# --labeled-path /workspace/distill_pipeline_output/labeled/YYYYMMDD_HHMMSS/train_labeled.json \
19+
# --output-dir /workspace/distill_pipeline_output
20+
# (adapter → /workspace/distill_pipeline_output/runs/YYYYMMDD_HHMMSS/adapter)
21+
#
22+
# 환경변수: WANDB_API_KEY, HF_HOME(선택), HF_HUB_CACHE(선택)
23+
24+
FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04
1025

11-
FROM pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime
26+
ENV MODEL_NAME=/workspace/llm-models/Qwen/Qwen2.5-0.5B-Instruct
27+
ENV HF_MODEL_ID=Qwen/Qwen2.5-0.5B-Instruct
28+
# 이미지 내 모델 경로 (빌드 시 채워짐. 기동 시 /workspace 없으면 여기서 복사)
29+
ENV MODEL_IMAGE_PATH=/opt/llm-models/Qwen/Qwen2.5-0.5B-Instruct
30+
31+
RUN pip install -q huggingface_hub && \
32+
python3 -c "from huggingface_hub import snapshot_download; snapshot_download('Qwen/Qwen2.5-0.5B-Instruct', local_dir='/opt/llm-models/Qwen/Qwen2.5-0.5B-Instruct', local_dir_use_symlinks=False)"
33+
34+
COPY scripts/runpod_pod_entrypoint_05b.sh /entrypoint.sh
35+
RUN chmod +x /entrypoint.sh
1236

1337
ENV PYTHONUNBUFFERED=1
1438
ENV PYTHONDONTWRITEBYTECODE=1
@@ -19,14 +43,11 @@ COPY requirements.train-llm.txt /app/
1943
RUN pip install --no-cache-dir --upgrade pip && \
2044
pip install --no-cache-dir -r requirements.train-llm.txt
2145

46+
# train_qlora.py 단독 사용. src/ import 시 COPY 추가 필요.
2247
COPY scripts/train_qlora.py /app/scripts/
2348

2449
ENV PYTHONPATH=/app
2550

26-
ENTRYPOINT ["python", "scripts/train_qlora.py"]
27-
CMD ["--help"]
28-
29-
# docker build -f Dockerfile.train-llm -t tasteam-train-llm .
30-
# docker run --gpus all -v $(pwd)/distill_pipeline_output:/app/distill_pipeline_output --env-file .env tasteam-train-llm \
31-
# --labeled-path /app/distill_pipeline_output/labeled/xxx/train_labeled.json \
32-
# --output-dir /app/distill_pipeline_output
51+
ENTRYPOINT ["/entrypoint.sh"]
52+
# 기본: /workspace (RunPod 네트워크 볼륨). 로컬 실행 시 --output-dir /app/distill_pipeline_output 오버라이드.
53+
CMD ["--output-dir", "/workspace/distill_pipeline_output"]

Dockerfile.vllm-pod

Lines changed: 0 additions & 20 deletions
This file was deleted.

requirements.train-llm.txt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,17 @@
44
# 사용: pip install -r requirements.train-llm.txt
55
# Docker: Dockerfile.train-llm (PyTorch CUDA 베이스)
66

7-
# LLM / 학습
8-
transformers>=4.30.0
7+
# LLM / 학습 (버전 고정: transformers/peft/trl 호환성)
8+
transformers==4.44.2
99
accelerate>=0.20.0
10-
peft>=0.10.0
10+
peft==0.13.2
1111
bitsandbytes>=0.43.0
12-
trl>=0.12.0
12+
trl==0.12.0
1313
datasets>=2.18.0
1414
huggingface_hub>=0.20.0
1515

16-
# 오케스트레이션
17-
prefect>=2.0.0
16+
# prefect: WANDB_RUN_ID 전달용, train_qlora는 import 안 함. 선택 사항(이미지 절약 시 제거 가능)
17+
# prefect>=2.0.0
1818

1919
# 유틸
2020
python-dotenv>=1.0.0

scripts/runpod_cli/pod_create_delete_cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def get_default_pod_payload() -> Dict[str, Any]:
8484
"dataCenterPriority": "availability",
8585
"dockerEntrypoint": [],
8686
"dockerStartCmd": [],
87-
"env": {"ENV_VAR": "value"},
87+
"env": {"ENV_VAR": "value",**({"WANDB_API_KEY": os.environ["WANDB_API_KEY"]} if os.environ.get("WANDB_API_KEY") else {}),},
8888
"globalNetworking": False,
8989
"gpuCount": 1,
9090
"gpuTypeIds": ["NVIDIA GeForce RTX 4090"],
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env bash
2+
# RunPod Pod 학습용: /workspace에 모델 있으면 바로 train_qlora / 없으면 이미지→/workspace 복사 후 train_qlora.
3+
4+
set -e
5+
6+
MODEL_PATH="${MODEL_NAME:-/workspace/llm-models/Qwen/Qwen2.5-0.5B-Instruct}"
7+
MODEL_IMAGE_PATH="${MODEL_IMAGE_PATH:-/opt/llm-models/Qwen/Qwen2.5-0.5B-Instruct}"
8+
9+
if [[ -f "${MODEL_PATH}/config.json" ]]; then
10+
echo "[entrypoint] Model present at ${MODEL_PATH}, starting Qwen2.5-0.5B-Instruct."
11+
elif [[ -f "${MODEL_IMAGE_PATH}/config.json" ]]; then
12+
echo "[entrypoint] Copying model from image ${MODEL_IMAGE_PATH} to ${MODEL_PATH}..."
13+
mkdir -p "$(dirname "$MODEL_PATH")"
14+
cp -a "${MODEL_IMAGE_PATH}" "$(dirname "$MODEL_PATH")/"
15+
echo "[entrypoint] Copy done, starting Qwen2.5-0.5B-Instruct."
16+
else
17+
echo "[entrypoint] ERROR: Model not found at ${MODEL_PATH} nor in image at ${MODEL_IMAGE_PATH}." >&2
18+
exit 1
19+
fi
20+
21+
exec python /app/scripts/train_qlora.py --student-model "${MODEL_PATH}" "$@"

src/config.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -74,21 +74,21 @@ class _InferenceConfig:
7474
ENABLE_OPENAI_FALLBACK: bool = os.getenv("ENABLE_OPENAI_FALLBACK", "false").lower() == "true"
7575
# 벤더 API 페일오버 (failover_router_strategy.md): OpenAI 429/5xx 시 Gemini 등 다른 벤더로 전환
7676
GEMINI_API_KEY: Optional[str] = os.getenv("GEMINI_API_KEY")
77-
GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-1.5-flash")
77+
GEMINI_MODEL: str = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
7878
LLM_FAILOVER_ENABLED: bool = bool(os.getenv("GEMINI_API_KEY", "").strip())
7979
# [Deprecated] OpenAI 429 시 self-hosted vLLM 폴백 → 현재는 벤더 API(LLMFailoverRouter) 페일오버로 대체됨
8080
ENABLE_VLLM_FALLBACK_ON_RATE_LIMIT: bool = os.getenv("ENABLE_VLLM_FALLBACK_ON_RATE_LIMIT", "false").lower() == "true"
8181
# [Deprecated] vLLM 폴백 시 RunPod Serverless 사용 → 벤더 API 페일오버(GEMINI_API_KEY)로 대체됨
8282
VLLM_USE_RUNPOD_GPU: bool = os.getenv("VLLM_USE_RUNPOD_GPU", "false").lower() == "true"
8383
# RunPod vLLM 전용 엔드포인트 ID. 미설정 시 RUNPOD_ENDPOINT_ID 사용.
84-
RUNPOD_VLLM_ENDPOINT_ID: Optional[str] = (os.getenv("RUNPOD_VLLM_ENDPOINT_ID", "2mpd5y6lvccfk1") or "").strip() or None
84+
#RUNPOD_VLLM_ENDPOINT_ID: Optional[str] = (os.getenv("RUNPOD_VLLM_ENDPOINT_ID", "2mpd5y6lvccfk1") or "").strip() or None
8585
RUNPOD_API_KEY: Optional[str] = os.getenv("RUNPOD_API_KEY")
86-
RUNPOD_ENDPOINT_ID: str = (os.getenv("RUNPOD_ENDPOINT_ID", "2mpd5y6lvccfk1") or "").strip() or "2mpd5y6lvccfk1"
86+
#RUNPOD_ENDPOINT_ID: str = (os.getenv("RUNPOD_ENDPOINT_ID", "2mpd5y6lvccfk1") or "").strip() or "2mpd5y6lvccfk1"
8787
USE_RUNPOD: bool = os.getenv("USE_RUNPOD", "true").lower() == "true"
8888
RUNPOD_POLL_INTERVAL: int = int(os.getenv("RUNPOD_POLL_INTERVAL", "2"))
8989
RUNPOD_MAX_WAIT_TIME: int = int(os.getenv("RUNPOD_MAX_WAIT_TIME", "300"))
9090
# RunPod Serverless vLLM 엔드포인트 사용 (앱 내 인프로세스 vLLM 제거됨)
91-
USE_POD_VLLM: bool = os.getenv("USE_POD_VLLM", "true").lower() == "true"
91+
#USE_POD_VLLM: bool = os.getenv("USE_POD_VLLM", "true").lower() == "true"
9292
# RunPod Pod 직접 URL (vLLM OpenAI 호환 /v1). 설정 시 Serverless 대신 이 URL로 추론. 기본값: 213.173.108.29:16366 (test_all_task 연동)
9393
VLLM_POD_BASE_URL: Optional[str] = (os.getenv("VLLM_POD_BASE_URL", "http://213.173.108.70:17517/v1") or "").strip() or None
9494

0 commit comments

Comments
 (0)