diff --git a/dhleesep9/transformer.py b/dhleesep9/transformer.py
new file mode 100644
index 0000000..4407d73
--- /dev/null
+++ b/dhleesep9/transformer.py
@@ -0,0 +1,113 @@
+import datasets
+from datasets import load_dataset, DatasetDict
+from transformers import pipeline
+import torch
+import pandas as pd # 데이터 확인용
+
+# Pick the device for the pipelines: use the GPU when CUDA is available (Colab usually provides one)
+device = 0 if torch.cuda.is_available() else -1  # 0 -> GPU, -1 -> CPU (matches the message printed below)
+print(f"사용 가능한 디바이스: {'GPU' if device == 0 else 'CPU'}")
+
+
+full_dataset = load_dataset("yelp_polarity", split="train")  # "train" split of the yelp_polarity dataset
+full_dataset = full_dataset.remove_columns(["label"])  # drop "label"; only the 'text' column is read below
+
+# For the exercise, keep only the first num_samples_to_use rows
+num_samples_to_use = 20
+full_dataset_subset = full_dataset.select(range(num_samples_to_use))
+
+print("로드된 데이터셋 정보:")
+print(full_dataset_subset)
+
+print("\n첫 번째 데이터 예시 (text 확인):")
+print(full_dataset_subset[0]['text'])
+
+# Convert the subset to a pandas DataFrame to inspect it (optional)
+df_check = pd.DataFrame(full_dataset_subset)
+print("\n데이터셋 일부 미리보기 (DataFrame):")
+display(df_check.head(3)) # display() renders a table in Colab. NOTE(review): display is never imported in this file, so this presumably fails outside IPython/Colab - confirm
+
+
+translator = pipeline(  # translation pipeline; the source/target languages are passed per call further down
+    task="translation",
+    model="facebook/nllb-200-distilled-600M",
+    device=device  # device index selected near the top of the script
+)
+
+print("번역 파이프라인 로드 완료.")
+
+
+# Define the function that performs the translation
+def translate_english_to_korean(example):
+  """Translate example['text'] (English -> Korean), store it under 'korean_translate', and return the example."""
+  translation_result = translator(
+        example['text'],
+        tgt_lang="kor_Hang",
+        src_lang="eng_Latn",
+        max_length=400,
+        min_length=30,  # NOTE(review): presumably forces at least 30 generated tokens even for very short reviews - confirm intended
+        do_sample=False  # False for deterministic output, True for more varied output
+    )
+    # Extract the translated text from the first pipeline result
+  example['korean_translate'] = translation_result[0]['translation_text']
+  return example
+
+print("번역 작업을 시작합니다... (모델 크기와 데이터 양에 따라 시간이 많이 소요될 수 있습니다)")
+translated_dataset = full_dataset_subset.map(translate_english_to_korean)  # applies the function to each example, adding 'korean_translate'
+print("번역 작업 완료.")
+
+print("\n번역이 추가된 데이터셋 정보:")
+print(translated_dataset)
+print(translated_dataset[0]['text'])  # first sample: original English text
+print(translated_dataset[0]['korean_translate'])  # first sample: Korean translation
+
+# Inspect the dataset with pandas
+df_check_translation = pd.DataFrame(translated_dataset)
+print("\n번역 추가 후 데이터셋 미리보기 (DataFrame):")
+display(df_check_translation.head(3))  # NOTE(review): display is not imported in this file - presumably relies on IPython/Colab, confirm
+
+
+emotion_classifier = pipeline(  # text-classification pipeline applied to the Korean translations further down
+    task="text-classification",
+    model="nlptown/bert-base-multilingual-uncased-sentiment",
+    top_k=1,  # keep only the single highest-scoring label
+    device=device
+)
+print("감정 분석 파이프라인 로드 완료.")
+
+# Quick smoke test of the classifier (optional)
+test_emotion = emotion_classifier("좋았다")
+print(f"감정 분석 테스트: {test_emotion[0][0]}")  # [0][0] picks the one prediction out of the nested result
+
+
+# 감정 분석을 수행하는 함수 정의
+def analyze_emotion(example):
+  emotion_result = emotion_classifier(example['korean_translate'])
+  example['emotion'] = emotion_result[0][0]['label']
+  return example
+
+print("감정 분석 작업을 시작합니다...")
+final_dataset = translated_dataset.map(analyze_emotion)  # applies the function to each example, adding 'emotion'
+print("감정 분석 작업 완료.")
+
+print("\n최종 데이터셋 정보:")
+print(final_dataset)
+
+print(final_dataset[0]['korean_translate'])  # first sample: Korean translation
+print("\n--- 분석된 감정 (Emotion) ---")
+print(final_dataset[0]['emotion'])  # first sample: predicted label
+
+# Inspect the final dataset with pandas
+df_final = pd.DataFrame(final_dataset)
+print("\n최종 데이터셋 미리보기 (DataFrame):")
+display(df_final) # show every selected sample. NOTE(review): display is not imported in this file - presumably relies on IPython/Colab, confirm
+
+
+# 최종 결과 확인 (첫 5개 샘플)
+for i in range(min(5, len(final_dataset))):
+  print(f"\n--- 샘플 {i+1} ---")
+  print(f"원문 일부: {final_dataset[i]['text'][:100]}...")
+  print(f"한국어 번역: {final_dataset[i]['korean_translate']}")
+  print(f"예측 평점: {final_dataset[i]['emotion']}")
+
+
diff --git a/transformer.ipynb b/transformer.ipynb
index 1ac0625..306504d 100644
--- a/transformer.ipynb
+++ b/transformer.ipynb
@@ -1,323 +1 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "A3hT1XeLfGyo"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 1. 환경 설정 및 라이브러리 설치 (Cell 1)\n",
-    "\"\"\"\n",
-    "이 셀에서는 실습에 필요한 라이브러리들을 설치하고 임포트합니다.\n",
-    "- transformers: Hugging Face 모델 및 파이프라인 사용을 위한 라이브러리\n",
-    "- datasets: KLUE 데이터셋 등 Hugging Face Hub의 데이터셋 로드를 위한 라이브러리\n",
-    "- sentencepiece: KoBART 등 일부 모델에서 사용하는 토크나이저 라이브러리\n",
-    "- accelerate: 모델 로딩 및 분산 처리를 도와주는 라이브러리 (특히 NLLB 모델에 유용)\n",
-    "- torch: PyTorch 라이브러리 (기본 백엔드)\n",
-    "\"\"\"\n",
-    "# TODO: 필요한 라이브러리를 설치하는 명령어를 작성하세요. (transformers, datasets, sentencepiece, accelerate, torch)\n",
-    "# !pip install ...\n",
-    "\n",
-    "import datasets\n",
-    "from datasets import load_dataset, DatasetDict\n",
-    "from transformers import pipeline\n",
-    "import torch\n",
-    "import pandas as pd # 데이터 확인용\n",
-    "\n",
-    "# GPU 사용 가능 여부 확인 및 설정 (Colab에서는 보통 GPU 사용 가능)\n",
-    "device = 0 if torch.cuda.is_available() else -1\n",
-    "print(f\"사용 가능한 디바이스: {'GPU' if device == 0 else 'CPU'}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "EfLCkC9DfMnA"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 2. 데이터셋 로드 및 준비 (Cell 2)\n",
-    "\"\"\"\n",
-    "이 셀에서는 KLUE 데이터셋의 MRC 부분을 로드합니다.\n",
-    "전체 데이터셋은 클 수 있으므로, 실습을 위해 일부 데이터만 선택하여 사용합니다.\n",
-    "'context' 컬럼이 우리가 요약할 원본 기사 내용입니다.\n",
-    "\"\"\"\n",
-    "# TODO: KLUE MRC 데이터셋의 'train' split을 로드하세요. (변수명: full_dataset)\n",
-    "# full_dataset = load_dataset(...)\n",
-    "\n",
-    "# 실습을 위해 데이터 일부만 선택 (예: 앞 10개)\n",
-    "num_samples_to_use = 10\n",
-    "# TODO: full_dataset에서 앞 'num_samples_to_use' 개의 샘플만 선택하여 klue_mrc_subset 변수에 저장하세요.\n",
-    "# klue_mrc_subset = full_dataset.select(...)\n",
-    "\n",
-    "print(\"로드된 데이터셋 정보:\")\n",
-    "print(klue_mrc_subset)\n",
-    "\n",
-    "print(\"\\n첫 번째 데이터 예시 (context 확인):\")\n",
-    "print(klue_mrc_subset[0]['context'])\n",
-    "\n",
-    "# 데이터셋 확인을 위해 Pandas DataFrame으로 변환 (선택 사항)\n",
-    "df_check = pd.DataFrame(klue_mrc_subset)\n",
-    "print(\"\\n데이터셋 일부 미리보기 (DataFrame):\")\n",
-    "display(df_check.head(3)) # Colab 환경에서는 display()가 표 형태로 보여줍니다."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "wmT6YQgJfO7T"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 3. 요약 모델 파이프라인 로드 (Cell 3)\n",
-    "\"\"\"\n",
-    "이 셀에서는 기사 요약을 위한 KoBART 기반 모델 파이프라인을 로드합니다.\n",
-    "모델: gogamza/kobart-summarization\n",
-    "파이프라인 타입: summarization\n",
-    "\"\"\"\n",
-    "# TODO: 'summarization' 파이프라인을 로드하고, 사용할 모델은 'gogamza/kobart-summarization'로 지정하세요.\n",
-    "# GPU 사용 설정(device=device)도 추가하세요. (변수명: summarizer)\n",
-    "# summarizer = pipeline(...)\n",
-    "\n",
-    "print(\"요약 파이프라인 로드 완료.\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "I3LB2qL6fQom"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 4. 기사 내용 요약 및 데이터셋에 추가 (Cell 4)\n",
-    "\"\"\"\n",
-    "이 셀에서는 로드된 요약 파이프라인을 사용하여 데이터셋의 'context' 내용을 요약합니다.\n",
-    "map 함수를 사용하여 데이터셋의 각 샘플에 요약 함수를 적용하고,\n",
-    "결과를 'summary'라는 새로운 컬럼에 저장합니다.\n",
-    "\"\"\"\n",
-    "# 요약을 수행하는 함수 정의\n",
-    "def summarize_context(example):\n",
-    "  \"\"\"데이터셋의 'context'를 받아 요약 결과를 반환하는 함수\"\"\"\n",
-    "  # TODO: summarizer 파이프라인을 사용하여 example['context']를 요약하세요.\n",
-    "  # 요약 최대 길이는 150, 최소 길이는 30으로 설정하세요.\n",
-    "  # summary_result = summarizer(...)\n",
-    "  # 파이프라인 결과에서 실제 요약 텍스트를 추출하여 example 딕셔너리의 'summary' 키 값으로 저장하세요.\n",
-    "  # example['summary'] = ...\n",
-    "  return example\n",
-    "\n",
-    "# TODO: klue_mrc_subset 데이터셋의 map 함수를 사용하여 위에서 정의한 summarize_context 함수를 적용하세요.\n",
-    "# 결과를 summarized_dataset 변수에 저장하세요.\n",
-    "print(\"요약 작업을 시작합니다... (데이터 양에 따라 시간이 소요될 수 있습니다)\")\n",
-    "# summarized_dataset = klue_mrc_subset.map(...)\n",
-    "print(\"요약 작업 완료.\")\n",
-    "\n",
-    "print(\"\\n요약이 추가된 데이터셋 정보:\")\n",
-    "print(summarized_dataset)\n",
-    "\n",
-    "print(\"\\n첫 번째 데이터의 원문(context)과 요약(summary):\")\n",
-    "print(\"--- 원문 (Context) ---\")\n",
-    "print(summarized_dataset[0]['context'])\n",
-    "print(\"\\n--- 요약 (Summary) ---\")\n",
-    "print(summarized_dataset[0]['summary'])\n",
-    "\n",
-    "# 데이터셋 확인 (Pandas)\n",
-    "df_check_summary = pd.DataFrame(summarized_dataset)\n",
-    "print(\"\\n요약 추가 후 데이터셋 미리보기 (DataFrame):\")\n",
-    "display(df_check_summary.head(3))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "a84zkTptfTf9"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 5. 번역 모델 파이프라인 로드 (Cell 5)\n",
-    "\"\"\"\n",
-    "이 셀에서는 한국어 요약본을 영어로 번역하기 위한 NLLB 모델 파이프라인을 로드합니다.\n",
-    "모델: facebook/nllb-200-distilled-600M\n",
-    "파이프라인 타입: translation\n",
-    "NLLB 모델은 다양한 언어를 지원하며, 언어 코드를 지정해야 합니다.\n",
-    "한국어: kor_Hang, 영어: eng_Latn\n",
-    "\"\"\"\n",
-    "# TODO: 'translation' 파이프라인을 로드하고, 사용할 모델은 'facebook/nllb-200-distilled-600M'로 지정하세요.\n",
-    "# GPU 사용 설정(device=device)도 추가하세요. (변수명: translator)\n",
-    "# translator = pipeline(...)\n",
-    "\n",
-    "print(\"번역 파이프라인 로드 완료.\")\n",
-    "\n",
-    "# 번역 테스트 (선택 사항)\n",
-    "# test_translation = translator(\"안녕하세요?\", src_lang=\"kor_Hang\", tgt_lang=\"eng_Latn\")\n",
-    "# print(f\"번역 테스트: {test_translation}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "0drRaREnfVn2"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 6. 기사 요약본 영어로 번역 및 데이터셋에 추가 (Cell 6)\n",
-    "\"\"\"\n",
-    "이 셀에서는 생성된 'summary' 컬럼의 한국어 텍스트를 영어로 번역합니다.\n",
-    "map 함수를 사용하여 번역 함수를 적용하고,\n",
-    "결과를 'english_summary'라는 새로운 컬럼에 저장합니다.\n",
-    "\"\"\"\n",
-    "# 번역을 수행하는 함수 정의\n",
-    "def translate_summary_to_english(example):\n",
-    "  \"\"\"데이터셋의 'summary'를 받아 영어 번역 결과를 반환하는 함수\"\"\"\n",
-    "  # TODO: translator 파이프라인을 사용하여 example['summary']를 번역하세요.\n",
-    "  # 출발 언어(src_lang)는 'kor_Hang', 목표 언어(tgt_lang)는 'eng_Latn'으로 지정해야 합니다.\n",
-    "  # translation_result = translator(...)\n",
-    "  # 파이프라인 결과에서 실제 번역 텍스트를 추출하여 example 딕셔너리의 'english_summary' 키 값으로 저장하세요.\n",
-    "  # example['english_summary'] = ...\n",
-    "  return example\n",
-    "\n",
-    "# TODO: summarized_dataset 데이터셋의 map 함수를 사용하여 위에서 정의한 translate_summary_to_english 함수를 적용하세요.\n",
-    "# 결과를 translated_dataset 변수에 저장하세요.\n",
-    "print(\"번역 작업을 시작합니다... (모델 크기와 데이터 양에 따라 시간이 많이 소요될 수 있습니다)\")\n",
-    "# translated_dataset = summarized_dataset.map(...)\n",
-    "print(\"번역 작업 완료.\")\n",
-    "\n",
-    "print(\"\\n번역이 추가된 데이터셋 정보:\")\n",
-    "print(translated_dataset)\n",
-    "\n",
-    "print(\"\\n첫 번째 데이터의 한국어 요약(summary)과 영어 번역(english_summary):\")\n",
-    "print(\"--- 한국어 요약 (Summary) ---\")\n",
-    "print(translated_dataset[0]['summary'])\n",
-    "print(\"\\n--- 영어 번역 (English Summary) ---\")\n",
-    "print(translated_dataset[0]['english_summary'])\n",
-    "\n",
-    "# 데이터셋 확인 (Pandas)\n",
-    "df_check_translation = pd.DataFrame(translated_dataset)\n",
-    "print(\"\\n번역 추가 후 데이터셋 미리보기 (DataFrame):\")\n",
-    "display(df_check_translation.head(3))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "VsiLRqSWfYEt"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 7. 감정 분석 모델 파이프라인 로드 (Cell 7)\n",
-    "\"\"\"\n",
-    "이 셀에서는 영어 텍스트의 감정을 분석하기 위한 모델 파이프라인을 로드합니다.\n",
-    "모델: SamLowe/roberta-base-go_emotions\n",
-    "파이프라인 타입: text-classification\n",
-    "이 모델은 다중 레이블 감정(분노, 기쁨, 슬픔 등)을 예측할 수 있습니다.\n",
-    "\"\"\"\n",
-    "# TODO: 'text-classification' 파이프라인을 로드하고, 사용할 모델은 'SamLowe/roberta-base-go_emotions'로 지정하세요.\n",
-    "# GPU 사용 설정(device=device)과 함께, 가장 확률 높은 결과 1개만 받도록 top_k=1 옵션을 추가하세요. (변수명: emotion_classifier)\n",
-    "# emotion_classifier = pipeline(...)\n",
-    "\n",
-    "print(\"감정 분석 파이프라인 로드 완료.\")\n",
-    "\n",
-    "# 감정 분석 테스트 (선택 사항)\n",
-    "# test_emotion = emotion_classifier(\"I am very happy today!\")\n",
-    "# print(f\"감정 분석 테스트: {test_emotion}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "xLE4i46FfaCG"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 8. 영어 번역본 감정 분석 및 데이터셋에 추가 (Cell 8)\n",
-    "\"\"\"\n",
-    "이 셀에서는 번역된 'english_summary' 컬럼의 텍스트에 대해 감정 분석을 수행합니다.\n",
-    "map 함수를 사용하여 감정 분석 함수를 적용하고,\n",
-    "가장 확률이 높은 감정 레이블을 'emotion'이라는 새로운 컬럼에 저장합니다.\n",
-    "\"\"\"\n",
-    "# 감정 분석을 수행하는 함수 정의\n",
-    "def analyze_emotion(example):\n",
-    "  \"\"\"데이터셋의 'english_summary'를 받아 감정 분석 결과를 반환하는 함수\"\"\"\n",
-    "  # TODO: emotion_classifier 파이프라인을 사용하여 example['english_summary']의 감정을 분석하세요.\n",
-    "  # emotion_result = emotion_classifier(...)\n",
-    "  # top_k=1 이므로 결과는 [[{'label': '...', 'score': ...}]] 형태입니다.\n",
-    "  # TODO: 결과에서 가장 확률 높은 감정의 'label' 값만 추출하여 example 딕셔너리의 'emotion' 키 값으로 저장하세요.\n",
-    "  # example['emotion'] = ...\n",
-    "  return example\n",
-    "\n",
-    "# TODO: translated_dataset 데이터셋의 map 함수를 사용하여 위에서 정의한 analyze_emotion 함수를 적용하세요.\n",
-    "# 결과를 final_dataset 변수에 저장하세요.\n",
-    "print(\"감정 분석 작업을 시작합니다...\")\n",
-    "# final_dataset = translated_dataset.map(...)\n",
-    "print(\"감정 분석 작업 완료.\")\n",
-    "\n",
-    "print(\"\\n최종 데이터셋 정보:\")\n",
-    "print(final_dataset)\n",
-    "\n",
-    "print(\"\\n첫 번째 데이터의 영어 번역(english_summary)과 감정(emotion):\")\n",
-    "print(\"--- 영어 번역 (English Summary) ---\")\n",
-    "print(final_dataset[0]['english_summary'])\n",
-    "print(\"\\n--- 분석된 감정 (Emotion) ---\")\n",
-    "print(final_dataset[0]['emotion'])\n",
-    "\n",
-    "# 최종 데이터셋 확인 (Pandas)\n",
-    "df_final = pd.DataFrame(final_dataset)\n",
-    "print(\"\\n최종 데이터셋 미리보기 (DataFrame):\")\n",
-    "display(df_final) # 전체 선택된 샘플 표시"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "NWJksEW6fcmB"
-   },
-   "outputs": [],
-   "source": [
-    "# @title 9. 결과 정리 및 마무리 (Cell 9)\n",
-    "\"\"\"\n",
-    "모든 단계를 거쳐 생성된 최종 데이터셋(final_dataset)에는\n",
-    "원본 KLUE-MRC 데이터에 'summary', 'english_summary', 'emotion' 컬럼이 추가되었습니다.\n",
-    "이 데이터를 CSV 파일로 저장하거나 추가 분석에 활용할 수 있습니다.\n",
-    "\"\"\"\n",
-    "print(\"모든 작업이 완료되었습니다.\")\n",
-    "print(\"최종 데이터셋 컬럼:\", final_dataset.column_names)\n",
-    "\n",
-    "# 최종 결과 확인 (첫 5개 샘플)\n",
-    "for i in range(min(5, len(final_dataset))):\n",
-    "  print(f\"\\n--- 샘플 {i+1} ---\")\n",
-    "  print(f\"원문 일부: {final_dataset[i]['context'][:100]}...\")\n",
-    "  print(f\"요약: {final_dataset[i]['summary']}\")\n",
-    "  print(f\"영어 번역: {final_dataset[i]['english_summary']}\")\n",
-    "  print(f\"감정 분석: {final_dataset[i]['emotion']}\")\n",
-    "\n",
-    "# 필요시 CSV 저장\n",
-    "# df_final.to_csv(\"klue_mrc_processed.csv\", index=False, encoding='utf-8-sig')\n",
-    "# print(\"\\n결과를 CSV 파일로 저장했습니다. (필요시 주석 해제)\")"
-   ]
-  }
- ],
- "metadata": {
-  "colab": {
-   "authorship_tag": "ABX9TyPWRPvZBF36a3ATGwVdOGk0",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
+{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"id":"A3hT1XeLfGyo"},"outputs":[],"source":["\n","import datasets\n","from datasets import load_dataset, DatasetDict\n","from transformers import pipeline\n","import torch\n","import pandas as pd # 데이터 확인용\n","\n","# GPU 사용 가능 여부 확인 및 설정 (Colab에서는 보통 GPU 사용 가능)\n","device = 0 if torch.cuda.is_available() else -1\n","print(f\"사용 가능한 디바이스: {'GPU' if device == 0 else 'CPU'}\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"EfLCkC9DfMnA"},"outputs":[],"source":["\n","full_dataset = load_dataset(\"yelp_polarity\", split=\"train\")\n","full_dataset = full_dataset.remove_columns([\"label\"])\n","\n","# 실습을 위해 데이터 일부만 선택 (예: 앞 10개)\n","num_samples_to_use = 20\n","full_dataset_subset = full_dataset.select(range(num_samples_to_use))\n","\n","print(\"로드된 데이터셋 정보:\")\n","print(full_dataset_subset)\n","\n","print(\"\\n첫 번째 데이터 예시 (text 확인):\")\n","print(full_dataset_subset[0]['text'])\n","\n","# 데이터셋 확인을 위해 Pandas DataFrame으로 변환 (선택 사항)\n","df_check = pd.DataFrame(full_dataset_subset)\n","print(\"\\n데이터셋 일부 미리보기 (DataFrame):\")\n","display(df_check.head(3)) # Colab 환경에서는 display()가 표 형태로 보여줍니다"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"a84zkTptfTf9"},"outputs":[],"source":["\n","translator = pipeline(\n","    task=\"translation\",\n","    model=\"facebook/nllb-200-distilled-600M\",\n","    device=device\n",")\n","\n","print(\"번역 파이프라인 로드 완료.\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"0drRaREnfVn2"},"outputs":[],"source":["\n","# 번역을 수행하는 함수 정의\n","def translate_english_to_korean(example):\n","  \"\"\"데이터셋의 'summary'를 받아 영어 번역 결과를 반환하는 함수\"\"\"\n","  translation_result = translator(\n","        example['text'],\n","        tgt_lang=\"kor_Hang\",\n","        src_lang=\"eng_Latn\",\n","        max_length=400,\n","        min_length=30,\n","        do_sample=False  # deterministic 출력을 원하면 False, 다양성을 원하면 True\n","    )\n","    # 결과에서 번역 텍스트 추출\n","  
example['korean_translate'] = translation_result[0]['translation_text']\n","  return example\n","\n","print(\"번역 작업을 시작합니다... (모델 크기와 데이터 양에 따라 시간이 많이 소요될 수 있습니다)\")\n","translated_dataset = full_dataset_subset.map(translate_english_to_korean)\n","print(\"번역 작업 완료.\")\n","\n","print(\"\\n번역이 추가된 데이터셋 정보:\")\n","print(translated_dataset)\n","print(translated_dataset[0]['text'])\n","print(translated_dataset[0]['korean_translate'])\n","\n","# 데이터셋 확인 (Pandas)\n","df_check_translation = pd.DataFrame(translated_dataset)\n","print(\"\\n번역 추가 후 데이터셋 미리보기 (DataFrame):\")\n","display(df_check_translation.head(3))"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"VsiLRqSWfYEt"},"outputs":[],"source":["\n","emotion_classifier = pipeline(\n","    task=\"text-classification\",\n","    model=\"nlptown/bert-base-multilingual-uncased-sentiment\",\n","    top_k=1,\n","    device=device\n",")\n","print(\"감정 분석 파이프라인 로드 완료.\")\n","\n","# 감정 분석 테스트 (선택 사항)\n","test_emotion = emotion_classifier(\"좋았다\")\n","print(f\"감정 분석 테스트: {test_emotion[0][0]}\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xLE4i46FfaCG"},"outputs":[],"source":["\n","# 감정 분석을 수행하는 함수 정의\n","def analyze_emotion(example):\n","  emotion_result = emotion_classifier(example['korean_translate'])\n","  example['emotion'] = emotion_result[0][0]['label']\n","  return example\n","\n","print(\"감정 분석 작업을 시작합니다...\")\n","final_dataset = translated_dataset.map(analyze_emotion)\n","print(\"감정 분석 작업 완료.\")\n","\n","print(\"\\n최종 데이터셋 정보:\")\n","print(final_dataset)\n","\n","print(final_dataset[0]['korean_translate'])\n","print(\"\\n--- 분석된 감정 (Emotion) ---\")\n","print(final_dataset[0]['emotion'])\n","\n","# 최종 데이터셋 확인 (Pandas)\n","df_final = pd.DataFrame(final_dataset)\n","print(\"\\n최종 데이터셋 미리보기 (DataFrame):\")\n","display(df_final) # 전체 선택된 샘플 표시"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"NWJksEW6fcmB"},"outputs":[],"source":["\n","# 최종 결과 확인 (첫 5개 샘플)\n","for i in 
range(min(5, len(final_dataset))):\n","  print(f\"\\n--- 샘플 {i+1} ---\")\n","  print(f\"원문 일부: {final_dataset[i]['text'][:100]}...\")\n","  print(f\"한국어 번역: {final_dataset[i]['korean_translate']}\")\n","  print(f\"예측 평점: {final_dataset[i]['emotion']}\")"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"7iPEwt4yt43v"},"outputs":[],"source":[]}],"metadata":{"accelerator":"GPU","colab":{"gpuType":"T4","provenance":[],"toc_visible":true},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}
\ No newline at end of file