HateSlop · rmdnps10 · Sep 19, 2025 · Sep 19, 2025 · Sep 19, 2025 · Sep 19, 2025
diff --git a/rmdnps10/static-crawling-books.csv b/rmdnps10/static-crawling-books.csv
@@ -0,0 +1,9 @@
+Title,Author,Price
+2026 해커스경찰 갓대환 형사법 기출총정리 세트 (경찰공무원) - 전3권,김대환 지음 | 해커스경찰,"85,500"
+2026 써니 행정법총론 기출문제집 - 전2권,박준철 지음 | 제이씨에듀,"41,400"
+자료조직개론,신인수 지음 | 스페라플러스(speraplus),"31,500"
+2026 문동균 한국사 기출은 문동균,문동균 지음 | 에스티유니타스,"27,000"
+2026 김민철 경찰학 핵심요약집,김민철 지음 | 미래가치,"25,200"
+2026 선재국어 예상 기출서,이선재 지음 | 수비니겨,"18,900"
+2026 김민철 경찰학 기출 1000제,김민철 지음 | 미래가치,"34,200"
+2026 황철곤 행정학 패스프레소 (개념 압축 노트),황철곤 지음 | 사피엔스넷,"18,900"
diff --git a/rmdnps10/static-crawling_assignment.py b/rmdnps10/static-crawling_assignment.py
@@ -0,0 +1,27 @@
+from bs4 import BeautifulSoup
+import requests
+import csv
+
+# Scrape title, author and price from the Aladin bestseller list and save them as CSV
+url = 'https://www.aladin.co.kr/shop/wbrowse.aspx?CID=34582'
+# A timeout keeps the script from hanging forever; fail loudly on an HTTP error status
+response = requests.get(url, timeout=10)
+response.raise_for_status()
+html = response.text
+soup = BeautifulSoup(html, 'html.parser')
+
+books_container = soup.select_one('.b-bestseller .BrowseBestSeller')
+# select_one() returns None when nothing matches; fall back to an empty list
+books = books_container.select('li') if books_container is not None else []
+book_data = []
+
+for book in books:
+    fields = [book.select_one(css) for css in ('h4>a', '.b-author', '.b-price>strong')]
+    # Skip list items that lack any of the three fields instead of crashing
+    if all(field is not None for field in fields):
+        book_data.append([field.get_text(strip=True) for field in fields])
+
+with open('books.csv', 'w', newline='', encoding='utf-8') as f:
+    writer = csv.writer(f)
+    writer.writerow(['Title', 'Author', 'Price'])
+    writer.writerows(book_data)
diff --git a/rmdnps10/yanolja_crawling.py b/rmdnps10/yanolja_crawling.py
@@ -0,0 +1,111 @@
+from selenium import webdriver 
+from selenium.webdriver.common.by import By
+import time
+from bs4 import BeautifulSoup
+import pandas as pd
+from collections import Counter
+import re
+
+# Scrape the reviews on one Yanolja review page, estimate each review's star
+# rating from its SVG icons, report the average rating and the most frequent
+# words, and save the reviews to an Excel file.
+
+# Load the page with Selenium
+driver = webdriver.Chrome()
+url = 'https://www.yanolja.com/reviews/domestic/10041505'
+
+# Number of scroll-to-bottom passes (presumably to trigger loading of more reviews)
+scroll_count = 10
+
+try:
+    driver.get(url)
+
+    # Wait for the page to load
+    time.sleep(3)
+
+    for _ in range(scroll_count):
+        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
+        time.sleep(1)
+
+    page_source = driver.page_source
+finally:
+    # Always close the browser, even when loading or scrolling fails;
+    # everything below only needs the captured HTML.
+    driver.quit()
+
+soup = BeautifulSoup(page_source, 'html.parser')
+
+reviews_class = soup.find_all(class_="review-item-container")
+reviews = []
+ratings = []
+
+# Collect the text and the star rating of each review container in a single
+# pass, so the two lists are filled together and always have equal length.
+for review_container in reviews_class:
+    cleaned_text = review_container.get_text(strip=True).replace('\r', '').replace('\n', '')
+    reviews.append(cleaned_text)
+
+    star_container = review_container.find(class_="css-rz7kwu")
+    if star_container is None:
+        # No rating element found: record 0 stars
+        ratings.append(0)
+        continue
+
+    # Every star is an <svg>; a star whose first <path> has
+    # fill-rule="evenodd" is treated as empty, any other star as filled.
+    filled_stars = 0
+    for star in star_container.find_all("svg"):
+        path = star.find("path")
+        if path is None or path.get("fill-rule") != "evenodd":
+            filled_stars += 1
+    ratings.append(filled_stars)
+
+# Build the DataFrame from the paired ratings and reviews
+data = list(zip(ratings, reviews))
+df_reviews = pd.DataFrame(data, columns=['Rating', 'Review'])
+
+# Average rating (0 when no review was collected)
+if ratings:
+    average_rating = sum(ratings) / len(ratings)
+else:
+    average_rating = 0
+
+# Stopwords excluded from the word-frequency count
+korean_stopwords = {
+    '이', '그', '저', '것', '들', '다', '을', '를', '에', '의', '가', '는', '해', '한',
+    '하', '하고', '에서', '에게', '과', '와', '너무', '잘', '또', '좀', '호텔', '아주', '진짜', '정말',
+}
+
+# Join all reviews into one string
+all_reviews_text = ' '.join(reviews)
+
+# Keep only runs of Hangul syllables (drops punctuation, digits and other scripts)
+words = re.findall(r'[가-힣]+', all_reviews_text)
+
+# Drop stopwords and single-character words
+filtered_words = [word for word in words if word not in korean_stopwords and len(word) > 1]
+
+# Count word frequencies
+word_counts = Counter(filtered_words)
+
+# Take the 15 most frequent words
+common_words = word_counts.most_common(15)
+
+# Summary of the analysis
+# NOTE(review): summary_df is built but not saved or printed in this script.
+summary_df = pd.DataFrame({
+    'Average Rating': [average_rating],
+    'Common Words': [', '.join([f"{word}({count})" for word, count in common_words])]
+})
+
+# Report the results; only the first 10 of the 15 common words are printed
+print(f"총 {len(reviews)}개의 리뷰가 수집되었습니다.")
+print(f"평균 별점: {average_rating:.2f}")
+print("자주 등장하는 단어 상위 10개:")
+for word, count in common_words[:10]:
+    print(f"  {word}: {count}번")
+
+# Save the collected reviews to an Excel file
+df_reviews.to_excel('yanolja_reviews.xlsx', index=False)
+print("\n리뷰 데이터가 'yanolja_reviews.xlsx' 파일로 저장되었습니다.")
+
diff --git a/rmdnps10/yanolja_reviews.xlsx b/rmdnps10/yanolja_reviews.xlsx
diff --git a/static-crawling/jisigin.ipynb b/static-crawling/jisigin.ipynb
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -55,10 +55,10 @@
     "\n",
     "# 네이버 지식인 삼성전자 검색 페이지 URL\n",
     "url = \"https://kin.naver.com/search/list.naver?query=%EC%82%BC%EC%84%B1%EC%A0%84%EC%9E%90\"\n",
-    "response =   # 요청 보내기\n",
-    "html =   # 응답 받은 HTML 문서\n",
-    "soup =   # BeautifulSoup으로 파싱\n",
-    "soup"
+    "response = requests.get(url)  # send the GET request\n",
+    "html = response.text  # HTML document from the response\n",
+    "soup = BeautifulSoup(html, 'html.parser')  # parse the HTML with BeautifulSoup\n",
+    "soup\n"
    ]
   },
   {
@@ -79,8 +79,8 @@
    "outputs": [],
    "source": [
     "# 첫 번째 질문 요소 선택\n",
-    "tree = \n",
-    "tree  # 첫 번째 질문의 HTML 구조를 출력하여 확인"
+    "tree = soup.select_one(\".basic1 > li > dl\")   # select_one() keeps only the first match\n",
+    "tree  # display the selected element's HTML structure to inspect it"
    ]
   },
   {
@@ -102,10 +102,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# 제목과 링크 추출\n",
-    "title_tag = \n",
-    "title = \n",
-    "link = \n",
+    "# Extract the title and link; raw string so the CSS escapes \\: and \\. reach the selector unchanged\n",
+    "title_tag = tree.select_one(r\"._nclicks\\:kin\\.txt._searchListTitleAnchor\")\n",
+    "title = title_tag.text\n",
+    "link = title_tag.attrs['href']\n",
     "print(title, link)\n"
    ]
   },
@@ -116,8 +116,9 @@
    "outputs": [],
    "source": [
     "# 날짜 추출\n",
-    "date_tag = \n",
-    "date = \n",
+    "# select_one() returns the first element with class txt_inline inside this question block\n",
+    "date_tag = tree.select_one (\".txt_inline\")\n",
+    "date = date_tag.text\n",
     "print(date)"
    ]
   },
@@ -128,29 +129,22 @@
    "outputs": [],
    "source": [
     "# 카테고리 추출\n",
-    "category_tag = \n",
-    "category = \n",
-    "print(category)"
+    "category_tag = tree.select_one(r\".txt_g1._nclicks\\:kin\\.cat2\")  # both classes sit on one element, so join them with '.'\n",
+    "print (category_tag)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "4\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# 조회수 추출\n",
-    "hit_tag = \n",
-    "texts = \n",
-    "# hit = \n",
+    "hit_tag = tree.select_one(\".hit\")\n",
+    "print(hit_tag)\n",
+    "# NOTE(review): presumably the text is '<label> <count>'; the second space-separated token is kept - confirm\n",
+    "texts = hit_tag.text\n",
+    "hit = texts.split(\" \")[1]\n",
     "print(hit)"
    ]
   },
@@ -218,9 +212,16 @@
     "\n",
     "# DataFrame으로 변환\n",
     "import pandas as pd\n",
-    "df = "
+    "df = pd.DataFrame (data, columns =['제목', '링크', '날짜', '카테고리', '히트'])"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -237,13 +238,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# pandas를 사용해 엑셀로 저장"
+    "df.to_excel(\"jisigin.xlsx\", index =False)  # save to Excel with pandas; index=False leaves out the row index"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": ".venv",
+   "display_name": "hateslop",
    "language": "python",
    "name": "python3"
   },
@@ -257,7 +258,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.4"
+   "version": "3.9.23"
   }
  },
  "nbformat": 4,