Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions crawlers/skincare/crawl_brand_skincare.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,14 @@ def get_brand_product_detail_info(sb, goods_no: str) -> dict:
except Exception as e:
log.warning(f"[get_brand_product_detail_info] ์ด ๋ฆฌ๋ทฐ์ˆ˜ ํŒŒ์‹ฑ ์‹คํŒจ: {e}")
total_review = 0

# 리뷰평점
try:
review_score = soup.select_one("#repReview b")
review_score = float(review_score.text.strip())
log.info(f"[get_brand_product_detail_info] ๋ฆฌ๋ทฐํ‰์ : {review_score}")
except Exception as e:
log.warning(f"[get_brand_product_detail_info] ๋ฆฌ๋ทฐํ‰์  ํŒŒ์‹ฑ ์‹คํŒจ: {e}")
review_score = None
review_score = ""

# 리뷰 분포 기본값
pctOf5 = pctOf4 = pctOf3 = pctOf2 = pctOf1 = None
Expand Down Expand Up @@ -164,9 +163,10 @@ def get_brand_product_detail_info(sb, goods_no: str) -> dict:
except Exception:
total_comment = ""
log.warning("[get_brand_product_detail_info] ๋Œ€ํ‘œ ์ฝ”๋ฉ˜ํŠธ ์ถ”์ถœ ์‹คํŒจ")

except Exception as e:
log.warning(f"[get_brand_product_detail_info] ๋ฆฌ๋ทฐ ์ •๋ณด ์—†์Œ: {e}")
log.warning(f"[get_brand_product_detail_info] ๋ฆฌ๋ทฐ ์ •๋ณด ์ˆ˜์ง‘ ์‹คํŒจ: {e}")
else:
log.warning("[get_product_detail_info] ๋ฆฌ๋ทฐ ์ •๋ณด ์—†์Œ: ๋ฆฌ๋ทฐ ์ˆ˜๊ฐ€ 0๊ฑด ์ž…๋‹ˆ๋‹ค.")

# === 상세스펙(구매정보) 추출 ===
# 구매정보 탭 클릭
Expand All @@ -190,7 +190,7 @@ def get_detail_info(soup, title):
dt_text = dt.text.strip()
dd_text = dd.text.strip()
if title in dt_text:
log.info(f"[get_brand_product_detail_info] {title} ์ถ”์ถœ: {dd_text}")
log.info(f"[get_brand_product_detail_info] {title} ์ถ”์ถœ ์„ฑ๊ณต!")
return dd_text
except Exception as e:
log.warning(f"[get_brand_product_detail_info] ์ƒ์„ธ ์ •๋ณด ํŒŒ์‹ฑ ์‹คํŒจ ({title}): {e}")
Expand Down Expand Up @@ -228,7 +228,6 @@ def get_detail_info(soup, title):
for title, key in spec_map.items():
detail_spec[key] = get_detail_info(soup, title)

log.info("[get_brand_product_detail_info] ์ตœ์ข… ๋ฐ์ดํ„ฐ ๋ฆฌํ„ด")
return {
"category": category,
"totalComment": total_comment,
Expand Down
17 changes: 9 additions & 8 deletions crawlers/skincare/crawl_rank_skincare.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from bs4 import BeautifulSoup
from airflow.utils.log.logging_mixin import LoggingMixin


def get_top100_skincare() -> tuple:
log = LoggingMixin().log
log.info("[get_top100_skincare] ์‹œ์ž‘")
Expand All @@ -21,12 +22,12 @@ def get_top100_skincare() -> tuple:
)

driver = webdriver.Chrome(
#service=Service(ChromeDriverManager().install()), options=chrome_options
service=Service("/usr/local/bin/chromedriver"), options=chrome_options
)

# 올리브영 스킨케어 랭킹 페이지 열기
url = "https://www.oliveyoung.co.kr/store/main/getBestList.do?dispCatNo=900000100100001&fltDispCatNo=10000010001&pageIdx=1&rowsPerPage=8&t_page=%EB%9E%AD%ED%82%B9&t_click=%ED%8C%90%EB%A7%A4%EB%9E%AD%ED%82%B9_%EC%8A%A4%ED%82%A8%EC%BC%80%EC%96%B4"
log.info(f"[get_top100_skincare] URL ์˜คํ”ˆ: {url}")
driver.get(url)

# 페이지 로딩 대기
Expand Down Expand Up @@ -149,8 +150,8 @@ def get_product_detail_info(sb, goods_no: str) -> dict:
log = LoggingMixin().log
url = f"https://www.oliveyoung.co.kr/store/goods/getGoodsDetail.do?goodsNo={goods_no}"
log.info(f"[get_product_detail_info] ์‹œ์ž‘: goods_no={goods_no}")
sb.uc_open_with_reconnect(url, reconnect_time=5)
log.info(f"[get_product_detail_info] URL ์˜คํ”ˆ: {url}")
#sb.uc_open_with_reconnect(url, reconnect_time=5) # 속도 더 느림
sb.open(url)
time.sleep(1)
html = sb.driver.page_source
soup = BeautifulSoup(html, 'html.parser')
Expand All @@ -175,6 +176,7 @@ def get_product_detail_info(sb, goods_no: str) -> dict:
pctOf5 = pctOf4 = pctOf3 = pctOf2 = pctOf1 = None

# 리뷰가 1건 이상 있을 때만 리뷰탭 클릭 및 분포 수집
total_comment = ""
if total_review > 0:
try:
sb.click("a.goods_reputation")
Expand All @@ -199,11 +201,10 @@ def get_product_detail_info(sb, goods_no: str) -> dict:
except Exception:
total_comment = ""
log.warning("[get_product_detail_info] ๋Œ€ํ‘œ ์ฝ”๋ฉ˜ํŠธ ์ถ”์ถœ ์‹คํŒจ")

except Exception as e:
log.warning(f"[get_product_detail_info] ๋ฆฌ๋ทฐ ์ •๋ณด ์—†์Œ: {e}")


log.warning(f"[get_product_detail_info] ๋ฆฌ๋ทฐ ์ •๋ณด ์ˆ˜์ง‘ ์‹คํŒจ: {e}")
else:
log.warning("[get_product_detail_info] ๋ฆฌ๋ทฐ ์ •๋ณด ์—†์Œ: ๋ฆฌ๋ทฐ ์ˆ˜๊ฐ€ 0๊ฑด ์ž…๋‹ˆ๋‹ค.")

# === 상세스펙(구매정보) 추출 ===
# 구매정보 탭 클릭
Expand All @@ -227,7 +228,7 @@ def get_detail_info(soup, title):
dt_text = dt.text.strip()
dd_text = dd.text.strip()
if title in dt_text:
log.info(f"[get_product_detail_info] {title} ์ถ”์ถœ: {dd_text}")
log.info(f"[get_product_detail_info] {title} ์ถ”์ถœ ์„ฑ๊ณต!")
return dd_text
except Exception as e:
log.warning(f"[get_product_detail_info] ์ƒ์„ธ ์ •๋ณด ํŒŒ์‹ฑ ์‹คํŒจ ({title}): {e}")
Expand Down
Loading