Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 10 additions & 36 deletions crawlers/skincare/crawl_brand_skincare.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def get_brand(brand_name, brand_code):
with SB(uc=True, test=True, headless=True) as sb:
log.info(f"[get_brand] URL μ˜€ν”ˆ: {url}")
sb.open(url)
time.sleep(1)
time.sleep(10) # 페이지 렌더링 대기 (10초, 필요시 더 늘릴 수 있음)

page = 1
while True:
Expand Down Expand Up @@ -67,13 +67,12 @@ def get_brand(brand_name, brand_code):
try:
price_original = item.select_one("span.origin").text.strip().replace("원", "").replace(",", "")
except Exception:
price_original = ""
price_original = price_final
try:
flag_spans = item.select("div.flags span.flag")
flag_list = [span.text.strip() for span in flag_spans if span.text.strip()]
flag_str = ",".join(flag_list) if flag_list else ""
except Exception:
flag_str = ""
flag_list = []
try:
soldout_flag = item.select_one("span.status_flag.soldout")
is_soldout = bool(soldout_flag)
Expand All @@ -86,7 +85,7 @@ def get_brand(brand_name, brand_code):
"goodsName": name,
"salePrice": price_final,
"originalPrice": price_original,
"flagList": flag_str,
"flagList": flag_list,
"isSoldout": is_soldout,
"createdAt": collected_at
})
Expand Down Expand Up @@ -201,7 +200,11 @@ def get_detail_info(soup, title):
try:
poll_div = soup.select_one("div.poll_all")
if poll_div:
for dl in poll_div.select("dl.poll_type2.type3"):
# 우선 dl.poll_type2.type3을 찾고, 없으면 dl.poll_type2만 찾기
dl_tags = poll_div.select("dl.poll_type2.type3")
if not dl_tags:
dl_tags = poll_div.select("dl.poll_type2")
for dl in dl_tags:
type_name = dl.select_one("dt span")
type_name = type_name.text.strip() if type_name else ""
for li in dl.select("dd ul.list > li"):
Expand Down Expand Up @@ -240,33 +243,4 @@ def get_detail_info(soup, title):
"pctOf1": pctOf1,
"reviewDetail": review_detail,
**detail_spec,
}

##### 실행 코드 #####
# PB_BRAND_CODE_DICT = {
# "바이오힐 보": "A000897",
# "브링그린": "A002253",
# "웨이크메이크": "A001240",
# "컬러그램": "A002712",
# "필리밀리": "A002502",
# "아이디얼포맨": "A001643",
# "라운드어라운드": "A001306",
# "식물나라": "A000036",
# "케어플러스": "A003339",
# "탄탄": "A015673",
# "딜라이트 프로젝트": "A003361",
# }

# for brand_name, brand_code in PB_BRAND_CODE_DICT.items():
# df = get_brand(brand_name, brand_code)

# with SB(uc=True, test=True) as sb:
# detail_list = []
# for goods_no in df['goodsNo']:
# detail = get_brand_product_detail_info(sb, goods_no)
# detail_list.append(detail)

# detail_df = pd.DataFrame(detail_list)
# result_df = pd.concat([df.reset_index(drop=True), detail_df.reset_index(drop=True)], axis=1)

# result_df.to_json('skincare_result.json', orient='records', force_ascii=False, indent=2)
}
43 changes: 15 additions & 28 deletions crawlers/skincare/crawl_rank_skincare.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,7 @@ def get_top100_skincare() -> tuple:
# goods_no_list에 바로 추가
if goods_no:
goods_no_list.append(goods_no)
# 정가 (null 허용)
try:
price_original = item.find_element(
By.CSS_SELECTOR, ".prd_price .tx_org .tx_num"
).text.strip()
except Exception:
price_original = ""

# ꡬ맀가격
try:
price_final = item.find_element(
Expand All @@ -84,6 +78,13 @@ def get_top100_skincare() -> tuple:
except Exception as e:
log.warning(f"[get_top100_skincare] ꡬ맀가격 정보 νŒŒμ‹± μ‹€νŒ¨: {e}")
price_final = ""
# 정가 (null 허용)
try:
price_original = item.find_element(
By.CSS_SELECTOR, ".prd_price .tx_org .tx_num"
).text.strip()
except Exception:
price_original = price_final
# 기타 프로모션 정보(null 허용)
try:
flag_spans = item.find_elements(By.CSS_SELECTOR, ".prd_flag .icon_flag")
Expand Down Expand Up @@ -119,8 +120,6 @@ def get_top100_skincare() -> tuple:
except Exception:
is_soldout = False



data.append(
{
"rank": rank_val,
Expand All @@ -131,7 +130,8 @@ def get_top100_skincare() -> tuple:
"originalPrice": price_original,
"flagList": flag_list, # 리슀트둜 μ €μž₯
"createdAt": collected_at,
"isSoldout": bool(is_soldout)
"isSoldout": bool(is_soldout),
"category": "μŠ€ν‚¨μΌ€μ–΄"
}
)
log.info(f"[get_top100_skincare] {rank_val}μœ„ μƒν’ˆ: {brand} {name} (goods_no: {goods_no})")
Expand Down Expand Up @@ -239,7 +239,11 @@ def get_detail_info(soup, title):
try:
poll_div = soup.select_one("div.poll_all")
if poll_div:
for dl in poll_div.select("dl.poll_type2.type3"):
# 우선 dl.poll_type2.type3을 찾고, 없으면 dl.poll_type2만 찾기
dl_tags = poll_div.select("dl.poll_type2.type3")
if not dl_tags:
dl_tags = poll_div.select("dl.poll_type2")
for dl in dl_tags:
type_name = dl.select_one("dt span")
type_name = type_name.text.strip() if type_name else ""
for li in dl.select("dd ul.list > li"):
Expand Down Expand Up @@ -279,20 +283,3 @@ def get_detail_info(soup, title):
**detail_spec,

}


##### 실행 코드 #####
# data, goods_no_list = get_top100_skincare()

# with SB(uc=True, test=True) as sb:
# detail_list = []
# for goods_no in goods_no_list:
# detail = get_product_detail_info(sb, goods_no)
# detail_list.append(detail)

# df = pd.DataFrame(data)
# detail_df = pd.DataFrame(detail_list)
# result_df = pd.concat([df.reset_index(drop=True), detail_df.reset_index(drop=True)], axis=1)

# result_df.to_json('skincare_result.json', orient='records', force_ascii=False, indent=2)

Loading