11# src/uosai/indexer/index.py
22import os , sys , time , traceback
3- from datetime import datetime
3+ from datetime import datetime , timedelta
44
# Shared utilities
6- from uosai .common .utils import fetch_all_rows , row_to_doc , split_docs , upsert_docs
6+ from uosai .common .utils import fetch_all_rows , fetch_rows_since , row_to_doc , split_docs , upsert_docs
77
# Batch tuning knobs, overridable through environment variables.
BATCH_SIZE = int(os.getenv("BATCH_SIZE", "200"))  # documents per upsert call
BATCH_SLEEP_SEC = float(os.getenv("BATCH_SLEEP_SEC", "0.8"))  # pause between batches (rate-limit mitigation)

# Incremental-update settings
INDEX_MODE = os.getenv("INDEX_MODE", "incremental")  # "incremental" or "full"
INCREMENTAL_DAYS = int(os.getenv("INCREMENTAL_DAYS", "7"))  # incremental mode: process the last N days
14+
def log(msg: str) -> None:
    """Print *msg* to stdout with an ``[indexer YYYY-MM-DD HH:MM:SS]`` prefix.

    The timestamp is the local wall-clock time at the moment of the call.
    Output is flushed immediately so progress lines are not held back in a
    buffer while the indexer sleeps between batches.
    """
    print(f"[indexer {datetime.now():%Y-%m-%d %H:%M:%S}] {msg}", flush=True)
1317
1418def main () -> int :
15- log ("Full rebuild start" )
16- rows = fetch_all_rows ()
19+ """인덱싱 메인 함수
20+
21+ 환경 변수:
22+ INDEX_MODE: "incremental" (증분 업데이트, 기본값) 또는 "full" (전체 리빌드)
23+ INCREMENTAL_DAYS: 증분 업데이트 시 최근 N일 데이터 처리 (기본값: 7)
24+
25+ WUs 절약:
26+ - incremental: 최근 N일 데이터만 upsert (일일 약 30-300 WUs)
27+ - full: 전체 삭제 후 재삽입 (약 6,000 WUs, 월 1회 권장)
28+ """
29+ mode = INDEX_MODE .lower ()
30+
31+ if mode == "full" :
32+ log ("=== FULL REBUILD MODE ===" )
33+ log ("WARNING: This will consume significant WUs!" )
34+ rows = fetch_all_rows ()
35+ rebuild = True
36+ elif mode == "incremental" :
37+ log (f"=== INCREMENTAL UPDATE MODE (last { INCREMENTAL_DAYS } days) ===" )
38+ since_date = (datetime .now () - timedelta (days = INCREMENTAL_DAYS )).strftime ("%Y-%m-%d" )
39+ log (f"Fetching notices since: { since_date } " )
40+ rows = fetch_rows_since (since_date )
41+ rebuild = False
42+ else :
43+ log (f"ERROR: Invalid INDEX_MODE={ INDEX_MODE } . Use 'incremental' or 'full'" )
44+ return 1
45+
1746 if not rows :
1847 log ("No rows found" )
1948 return 0
@@ -24,14 +53,16 @@ def main() -> int:
2453 total = 0
2554 for i in range (0 , len (docs ), BATCH_SIZE ):
2655 batch = docs [i :i + BATCH_SIZE ]
27- # 첫 배치만 전체 삭제
28- n = upsert_docs (batch , rebuild = (i == 0 ))
56+ # full 모드일 때만 첫 배치에서 전체 삭제
57+ should_rebuild = rebuild and (i == 0 )
58+ n = upsert_docs (batch , rebuild = should_rebuild )
2959 total += n
3060 log (f"Upsert batch { i // BATCH_SIZE + 1 } : { n } chunks (cum { total } )" )
3161 if i + BATCH_SIZE < len (docs ) and BATCH_SLEEP_SEC > 0 :
3262 time .sleep (BATCH_SLEEP_SEC )
3363
34- log (f"Full rebuild done: chunks={ total } " )
64+ mode_label = "Full rebuild" if rebuild else "Incremental update"
65+ log (f"{ mode_label } done: chunks={ total } " )
3566 return total
3667
3768if __name__ == "__main__" :
0 commit comments