11import { existsSync , readFileSync , writeFileSync } from "node:fs" ;
22import path from "node:path" ;
33import { parse } from "yaml" ;
4- import { defineCommand , runMain } from "citty" ;
5- import { scrapeCatalogTerm } from "../generate/main " ;
4+ import { defineCommand } from "citty" ;
5+ import { scrapeCatalogTerm } from "@sneu/scraper/generate " ;
66import { infer as zinfer } from "zod" ;
7- import { Config } from "../config" ;
8- import { consola } from "consola" ;
9- import { ScraperBannerCache } from "../schemas/scraper/banner-cache" ;
7+ import { Config } from "@sneu/scraper/config" ;
8+ import { ScraperBannerCache } from "@sneu/scraper/schemas/banner-cache" ;
9+ import { ScraperEventEmitter } from "@sneu/scraper/events" ;
10+ import { brandIntro , p , pc , setVerbosity } from "../ui" ;
11+ import { attachLogger } from "../logger" ;
1012
1113const CACHE_FORMAT = ( term : string ) => `term-${ term } .json` ;
12- const CACHE_VERSION = 3 ;
14+ const CACHE_VERSION = 5 ;
1315
14- const main = defineCommand ( {
16+ export default defineCommand ( {
1517 meta : {
16- name : "scrape:gen " ,
18+ name : "generate " ,
1719 description : "runs the scraper to generate the banner cache files" ,
1820 } ,
1921 args : {
@@ -26,8 +28,15 @@ const main = defineCommand({
2628 } ,
2729 cachePath : {
2830 type : "string" ,
29- default : "cache/" ,
30- description : "" ,
31+ default : process . env . SCRAPER_CACHE_PATH ?? "cache/" ,
32+ description : "path to cache directory (env: SCRAPER_CACHE_PATH)" ,
33+ required : false ,
34+ } ,
35+ configPath : {
36+ type : "string" ,
37+ default : process . env . SCRAPER_CONFIG_PATH ?? "config/" ,
38+ description :
39+ "path to config directory containing manifest.yaml (env: SCRAPER_CONFIG_PATH)" ,
3140 required : false ,
3241 } ,
3342 interactive : {
@@ -56,59 +65,64 @@ const main = defineCommand({
5665 } ,
5766 } ,
5867 async run ( { args } ) {
59- if ( args . verbose ) consola . level = 4 ;
60- if ( args . veryverbose ) consola . level = 999 ;
68+ // const interactive = args.interactive ?? false;
69+ // setVerbosity({ verbose: args.verbose, veryVerbose: args.veryverbose });
70+ setVerbosity ( { verbose : true , veryVerbose : false } ) ;
71+ // updateSettings({ withGuide: false });
72+ brandIntro ( "generate" ) ;
6173
62- const interactive = args . interactive ?? false ;
74+ const emitter = new ScraperEventEmitter ( ) ;
75+ attachLogger ( emitter , { interactive : true } ) ;
6376
6477 const configStream = readFileSync (
65- path . resolve ( args . cachePath , "manifest.yaml" ) ,
66- {
67- encoding : "utf8" ,
68- } ,
78+ path . resolve ( args . configPath , "manifest.yaml" ) ,
79+ { encoding : "utf8" } ,
6980 ) ;
7081 const configRaw = parse ( configStream ) ;
7182 const configResponse = Config . safeParse ( configRaw ) ;
7283 if ( ! configResponse . success ) {
73- consola . error ( configResponse . error ) ;
84+ p . log . error ( pc . red ( String ( configResponse . error ) ) ) ;
85+ p . cancel ( "Invalid config" ) ;
7486 return ;
7587 }
7688
7789 const config = configResponse . data ;
78-
7990 const termsToScrape = filterTerms ( config , args . terms ) ;
80- consola . info ( `scraping ${ termsToScrape . length } terms` ) ;
91+
92+ p . log . info (
93+ `Scraping ${ pc . bold ( String ( termsToScrape . length ) ) } term${ termsToScrape . length !== 1 ? "s" : "" } ` ,
94+ ) ;
8195
8296 if ( termsToScrape . length === 0 ) {
83- consola . log ( "no active / configured terms to scrape") ;
97+ p . outro ( "No active terms to scrape") ;
8498 return ;
8599 }
86100
87101 for ( const termConfig of termsToScrape ) {
88- consola . start ( `scraping term ${ termConfig . term } ` ) ;
89-
90102 const cachename = path . resolve (
91103 args . cachePath ,
92104 CACHE_FORMAT ( termConfig . term . toString ( ) ) ,
93105 ) ;
94106 const existingCache = existsSync ( cachename ) ;
95107 if ( args . overwrite && existingCache ) {
96- consola . info ( "existing cache found, overwriting with new scrape" ) ;
108+ p . log . info (
109+ `Existing cache for ${ pc . cyan ( String ( termConfig . term ) ) } , overwriting` ,
110+ ) ;
97111 } else if ( ! args . overwrite && existingCache ) {
98- consola . success ( "existing cache found, skipping term" ) ;
112+ p . log . success (
113+ `Cache exists for ${ pc . cyan ( String ( termConfig . term ) ) } , skipping` ,
114+ ) ;
99115 continue ;
100116 }
101117
102118 try {
103119 const out = await scrapeCatalogTerm (
104120 termConfig . term . toString ( ) ,
105- termConfig ,
106- interactive ,
121+ emitter ,
107122 ) ;
108123
109124 if ( ! out ) {
110- consola . error ( `error scraping term ${ termConfig . term } ` ) ;
111- // return;
125+ p . log . error ( pc . red ( `Failed to scrape term ${ termConfig . term } ` ) ) ;
112126 continue ;
113127 }
114128
@@ -119,23 +133,18 @@ const main = defineCommand({
119133 } ;
120134
121135 writeFileSync ( cachename , JSON . stringify ( cachedData , null , 2 ) ) ;
122- consola . success ( `scraped term ${ termConfig . term } ` ) ;
123136 } catch ( e ) {
124- consola . error ( `failed to scrape term ${ termConfig . term } ` , e ) ;
137+ p . log . error ( pc . red ( `Failed to scrape term ${ termConfig . term } : ${ e } ` ) ) ;
125138 continue ;
126139 }
127140 }
128141
129- consola . success (
130- `successfully scraped ${ termsToScrape . length } term${ termsToScrape . length > 1 ? "s" : "" } ` ,
142+ p . outro (
143+ `Scraped ${ pc . bold ( String ( termsToScrape . length ) ) } term${ termsToScrape . length > 1 ? "s" : "" } — cache is fresh ` ,
131144 ) ;
132145 } ,
133146} ) ;
134147
135- void runMain ( main ) ;
136-
137- /**
138- */
139148function filterTerms ( config : zinfer < typeof Config > , termArg : string ) {
140149 if ( termArg === "all" ) {
141150 return config . terms ;
@@ -151,7 +160,9 @@ function filterTerms(config: zinfer<typeof Config>, termArg: string) {
151160 splitTerms . includes ( t . term . toString ( ) ) ,
152161 ) ;
153162 if ( filteredTerms . length === 0 ) {
154- consola . error ( `no matching terms found for: ${ splitTerms . join ( ", " ) } ` ) ;
163+ p . log . error (
164+ pc . red ( `No matching terms found for: ${ splitTerms . join ( ", " ) } ` ) ,
165+ ) ;
155166 process . exit ( 1 ) ;
156167 }
157168 return filteredTerms ;
0 commit comments