diff --git a/lib/routes/gq/tw/templates/description.art b/lib/routes/gq/tw/templates/description.art
new file mode 100644
index 00000000000000..40c9eafd446a09
--- /dev/null
+++ b/lib/routes/gq/tw/templates/description.art
@@ -0,0 +1,7 @@
+{{ if src }}
+<figure>
+    <img src="{{ src }}" alt="{{ alt }}">
+    {{ if alt }}<figcaption>{{@ alt }}</figcaption>{{ /if }}
+</figure>
+{{ /if }}
+{{ if text }}<p>{{@ text }}</p>{{ /if }}
diff --git a/lib/routes/gq/tw/tw.ts b/lib/routes/gq/tw/tw.ts
new file mode 100644
index 00000000000000..cecf594d061e40
--- /dev/null
+++ b/lib/routes/gq/tw/tw.ts
@@ -0,0 +1,230 @@
+import { type Data, type DataItem, type Route } from '@/types';
+import { art } from '@/utils/render';
+import ofetch from '@/utils/ofetch';
+import { parseDate } from '@/utils/parse-date';
+import logger from '@/utils/logger';
+
+import { load } from 'cheerio';
+import path from 'node:path';
+import type { Context } from 'hono';
+import { JSONPath } from 'jsonpath-plus';
+
+const baseUrl = 'https://www.gq.com.tw';
+
+/** Number of items returned when `limit` is absent or not a positive integer. */
+const defaultLimit = 21;
+
+/** Supported top-level categories, keyed by URL slug. */
+const categoryTitleMap: Record<string, string> = {
+    life: 'LIFE',
+    fashion: 'FASHION',
+    entertainment: 'ENTERTAINMENT',
+    gadget: 'GADGET',
+    bettermen: 'BETTER MEN',
+};
+
+/** Supported subcategories of each category, keyed by URL slug. */
+const subcategoryTitleMaps: Record<string, Record<string, string>> = {
+    life: {
+        food: '美食',
+        wine: '微醺',
+        outdoor: '戶外生活',
+        design: '設計生活',
+        lifestyleinsider: '五感十築',
+        gogreen: 'GoGreen',
+        special: '特別報導',
+    },
+    fashion: {
+        'fashion-news': '新訊',
+        shopping: '編輯推薦',
+        guide: '穿搭指南',
+        special: '特別報導',
+    },
+    entertainment: {
+        movie: '電影',
+        popculture: '娛樂',
+        celebrities: '名人',
+        girl: '美女',
+        sports: '體育',
+        special: '特別報導',
+        // The Oscars entry leads to a tag page, so it is not treated as a subcategory of this route
+    },
+    gadget: {
+        '3c': '3C',
+        auto: '車',
+        watch: '腕錶',
+        special: '特別報導',
+    },
+    bettermen: {
+        wellbeing: '保養健身',
+        relationship: '感情關係',
+        sex: '性愛',
+        'one-shot': 'ONE-SHOT',
+        special: '特別報導',
+    },
+};
+
+export const route: Route = {
+    path: '/tw/:category/:subcategory?', // category required because https://www.gq.com.tw/feed/rss already exists
+    categories: ['new-media'],
+    example: '/gq/tw/life/outdoor',
+    parameters: {
+        category: 'Category, e.g., life',
+        subcategory: 'Subcategory, e.g., outdoor',
+    },
+    features: {
+        requireConfig: false,
+        requirePuppeteer: false,
+        antiCrawler: false,
+        supportBT: false,
+        supportPodcast: false,
+        supportScihub: false,
+    },
+    radar: [
+        {
+            source: ['gq.com.tw/:category/:subcategory?'],
+            target: '/tw/:category/:subcategory?',
+        },
+    ],
+    name: 'GQ台灣',
+    maintainers: ['johan456789'],
+    handler,
+    description: 'GQ台灣 最新內容,可選擇類別與子類別',
+};
+
+/**
+ * Build the feed for one GQ Taiwan category or subcategory listing page.
+ *
+ * @param ctx - Hono request context; reads the `category` and `subcategory` path params and the `limit` query.
+ * @returns Feed data titled with the page's head title, or a title built from the category maps when the page has none.
+ * @throws Error when the category or subcategory is not one of the supported slugs.
+ */
+async function handler(ctx: Context): Promise<Data> {
+    const category = ctx.req.param('category') ?? '';
+    const subcategory = ctx.req.param('subcategory') ?? '';
+
+    // A missing, non-numeric, zero or negative `limit` falls back to the default instead of
+    // producing `slice(0, NaN)` (empty feed) or `slice(0, -n)` (silently drops trailing items).
+    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
+    const limit = Number.isNaN(requestedLimit) || requestedLimit <= 0 ? defaultLimit : requestedLimit;
+
+    // Own-property checks: the `in` operator would also accept inherited keys such as `toString`.
+    if (!category || !Object.hasOwn(categoryTitleMap, category)) {
+        throw new Error(`Unsupported category: ${category}`);
+    }
+
+    const allowedSubMap = subcategoryTitleMaps[category] ?? {};
+    if (subcategory && !Object.hasOwn(allowedSubMap, subcategory)) {
+        throw new Error(`Unsupported subcategory: ${subcategory}`);
+    }
+
+    const listUrl = `${baseUrl}/${category}${subcategory ? '/' + subcategory : ''}`;
+    const { items, headTitle } = await parseWebpage(listUrl);
+    logger.info(`[gq/tw] fetched ${items.length} items from ${listUrl}`);
+
+    const sectionTitle = subcategory ? allowedSubMap[subcategory] : categoryTitleMap[category];
+    const fallbackTitle = `${sectionTitle} | GQ Taiwan`;
+
+    return {
+        title: headTitle || fallbackTitle,
+        link: listUrl,
+        item: items.slice(0, limit),
+    };
+}
+
+/** Result of scraping one listing page. */
+interface PageParseResult {
+    /** Feed items in the order they appear in the page state. */
+    items: DataItem[];
+    /** The page's `head.title` value, present only when it is a non-empty string. */
+    headTitle?: string;
+}
+
+/**
+ * Fetch a listing page and convert the entries of its preloaded state into feed items.
+ *
+ * @param url - Absolute URL of the category or subcategory listing page.
+ * @returns The parsed items and, when present, the page's head title.
+ * @throws Error when the preloaded state cannot be located or parsed.
+ */
+async function parseWebpage(url: string): Promise<PageParseResult> {
+    const html = await ofetch(url);
+    const $ = load(html);
+
+    const stateObj = extractPreloadedStateObject($);
+    if (!stateObj || !stateObj.transformed) {
+        throw new Error(`Failed to extract preloaded state object from ${url}`);
+    }
+
+    // `String(undefined)` would yield the truthy text "undefined" and hide the caller's fallback title.
+    const rawHeadTitle = stateObj.transformed['head.title'];
+    const headTitle = typeof rawHeadTitle === 'string' && rawHeadTitle.trim() ? rawHeadTitle.trim() : undefined;
+
+    const nodes = (
+        JSONPath({
+            path: '$.transformed.bundle.containers[*].items[*]',
+            json: stateObj,
+        }) as any[]
+    ).filter((node) => node && node.url);
+
+    const items = nodes.map((node: any): DataItem => {
+        // Defensive: turn any literal `\u002F` escape sequences left in the path back into slashes.
+        const urlPath = String(node.url).replaceAll(String.raw`\u002F`, '/');
+        const link = new URL(urlPath, baseUrl).toString();
+
+        const title = String(node.dangerousHed ?? node.hed ?? '').trim();
+        const pubDate = node.pubDate ? parseDate(String(node.pubDate)) : undefined;
+
+        // Try the `xxl`, `lg`, then `sm` image source, in that order.
+        const imgSources = node.image?.sources;
+        const imgSrc = imgSources?.xxl?.url || imgSources?.lg?.url || imgSources?.sm?.url || undefined;
+        const textDescription = node.dangerousDek ? String(node.dangerousDek) : undefined;
+        const description = imgSrc || textDescription ? art(path.join(__dirname, 'templates/description.art'), { src: imgSrc, alt: title, text: textDescription }) : undefined;
+
+        return {
+            title,
+            link,
+            pubDate,
+            description,
+            image: imgSrc,
+        };
+    });
+
+    logger.info(`[gq/tw] parsed ${items.length} items from JSON state ${url}`);
+    return { items, headTitle };
+}
+
+/**
+ * Find the script that mentions `__PRELOADED_STATE__` and parse the object literal it contains.
+ *
+ * @param $ - Cheerio handle for the loaded page.
+ * @returns The parsed state, or `null` when no such script exists or none holds valid JSON.
+ */
+function extractPreloadedStateObject($: ReturnType<typeof load>): any | null {
+    const marker = '__PRELOADED_STATE__';
+    const scriptTexts = $('script')
+        .toArray()
+        .map((el) => $(el).text())
+        .filter((text) => text.includes(marker));
+    if (scriptTexts.length === 0) {
+        logger.info('[gq/tw] __PRELOADED_STATE__ script not found');
+        return null;
+    }
+
+    // Try each candidate on its own: concatenating several scripts would corrupt the JSON slice.
+    for (const scriptText of scriptTexts) {
+        const braceStart = scriptText.indexOf('{', scriptText.indexOf(marker));
+        const braceEnd = scriptText.lastIndexOf('}');
+        if (braceStart === -1 || braceEnd <= braceStart) {
+            continue;
+        }
+        try {
+            return JSON.parse(scriptText.slice(braceStart, braceEnd + 1));
+        } catch {
+            // Not a plain JSON payload; move on to the next candidate script.
+        }
+    }
+
+    logger.info('[gq/tw] __PRELOADED_STATE__ json is malformed');
+    return null;
+}