-
Notifications
You must be signed in to change notification settings - Fork 8.8k
feat: add gq tw routes #20183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
feat: add gq tw routes #20183
Changes from all commits
a6dd0a0
1567128
76cbbd2
d838b25
5190d57
3237d23
fbbec60
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
{{ if src }}
<figure>
<img src="{{ src }}"{{ if alt }} alt="{{ alt }}"{{ /if }}>
{{ if alt }}<figcaption>{{@ alt }}</figcaption>{{ /if }}
</figure>
{{ /if }}
{{ if text }}<div>{{@ text }}</div>{{ /if }}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,190 @@ | ||
| import { type Data, type DataItem, type Route } from '@/types'; | ||
| import { art } from '@/utils/render'; | ||
| import ofetch from '@/utils/ofetch'; | ||
| import { parseDate } from '@/utils/parse-date'; | ||
| import logger from '@/utils/logger'; | ||
|
|
||
| import { load } from 'cheerio'; | ||
| import path from 'node:path'; | ||
| import type { Context } from 'hono'; | ||
| import { JSONPath } from 'jsonpath-plus'; | ||
|
|
||
| const baseUrl = 'https://www.gq.com.tw'; | ||
|
|
||
/**
 * Display titles for the supported top-level sections, keyed by the slug
 * accepted as the `:category` route parameter.
 *
 * The table is created without a prototype so that lookups driven by request
 * input (for example `'constructor' in categoryTitleMap`) can only match the
 * slugs listed here, never an inherited `Object.prototype` member.
 */
const categoryTitleMap: Record<string, string> = Object.assign(Object.create(null), {
    life: 'LIFE',
    fashion: 'FASHION',
    entertainment: 'ENTERTAINMENT',
    gadget: 'GADGET',
    bettermen: 'BETTER MEN',
});
|
|
||
/**
 * Display titles for the supported subcategories, grouped by category slug
 * and keyed by the slug accepted as the `:subcategory` route parameter.
 *
 * Both the outer table and every inner table are created without a prototype,
 * so lookups driven by request input only match the slugs listed here and
 * never an inherited `Object.prototype` member such as `constructor`.
 */
const subcategoryTitleMaps: Record<string, Record<string, string>> = (() => {
    // Copies the given entries onto a prototype-free object.
    const dictionary = <T>(entries: Record<string, T>): Record<string, T> => Object.assign(Object.create(null), entries);

    return dictionary({
        life: dictionary({
            food: '美食',
            wine: '微醺',
            outdoor: '戶外生活',
            design: '設計生活',
            lifestyleinsider: '五感十築',
            gogreen: 'GoGreen',
            special: '特別報導',
        }),
        fashion: dictionary({
            'fashion-news': '新訊',
            shopping: '編輯推薦',
            guide: '穿搭指南',
            special: '特別報導',
        }),
        entertainment: dictionary({
            movie: '電影',
            popculture: '娛樂',
            celebrities: '名人',
            girl: '美女',
            sports: '體育',
            special: '特別報導',
            // The Oscars entry leads to a tag page, so it is not treated as a subcategory of this route.
        }),
        gadget: dictionary({
            '3c': '3C',
            auto: '車',
            watch: '腕錶',
            special: '特別報導',
        }),
        bettermen: dictionary({
            wellbeing: '保養健身',
            relationship: '感情關係',
            sex: '性愛',
            'one-shot': 'ONE-SHOT',
            special: '特別報導',
        }),
    });
})();
|
|
||
/**
 * Route definition for GQ Taiwan category and subcategory listings.
 *
 * The radar rule lists the `www.` host first because the pages are fetched
 * from `https://www.gq.com.tw`; the bare-domain source is kept so existing
 * matches keep working.
 * NOTE(review): assumes RSSHub Radar matches rules per host name — confirm.
 */
export const route: Route = {
    path: '/tw/:category/:subcategory?', // category required because https://www.gq.com.tw/feed/rss already exists
    categories: ['new-media'],
    example: '/gq/tw/life/outdoor',
    parameters: {
        category: 'Category, e.g., life',
        subcategory: 'Subcategory, e.g., outdoor',
    },
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [
        {
            source: ['www.gq.com.tw/:category/:subcategory?', 'gq.com.tw/:category/:subcategory?'],
            target: '/tw/:category/:subcategory?',
        },
    ],
    name: 'GQ台灣',
    maintainers: ['johan456789'],
    handler,
    description: 'GQ台灣 最新內容,可選擇類別與子類別',
};
|
|
||
/**
 * Builds the feed for a GQ Taiwan category page, optionally narrowed to one
 * of that category's subcategories.
 *
 * @param ctx - Hono request context. Reads the `category` and `subcategory`
 *   path parameters and the `limit` query parameter.
 * @returns Feed data whose items are capped at `limit`. A missing,
 *   non-numeric or non-positive `limit` falls back to 21.
 * @throws Error when the category, or the subcategory within it, is not
 *   listed in the title tables.
 */
async function handler(ctx: Context): Promise<Data> {
    const defaultLimit = 21;

    // `in` also matches inherited members such as `constructor`, so only own keys count as supported slugs.
    const hasOwnKey = (table: object, key: string): boolean => Object.prototype.hasOwnProperty.call(table, key);

    // `category` is a required path segment, so the router never reaches this handler without it.
    const category = ctx.req.param('category');
    const subcategory = ctx.req.param('subcategory') ?? '';

    // A NaN limit would make `slice` return nothing and a negative one would trim from the end.
    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
    const limit = Number.isNaN(requestedLimit) || requestedLimit <= 0 ? defaultLimit : requestedLimit;

    if (!hasOwnKey(categoryTitleMap, category)) {
        throw new Error(`Unsupported category: ${category}`);
    }

    const subcategoryTitles = subcategoryTitleMaps[category] ?? {};
    if (subcategory && !hasOwnKey(subcategoryTitles, subcategory)) {
        throw new Error(`Unsupported subcategory: ${subcategory}`);
    }

    const listUrl = `${baseUrl}/${category}${subcategory ? '/' + subcategory : ''}`;
    const { items, headTitle } = await parseWebpage(listUrl);
    logger.debug(`[gq/tw] fetched ${items.length} items from ${listUrl}`);

    // Prefer the page's own title; otherwise fall back to the most specific known section title.
    const sectionTitle = subcategory ? subcategoryTitles[subcategory] : categoryTitleMap[category];
    const title = headTitle || `${sectionTitle} | GQ Taiwan`;

    return {
        title,
        link: listUrl,
        item: items.slice(0, limit),
    };
}
/** Result of parsing one GQ Taiwan listing page. */
interface PageParseResult {
    /** Feed items built from the page's preloaded state. */
    items: DataItem[];
    /** Page title taken from the preloaded state; absent when the state carries no usable title. */
    headTitle?: string;
}

/**
 * Downloads a listing page and converts the entries found in its preloaded
 * state into feed items.
 *
 * @param url - Absolute URL of the listing page to fetch.
 * @returns The parsed items together with the page title, when one is present.
 * @throws Error when the preloaded state, or its `transformed` section, cannot be extracted.
 */
async function parseWebpage(url: string): Promise<PageParseResult> {
    const html = await ofetch(url);
    const $ = load(html);

    const stateObj = extractPreloadedStateObject($);
    if (!stateObj?.transformed) {
        throw new Error(`Failed to extract preloaded state object from ${url}`);
    }

    // Only a non-blank string counts as a title: stringifying a missing value would
    // yield the truthy text "undefined" and suppress the caller's fallback title.
    const rawHeadTitle: unknown = stateObj.transformed['head.title'];
    const headTitle = typeof rawHeadTitle === 'string' && rawHeadTitle.trim() ? rawHeadTitle : undefined;

    // Enumerates array elements and object values alike, like a `[*]` wildcard; anything else has no children.
    const childrenOf = (value: unknown): any[] => (value !== null && typeof value === 'object' ? Object.values(value) : []);

    // Walks transformed.bundle.containers[*].items[*] and keeps the entries that carry a URL.
    const nodes = childrenOf(stateObj.transformed.bundle?.containers)
        .flatMap((container) => childrenOf(container?.items))
        .filter((node) => node && node.url);

    const items: DataItem[] = nodes.map((node): DataItem => {
        // Presumably guards against URLs whose slashes are still spelled as a literal \u002F escape — TODO confirm.
        const urlPath = String(node.url).replaceAll(String.raw`\u002F`, '/');
        const link = new URL(urlPath, baseUrl).toString();

        const title = String(node.dangerousHed ?? node.hed ?? '').trim();
        const pubDate = node.pubDate ? parseDate(String(node.pubDate)) : undefined;

        // Pick the first rendition that has a URL, trying xxl, then lg, then sm.
        const imgSources = node.image?.sources;
        const imgSrc = imgSources?.xxl?.url || imgSources?.lg?.url || imgSources?.sm?.url || undefined;
        const textDescription = node.dangerousDek ? String(node.dangerousDek) : undefined;

        // Render a description only when there is an image or a text summary to show.
        const description = imgSrc || textDescription ? art(path.join(__dirname, 'templates/description.art'), { src: imgSrc, alt: title, text: textDescription }) : undefined;

        return {
            title,
            link,
            pubDate,
            description,
            image: imgSrc,
        };
    });

    logger.debug(`[gq/tw] parsed ${items.length} items from JSON state ${url}`);
    return { items, headTitle };
}
|
|
||
/**
 * Extracts and parses the JSON object assigned to `__PRELOADED_STATE__` in
 * the page's inline scripts.
 *
 * The object's end is found by balancing braces (ignoring braces inside JSON
 * strings) rather than by taking the last `}` in the script, so statements
 * that follow the assignment do not corrupt the extracted text.
 *
 * @param $ - Cheerio document to search.
 * @returns The parsed state object, or `null` when no script mentions the
 *   state, no balanced object follows the mention, or the object is not valid JSON.
 */
function extractPreloadedStateObject($: ReturnType<typeof load>): any | null {
    const stateScriptText = $('script:contains("__PRELOADED_STATE__")').text();

    // Prefer the explicit `window.` assignment and fall back to a bare mention of the name.
    let assignIndex = stateScriptText.indexOf('window.__PRELOADED_STATE__');
    if (assignIndex === -1) {
        assignIndex = stateScriptText.indexOf('__PRELOADED_STATE__');
    }
    if (assignIndex === -1) {
        logger.debug('[gq/tw] __PRELOADED_STATE__ script not found');
        return null;
    }

    const braceStart = stateScriptText.indexOf('{', assignIndex);

    // Scan forward for the brace that closes the object opened at braceStart.
    let braceEnd = -1;
    if (braceStart !== -1) {
        let depth = 0;
        let inString = false;
        let escaped = false;
        for (let index = braceStart; index < stateScriptText.length; index++) {
            const char = stateScriptText[index];
            if (inString) {
                // Inside a string only an unescaped double quote is significant.
                if (escaped) {
                    escaped = false;
                } else if (char === '\\') {
                    escaped = true;
                } else if (char === '"') {
                    inString = false;
                }
                continue;
            }
            if (char === '"') {
                inString = true;
            } else if (char === '{') {
                depth++;
            } else if (char === '}') {
                depth--;
                if (depth === 0) {
                    braceEnd = index;
                    break;
                }
            }
        }
    }

    if (braceStart === -1 || braceEnd === -1) {
        logger.debug('[gq/tw] __PRELOADED_STATE__ json is malformed');
        return null;
    }

    try {
        return JSON.parse(stateScriptText.slice(braceStart, braceEnd + 1));
    } catch {
        // Report invalid JSON through the nullable return value instead of a raw SyntaxError.
        logger.debug('[gq/tw] __PRELOADED_STATE__ json is malformed');
        return null;
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unnecessary nullish coalescing, since the route path defined at
RSSHub/lib/routes/gq/tw/tw.ts
Line 63 in fbbec60
declares `category` as compulsory. Requests without `category` will be rejected by hono instead of reaching your handler.