-
Notifications
You must be signed in to change notification settings - Fork 8.8k
feat: add gq tw routes #20183
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
feat: add gq tw routes #20183
Changes from 4 commits
a6dd0a0
1567128
76cbbd2
d838b25
5190d57
3237d23
fbbec60
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| import type { Namespace } from '@/types'; | ||
|
|
||
/** Namespace metadata for the GQ Taiwan routes: display name, site host and content language. */
export const namespace: Namespace = {
    lang: 'zh-TW',
    url: 'gq.com.tw',
    name: 'GQ台灣',
};
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
{{ if src }}
<figure>
    <img src="{{ src }}"{{ if alt }} alt="{{ alt }}"{{ /if }}>
    {{ if alt }}<figcaption>{{ alt }}</figcaption>{{ /if }}
</figure>
{{ /if }}
{{ if text }}<div>{{@ text }}</div>{{ /if }}
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,220 @@ | ||||
| import { type Data, type DataItem, type Route } from '@/types'; | ||||
| import { art } from '@/utils/render'; | ||||
| import cache from '@/utils/cache'; | ||||
| import ofetch from '@/utils/ofetch'; | ||||
| import { parseDate } from '@/utils/parse-date'; | ||||
| import logger from '@/utils/logger'; | ||||
|
|
||||
| import { load } from 'cheerio'; | ||||
| import path from 'node:path'; | ||||
| import type { Context } from 'hono'; | ||||
| import { JSONPath } from 'jsonpath-plus'; | ||||
|
|
||||
| const baseUrl = 'https://www.gq.com.tw'; | ||||
|
|
||||
/**
 * Supported top-level categories.
 * Keys are the path segments accepted as `:category`; values are the labels used in the feed title.
 */
const categoryTitleMap: Record<string, string> = {
    life: 'Life',
    fashion: 'Fashion',
    entertainment: 'Entertainment',
    gadget: 'Gadget',
    bettermen: 'Better Men',
};
|
|
||||
/** Label of the "special" subcategory, which every category offers. */
const specialReportTitle = '特別報導';

/**
 * Supported subcategories, grouped by category.
 * Outer keys are `:category` values, inner keys are `:subcategory` values,
 * and inner values are the labels appended to the feed title.
 */
const subcategoryTitleMaps: Record<string, Record<string, string>> = {
    life: {
        food: '美食',
        wine: '微醺',
        outdoor: '戶外生活',
        design: '設計生活',
        lifestyleinsider: '五感十築',
        gogreen: 'GoGreen',
        special: specialReportTitle,
    },
    fashion: {
        'fashion-news': '新訊',
        shopping: '編輯推薦',
        guide: '穿搭指南',
        special: specialReportTitle,
    },
    entertainment: {
        movie: '電影',
        popculture: '娛樂',
        celebrities: '名人',
        girl: '美女',
        sports: '體育',
        special: specialReportTitle,
        // The Oscars entry leads to a tag page, so it is not treated as a subcategory of this route.
    },
    gadget: {
        '3c': '3C',
        auto: '車',
        watch: '腕錶',
        special: specialReportTitle,
    },
    bettermen: {
        wellbeing: '保養健身',
        relationship: '感情關係',
        sex: '性愛',
        'one-shot': 'ONE-SHOT',
        special: specialReportTitle,
    },
};
|
|
||||
/**
 * Route registration for GQ Taiwan section listings.
 * Serves `/gq/tw/:category/:subcategory?` and delegates feed construction to `handler`.
 */
export const route: Route = {
    name: 'GQ台灣',
    description: 'GQ台灣 最新內容,可選擇類別與子類別',
    maintainers: ['johan456789'],
    categories: ['new-media'],
    // The category segment is mandatory because the site already publishes https://www.gq.com.tw/feed/rss
    path: '/tw/:category/:subcategory?',
    example: '/gq/tw/life/outdoor',
    parameters: {
        category: 'Category, e.g., life',
        subcategory: 'Subcategory, e.g., outdoor',
    },
    radar: [
        {
            source: ['gq.com.tw/:category/:subcategory?'],
            target: '/tw/:category/:subcategory?',
        },
    ],
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    handler,
};
|
|
||||
/** Number of items returned when `limit` is missing, non-numeric or not positive. */
const defaultLimit = 21;

/**
 * Type guard: true when `key` is an own property of `map`.
 * Unlike the `in` operator, inherited names such as `constructor` or `toString` do not match.
 */
function isOwnKey(map: Record<string, unknown>, key: string | undefined): key is string {
    return key !== undefined && Object.prototype.hasOwnProperty.call(map, key);
}

/**
 * Builds the feed for one GQ Taiwan section page.
 *
 * Reads `category` (required) and `subcategory` (optional) from the route params and
 * `limit` from the query string, validates them against the title maps, then returns
 * the parsed list page truncated to `limit` items.
 *
 * @param ctx Hono request context.
 * @returns Feed title, list-page link and items.
 * @throws Error when the category or subcategory is not one of the supported values.
 */
async function handler(ctx: Context): Promise<Data> {
    // `category` is a required path segment, so no empty-string fallback is needed here.
    const category = ctx.req.param('category');
    const subcategory = ctx.req.param('subcategory') ?? '';

    if (!isOwnKey(categoryTitleMap, category)) {
        throw new Error(`Unsupported category: ${category}`);
    }
    if (subcategory && !isOwnKey(subcategoryTitleMaps[category] ?? {}, subcategory)) {
        throw new Error(`Unsupported subcategory: ${subcategory}`);
    }

    // NaN, zero and negative values would make `slice` return nothing or drop trailing items.
    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
    const limit = requestedLimit > 0 ? requestedLimit : defaultLimit;

    const listUrl = `${baseUrl}/${category}${subcategory ? `/${subcategory}` : ''}`;
    const items = await cache.tryGet(listUrl, () => parseWebpage(listUrl));

    logger.debug(`[gq/tw] fetched ${items.length} items from ${listUrl}`);

    const categoryTitle = categoryTitleMap[category];
    const title = subcategory ? `GQ台灣 - ${categoryTitle}/${subcategoryTitleMaps[category][subcategory]}` : `GQ台灣 - ${categoryTitle}`;

    return {
        title,
        link: listUrl,
        item: items.slice(0, limit),
    };
}
|
|
||||
/** `decodeURI` that returns its input unchanged instead of throwing on malformed escape sequences. */
function safeDecodeUri(value: string): string {
    try {
        return decodeURI(value);
    } catch {
        return value;
    }
}

/**
 * Fetches a list page and converts every summary card on it into a feed item.
 *
 * Title, link, image and the visible date come from the card markup. The publication
 * date and text description are taken from the page's preloaded state when an entry
 * with the same URL exists there; otherwise the card's `<time>` text is parsed.
 *
 * @param url Absolute URL of the list page.
 * @returns One item per card that has a link; cards without an `href` are skipped.
 */
async function parseWebpage(url: string): Promise<DataItem[]> {
    const html = await ofetch(url);
    const $ = load(html);

    const urlMetaMap = buildUrlMetaMap(extractPreloadedStateObject($), baseUrl);

    const items = $('div[class^="SummaryCollectionGridItems"]')
        .find('div[class^="SummaryItemWrapper"]')
        .toArray()
        .flatMap((el): DataItem[] => {
            const $el = $(el);

            const href = $el.find('div[class^="SummaryItemContent"] a').first().attr('href')?.trim();
            if (!href) {
                return [];
            }

            // Resolve relative AND absolute hrefs through URL so the result is normalised
            // the same way as the keys of `urlMetaMap`; keep the raw href if it cannot be parsed.
            let link = href;
            try {
                link = new URL(href, baseUrl).toString();
            } catch {
                // leave `link` as the raw href
            }

            const imgEl = $el.find('div[class^="SummaryItemAssetContainer"] img').first();
            const imgSrc = imgEl.attr('src')?.trim() || imgEl.attr('data-src')?.trim() || '';

            const title = $el.find('div[class^="SummaryItemContent"] a > h2').text().trim();

            const meta = urlMetaMap.get(link) ?? urlMetaMap.get(safeDecodeUri(link));

            // Prefer the date from the preloaded state; fall back to the date printed on the card.
            const timeText = $el.find('div[class^="SummaryItemBylineWrapper"] > time').text().trim();
            let pubDate: Date | undefined;
            if (meta?.pubDate) {
                pubDate = parseDate(meta.pubDate);
            } else if (timeText) {
                pubDate = parseDate(timeText, 'YYYY年M月D日');
            }

            const textDescription = meta?.description;
            const description =
                imgSrc || textDescription ? art(path.join(__dirname, 'templates/description.art'), { src: imgSrc || undefined, alt: title, text: textDescription }) : undefined;

            const item: DataItem = {
                title,
                link,
                pubDate,
                description,
                image: imgSrc || undefined,
            };
            return [item];
        });

    logger.debug(`[gq/tw] parsed ${items.length} items from list page ${url}`);

    return items;
}
|
|
||||
/**
 * Returns the JSON object literal that starts at `text[start]` (which must be `{`),
 * found by counting braces while ignoring anything inside double-quoted strings.
 * Returns `null` when the braces never balance.
 */
function sliceJsonObject(text: string, start: number): string | null {
    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (escaped) {
                escaped = false;
            } else if (ch === '\\') {
                escaped = true;
            } else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === '"') {
            inString = true;
        } else if (ch === '{') {
            depth++;
        } else if (ch === '}') {
            depth--;
            if (depth === 0) {
                return text.slice(start, i + 1);
            }
        }
    }
    return null;
}

/**
 * Extracts the object assigned to `window.__PRELOADED_STATE__` in an inline script.
 *
 * @param $ Cheerio instance loaded with the page HTML.
 * @returns The parsed state, or `null` when no such script exists, no object literal
 *          follows the assignment, or the literal is not valid JSON.
 */
function extractPreloadedStateObject($: ReturnType<typeof load>): any | null {
    const stateMarker = '__PRELOADED_STATE__';
    const assignment = `window.${stateMarker}`;

    // Several scripts may mention the marker; prefer the one that holds the assignment.
    const candidates = $(`script:contains("${stateMarker}")`)
        .toArray()
        .map((el) => $(el).text());
    const stateScriptText = candidates.find((text) => text.includes(assignment)) ?? candidates[0];
    if (!stateScriptText) {
        logger.debug('[gq/tw] __PRELOADED_STATE__ script not found');
        return null;
    }

    // If the assignment is absent, search from the start of the script.
    const assignIndex = stateScriptText.indexOf(assignment);
    const braceStart = stateScriptText.indexOf('{', Math.max(assignIndex, 0));
    const jsonText = braceStart === -1 ? null : sliceJsonObject(stateScriptText, braceStart);
    if (jsonText === null) {
        logger.debug('[gq/tw] __PRELOADED_STATE__ object literal not found');
        return null;
    }

    try {
        return JSON.parse(jsonText);
    } catch {
        // The caller treats a missing state as "no metadata", so a parse failure must not abort the route.
        logger.debug('[gq/tw] __PRELOADED_STATE__ is not valid JSON');
        return null;
    }
}
/** Per-article metadata looked up by absolute article URL. */
interface UrlMeta {
    /** Publication date string copied from the state node's `pubDate`, when present. */
    pubDate?: string;
    /** Description text copied from the state node's `dangerousDek`, when present. */
    description?: string;
}
|
|
||||
/**
 * Indexes the article nodes of the preloaded state by absolute URL.
 *
 * Nodes are read from `transformed.bundle.containers[*].items[*]`. Each node with a
 * `url` contributes its `pubDate` and `dangerousDek`; when two nodes resolve to the
 * same URL the later one wins.
 *
 * @param stateObj Parsed preloaded state, or a falsy value when none was found.
 * @param baseUrl  Origin used to resolve relative node URLs.
 * @returns Map from absolute URL to metadata; empty when `stateObj` is falsy.
 */
function buildUrlMetaMap(stateObj: any, baseUrl: string): Map<string, UrlMeta> {
    const metaByUrl = new Map<string, UrlMeta>();
    if (!stateObj) {
        return metaByUrl;
    }

    const nodes: unknown = JSONPath({
        path: '$.transformed.bundle.containers[*].items[*]',
        json: stateObj,
    });
    if (!Array.isArray(nodes)) {
        return metaByUrl;
    }

    for (const node of nodes) {
        if (!node?.url) {
            continue;
        }

        // Undo literal `\u002F` sequences that survived JSON parsing.
        const urlPath = String(node.url).replaceAll(String.raw`\u002F`, '/');

        let absoluteUrl: string;
        try {
            absoluteUrl = new URL(urlPath, baseUrl).toString();
        } catch {
            // One unparseable URL must not discard the metadata of every other article.
            continue;
        }

        metaByUrl.set(absoluteUrl, {
            pubDate: node.pubDate ? String(node.pubDate) : undefined,
            description: node.dangerousDek ? String(node.dangerousDek) : undefined,
        });
    }

    return metaByUrl;
}
Uh oh!
There was an error while loading. Please reload this page.