Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions lib/routes/gq/tw/templates/description.art
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{{ if src }}
<figure>
<img src="{{ src }}"{{ if alt }} alt="{{ alt }}"{{ /if }}>
{{ if alt }}<figcaption>{{@ alt }}</figcaption>{{ /if }}
</figure>
{{ /if }}
{{ if text }}<div>{{@ text }}</div>{{ /if }}
190 changes: 190 additions & 0 deletions lib/routes/gq/tw/tw.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
import { type Data, type DataItem, type Route } from '@/types';
import { art } from '@/utils/render';
import ofetch from '@/utils/ofetch';
import { parseDate } from '@/utils/parse-date';
import logger from '@/utils/logger';

import { load } from 'cheerio';
import path from 'node:path';
import type { Context } from 'hono';
import { JSONPath } from 'jsonpath-plus';

/** Origin of the GQ Taiwan site; listing URLs and relative article links are resolved against it. */
const baseUrl = 'https://www.gq.com.tw';

/** Display title for every supported top-level category, keyed by its URL slug. */
const categoryTitleMap: Record<string, string> = {
    life: 'LIFE',
    fashion: 'FASHION',
    entertainment: 'ENTERTAINMENT',
    gadget: 'GADGET',
    bettermen: 'BETTER MEN',
};

/** Label of the "special" subcategory, which every category shares. */
const specialReportTitle = '特別報導';

/**
 * Display titles of the supported subcategories.
 * Outer key: category slug; inner key: subcategory slug.
 */
const subcategoryTitleMaps: Record<string, Record<string, string>> = {
    life: {
        food: '美食',
        wine: '微醺',
        outdoor: '戶外生活',
        design: '設計生活',
        lifestyleinsider: '五感十築',
        gogreen: 'GoGreen',
        special: specialReportTitle,
    },
    fashion: {
        'fashion-news': '新訊',
        shopping: '編輯推薦',
        guide: '穿搭指南',
        special: specialReportTitle,
    },
    entertainment: {
        movie: '電影',
        popculture: '娛樂',
        celebrities: '名人',
        girl: '美女',
        sports: '體育',
        special: specialReportTitle,
        // The Oscars entry leads to a tag page, so it is not offered as a subcategory of this route.
    },
    gadget: {
        '3c': '3C',
        auto: '車',
        watch: '腕錶',
        special: specialReportTitle,
    },
    bettermen: {
        wellbeing: '保養健身',
        relationship: '感情關係',
        sex: '性愛',
        'one-shot': 'ONE-SHOT',
        special: specialReportTitle,
    },
};

// Pattern shared by the route itself and the radar rule that points at it.
const routePath = '/tw/:category/:subcategory?';

/**
 * Route definition for GQ Taiwan category / subcategory listings.
 * The radar rule maps a matching gq.com.tw page URL onto the same path pattern.
 */
export const route: Route = {
    // category required because https://www.gq.com.tw/feed/rss already exists
    path: routePath,
    categories: ['new-media'],
    example: '/gq/tw/life/outdoor',
    parameters: {
        category: 'Category, e.g., life',
        subcategory: 'Subcategory, e.g., outdoor',
    },
    features: {
        requireConfig: false,
        requirePuppeteer: false,
        antiCrawler: false,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [
        {
            source: ['gq.com.tw/:category/:subcategory?'],
            target: routePath,
        },
    ],
    name: 'GQ台灣',
    maintainers: ['johan456789'],
    handler,
    description: 'GQ台灣 最新內容,可選擇類別與子類別',
};

/**
 * Route handler: builds the feed for a GQ Taiwan category or subcategory listing page.
 *
 * @param ctx - Hono request context. Reads the `category` and optional `subcategory`
 *   path parameters plus the optional `limit` query parameter.
 * @returns Feed data whose items are capped at `limit` (21 when absent or invalid).
 * @throws Error when the category or subcategory is not one of the known slugs.
 */
async function handler(ctx: Context): Promise<Data> {
    const defaultLimit = 21;

    // `category` is a mandatory segment of the route path, so no empty-string fallback is needed.
    const category = ctx.req.param('category');
    const subcategory = ctx.req.param('subcategory') ?? '';

    // A non-numeric limit parses to NaN (slice would return nothing) and a negative one
    // would cut from the wrong end, so anything that is not a positive integer uses the default.
    const requestedLimit = Number.parseInt(ctx.req.query('limit') ?? '', 10);
    const limit = requestedLimit > 0 ? requestedLimit : defaultLimit;

    // Compare against own keys only: the `in` operator also accepts inherited names
    // such as `constructor` or `toString`, which would slip through validation.
    if (!Object.keys(categoryTitleMap).includes(category)) {
        throw new Error(`Unsupported category: ${category}`);
    }

    const allowedSubMap = subcategoryTitleMaps[category] ?? {};
    if (subcategory && !Object.keys(allowedSubMap).includes(subcategory)) {
        throw new Error(`Unsupported subcategory: ${subcategory}`);
    }

    const listUrl = subcategory ? `${baseUrl}/${category}/${subcategory}` : `${baseUrl}/${category}`;
    const { items, headTitle } = await parseWebpage(listUrl);
    logger.debug(`[gq/tw] fetched ${items.length} items from ${listUrl}`);

    // Prefer the title reported by the page; otherwise derive one from the static maps.
    const sectionTitle = subcategory ? allowedSubMap[subcategory] : categoryTitleMap[category];
    const title = headTitle || `${sectionTitle} | GQ Taiwan`;

    return {
        title,
        link: listUrl,
        item: items.slice(0, limit),
    };
}
/** Result of parsing one listing page. */
interface PageParseResult {
    /** Feed items built from the page's preloaded state. */
    items: DataItem[];
    /** Page title taken from the state's `head.title`; undefined when absent or empty. */
    headTitle?: string;
}

/**
 * Fetch a listing page and turn the entries found in its preloaded state into feed items.
 *
 * @param url - Absolute URL of the category / subcategory listing page.
 * @returns The parsed items together with the page title, when the state provides one.
 * @throws Error when the preloaded state (or its `transformed` section) cannot be extracted.
 */
async function parseWebpage(url: string): Promise<PageParseResult> {
    const html = await ofetch(url);
    const $ = load(html);

    const stateObj = extractPreloadedStateObject($);
    if (!stateObj?.transformed) {
        throw new Error(`Failed to extract preloaded state object from ${url}`);
    }

    // Only accept a real, non-empty string: String(undefined) would yield the truthy
    // text "undefined" and prevent callers from falling back to their own title.
    const rawHeadTitle: unknown = stateObj.transformed['head.title'];
    const headTitle = typeof rawHeadTitle === 'string' && rawHeadTitle !== '' ? rawHeadTitle : undefined;

    // Every entry of every container; entries without a URL cannot become feed items.
    const nodes = (
        JSONPath({
            path: '$.transformed.bundle.containers[*].items[*]',
            json: stateObj,
        }) as any[]
    ).filter((node) => node && node.url);

    const items = nodes.map((node: any): DataItem => {
        // Decode any literal "\u002F" sequences left in the path before resolving it.
        const urlPath = String(node.url).replaceAll(String.raw`\u002F`, '/');
        const link = new URL(urlPath, baseUrl).toString();

        // NOTE(review): the plain headline is reported to live under `source.hed`;
        // `node.hed` is kept as a last resort — confirm against a live payload.
        const title = String(node.dangerousHed ?? node.source?.hed ?? node.hed ?? '').trim();
        const pubDate = node.pubDate ? parseDate(String(node.pubDate)) : undefined;

        // Use the largest rendition that is available.
        const imgSources = node.image?.sources;
        const imgSrc: string | undefined = imgSources?.xxl?.url || imgSources?.lg?.url || imgSources?.sm?.url || undefined;
        const textDescription = node.dangerousDek ? String(node.dangerousDek) : undefined;

        // Render the template only when there is an image or some text to show.
        const description =
            imgSrc || textDescription
                ? art(path.join(__dirname, 'templates/description.art'), { src: imgSrc, alt: title, text: textDescription })
                : undefined;

        return {
            title,
            link,
            pubDate,
            description,
            image: imgSrc,
        };
    });

    logger.debug(`[gq/tw] parsed ${items.length} items from JSON state ${url}`);
    return { items, headTitle };
}

/**
 * Extract the object assigned to `window.__PRELOADED_STATE__` from the page's inline scripts.
 *
 * @param $ - Cheerio instance loaded with the page HTML.
 * @returns The parsed state object, or `null` when no script mentions the state or
 *   the embedded JSON cannot be located or parsed.
 */
function extractPreloadedStateObject($: ReturnType<typeof load>): any | null {
    const marker = '__PRELOADED_STATE__';

    // Read only the first matching script: calling `.text()` on several matches would
    // concatenate their contents and corrupt the JSON boundaries.
    const stateScriptText = $(`script:contains("${marker}")`).first().text();
    if (!stateScriptText) {
        logger.debug('[gq/tw] __PRELOADED_STATE__ script not found');
        return null;
    }

    // Prefer the explicit assignment; fall back to the bare marker so that a missing
    // `window.` prefix does not make the brace search start at index -1.
    let assignIndex = stateScriptText.indexOf(`window.${marker}`);
    if (assignIndex === -1) {
        assignIndex = stateScriptText.indexOf(marker);
    }

    const braceStart = stateScriptText.indexOf('{', assignIndex);
    const braceEnd = stateScriptText.lastIndexOf('}');
    if (braceStart === -1 || braceEnd === -1 || braceEnd <= braceStart) {
        logger.debug('[gq/tw] __PRELOADED_STATE__ json is malformed');
        return null;
    }

    try {
        return JSON.parse(stateScriptText.slice(braceStart, braceEnd + 1));
    } catch {
        // Unparsable JSON is reported the same way as the other malformed cases
        // instead of surfacing a bare SyntaxError.
        logger.debug('[gq/tw] __PRELOADED_STATE__ json is malformed');
        return null;
    }
}