This repository was archived by the owner on Feb 13, 2026. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 125
Expand file tree
/
Copy patharticleReaderRouter.ts
More file actions
106 lines (92 loc) · 2.91 KB
/
articleReaderRouter.ts
File metadata and controls
106 lines (92 loc) · 2.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import { OpenAPIRegistry } from '@asteasolutions/zod-to-openapi';
import { Readability } from '@mozilla/readability';
import * as cheerio from 'cheerio';
import express, { Request, Response, Router } from 'express';
import got from 'got';
import { StatusCodes } from 'http-status-codes';
import { JSDOM } from 'jsdom';
import { apiKeyHeader } from '@/api-docs/openAPIHeaderBuilders';
import { createApiResponse } from '@/api-docs/openAPIResponseBuilders';
import { apiKeyHandler } from '@/common/middleware/apiKeyHandler';
import { ResponseStatus, ServiceResponse } from '@/common/models/serviceResponse';
import { handleServiceResponse } from '@/common/utils/httpHandlers';
import { ArticleReaderSchema } from './articleReaderModel';
// OpenAPI registry holding the article reader's schema and path definitions.
// Exported so code outside this module can read it (presumably when the
// combined API document is generated — confirm against the api-docs setup).
export const articleReaderRegistry = new OpenAPIRegistry();
// Register the response schema under the component name 'ArticleReader'.
articleReaderRegistry.register('ArticleReader', ArticleReaderSchema);
/**
 * Minimal shape this module needs from a document query function: look up
 * elements by selector and remove the matches. The function returned by
 * `cheerio.load` satisfies it, so callers are type-checked instead of `any`.
 */
type ElementRemover = (selector: string) => { remove(): unknown };

/** Tag names stripped from a page before its text is extracted. */
const UNWANTED_ELEMENTS: readonly string[] = [
  'footer',
  'header',
  'nav',
  'script',
  'style',
  'link',
  'meta',
  'noscript',
  'img',
  'picture',
  'video',
  'audio',
  'iframe',
  'object',
  'embed',
  'param',
  'track',
  'source',
  'canvas',
  'map',
  'area',
  'svg',
  'math',
];

/**
 * Removes every element matching one of the unwanted tag names.
 *
 * The query function is invoked once per tag name, in list order, and
 * `remove()` is called on each result, so the underlying document is
 * mutated in place.
 *
 * @param _cheerio - Query function bound to the document to clean.
 */
const removeUnwantedElements = (_cheerio: ElementRemover): void => {
  for (const element of UNWANTED_ELEMENTS) {
    _cheerio(element).remove();
  }
};
/**
 * Downloads a page, strips non-article elements, and extracts readable text.
 *
 * @param url - Address of the page to fetch; also passed to JSDOM as the
 *   document URL.
 * @returns The page title and the text content produced by Readability
 *   (an empty string when Readability yields no article).
 * @throws Whatever `got` rejects with when the request fails.
 */
const fetchAndCleanContent = async (url: string): Promise<{ title: string; content: string }> => {
  const { body } = await got(url);
  const $ = cheerio.load(body);

  // Take only the first <title>: inline SVGs may carry their own <title>
  // elements, and `.text()` on the full match set would concatenate them all.
  const title = $('title').first().text();

  removeUnwantedElements($);

  // Hand Readability the cleaned *markup*, not flattened text. Serialising
  // with `$.text()` discards the structure Readability works on, joins
  // adjacent blocks without separators, and lets JSDOM re-interpret any
  // literal "<...>" in the article text as tags.
  const doc = new JSDOM($.html(), { url });
  const article = new Readability(doc.window.document).parse();

  return { title, content: article?.textContent ?? '' };
};
/**
 * Returns true when `value` parses as an absolute URL using http or https.
 */
const isHttpUrl = (value: string): boolean => {
  try {
    const { protocol } = new URL(value);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    // `new URL` throws on anything that is not an absolute URL.
    return false;
  }
};

/**
 * Express router exposing `GET /?url=<page>`, which responds with the title
 * and extracted text of the requested page wrapped in a ServiceResponse.
 *
 * Responses:
 * - 400 when `url` is missing, not a single string, or not an http(s) URL.
 * - 500 when fetching or parsing the page throws.
 * - 200 with `{ title, content }` otherwise.
 */
export const articleReaderRouter: Router = (() => {
  const router = express.Router();

  // apiKeyHandler runs before every route registered on this router.
  router.use(apiKeyHandler);

  // NOTE(review): documented as '/content' while the handler below is
  // registered at '/' — presumably the router is mounted at '/content';
  // confirm against the server setup. The `url` query parameter is not
  // described in this registration.
  articleReaderRegistry.registerPath({
    method: 'get',
    path: '/content',
    tags: ['Article Reader'],
    request: {
      headers: [apiKeyHeader],
    },
    responses: createApiResponse(ArticleReaderSchema, 'Success'),
  });

  router.get('/', async (req: Request, res: Response) => {
    const { url } = req.query;

    // Query values can also be arrays or nested objects; accept one string only.
    if (typeof url !== 'string') {
      // The response must actually be sent; merely constructing it would
      // leave the request without a response.
      handleServiceResponse(
        new ServiceResponse(ResponseStatus.Failed, 'URL must be a string', null, StatusCodes.BAD_REQUEST),
        res
      );
      return;
    }

    // Reject malformed or non-http(s) input up front as a client error.
    // NOTE(review): this does not stop requests to internal hosts; add an
    // allow/deny list if this service can reach private networks.
    if (!isHttpUrl(url)) {
      handleServiceResponse(
        new ServiceResponse(
          ResponseStatus.Failed,
          'URL must be a valid http(s) URL',
          null,
          StatusCodes.BAD_REQUEST
        ),
        res
      );
      return;
    }

    try {
      const content = await fetchAndCleanContent(url);
      const serviceResponse = new ServiceResponse(
        ResponseStatus.Success,
        'Content fetched successfully',
        content,
        StatusCodes.OK
      );
      handleServiceResponse(serviceResponse, res);
    } catch (error) {
      // Caught values are not guaranteed to be Error instances.
      const reason = error instanceof Error ? error.message : String(error);
      const errorMessage = `Error fetching content ${reason}`;
      console.error(errorMessage);
      handleServiceResponse(
        new ServiceResponse(ResponseStatus.Failed, errorMessage, null, StatusCodes.INTERNAL_SERVER_ERROR),
        res
      );
    }
  });

  return router;
})();