Skip to content

Commit 58d4b65

Browse files
author
jjeejj
committed
生成 极客时间 专栏 文章 pdf
0 parents  commit 58d4b65

File tree

10 files changed

+3740
-0
lines changed

10 files changed

+3740
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
node_modules
2+
.vscode
3+
geektime_*

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
### 极客时间专栏转换为PDF

columnArticleList.js

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// 获取专栏文章列表
2+
const config = require('./config.js');
3+
const superagent = require('superagent');
4+
const utils = require('./utils');
5+
const path = require('path');
6+
const generaterPdf = require('./generaterPdf.js');
7+
8+
/**
 * Crawl every article of the configured column, starting at `firstArticalId`
 * and following each article's right-hand neighbor until there is none left
 * or a request fails. A PDF is generated for every fetched article and the
 * collected metadata is written out once crawling stops.
 *
 * @param {number|string} firstArticalId - id of the first article in the column
 * @returns {Promise<Object[]>} metadata of every article fetched successfully
 */
(async function getColumnArticleList (firstArticalId) {
    await utils.createDir('geektime_' + config.columnName);
    console.log('专栏文章链接开始获取');

    const columnArticleUrlList = [];
    let articalId = firstArticalId;

    // Iterate rather than recurse: one request per article, so long columns
    // do not build an ever-deeper chain of pending async calls.
    while (true) {
        try {
            const res = await superagent.post(config.url)
                .set({
                    'Content-Type': 'application/json',
                    'Cookie': config.cookie,
                    'Referer': config.columnBaseUrl + articalId,
                    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
                }).send({
                    'id': articalId,
                    'include_neighbors': true
                });

            const columnArticle = res.body && res.body.data;
            if (!columnArticle) {
                // Fail with a readable message instead of a TypeError on `undefined`.
                throw new Error('response contains no article data');
            }
            console.log(columnArticle.article_title);

            const articleInfo = {
                articleTitle: columnArticle.article_title, // article title
                articalUrl: config.columnBaseUrl + articalId, // article URL
                articleContent: columnArticle.article_content, // article body
                articleCover: columnArticle.article_cover, // cover image
                authorName: columnArticle.author_name, // author
                // creation time; per the original comment the API value is a unix timestamp in seconds
                articleCtime: utils.formatDate(columnArticle.article_ctime),
                articleNeighbors: columnArticle.neighbors, // previous / next article info
                audioDownloadUrl: columnArticle.audio_download_url,
                audioTitle: columnArticle.audio_title
            };
            columnArticleUrlList.push(articleInfo);

            // A path separator inside the title would be read as a directory
            // boundary, so replace it before building the file name.
            const pdfFileName = String(columnArticle.article_title).replace(/[\\/]/g, '_') + '.pdf';

            // NOTE(review): the directory created above is 'geektime_' + columnName,
            // while this one has no 'geektime_' prefix — confirm against generaterPdf.js.
            await generaterPdf(articleInfo,
                pdfFileName,
                path.resolve(__dirname, config.columnName)
            );

            // Stop when there is no next article.
            const neighborRight = columnArticle.neighbors.right;
            if (!(neighborRight && neighborRight.id)) {
                break;
            }
            articalId = neighborRight.id;
            // Pause between requests (utils.sleep presumably takes seconds — TODO confirm).
            await utils.sleep(1.5);
        } catch (err) {
            // Best effort: log the failing article and keep what was collected so far.
            console.log(`访问 地址 ${config.columnBaseUrl + articalId} err`, err.message);
            break;
        }
    }

    console.log('专栏文章链接获取完成');
    // `await` is harmless if writeToFile is synchronous and surfaces its
    // failure if it returns a promise.
    await utils.writeToFile(`${config.columnName}`, JSON.stringify(columnArticleUrlList, null, 4));
    return columnArticleUrlList;
})(config.firstArticalId).catch((err) => {
    // Failures outside the per-article try/catch (e.g. creating the output
    // directory) must not end up as an unhandled rejection.
    console.error(err);
    process.exitCode = 1;
});

config.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
/**
2+
* 需要转换为 pdf 的配置信息
3+
*/
4+
module.exports = {
5+
url: 'https://time.geekbang.org/serv/v1/article',
6+
columnBaseUrl: 'https://time.geekbang.org/column/article/',
7+
columnName: '玩转VScode', // 专栏名称
8+
firstArticalId: 18053, //专栏第一篇文章的ID
9+
isdownloadVideo: false, // 是否下载音频
10+
isComment: false, // 是否导出评论
11+
cookie: '_ga=GA1.2.452325826.1522412060; OUTFOX_SEARCH_USER_ID_NCOO=960782688.2413434; orderInfo={%22list%22:[{%22count%22:1%2C%22image%22:%22https://static001.geekbang.org/resource/image/cb/c8/cb154cb749aeb59c2cf8c00a17f0b0c8.jpg%22%2C%22name%22:%22TensorFlow%E5%BF%AB%E9%80%9F%E5%85%A5%E9%97%A8%E4%B8%8E%E5%AE%9E%E6%88%98%22%2C%22sku%22:100023001%2C%22price%22:{%22sale%22:9900}}]%2C%22invoice%22:false%2C%22app_id%22:3%2C%22cid%22:153%2C%22isFromTime%22:true%2C%22detail_url%22:%22https://time.geekbang.org/column/detail/153-76554%22}; GCID=935e65d-0de041d-857c0d5-e9e4f37; _gid=GA1.2.1237200660.1554519228; GCESS=BAMEfBWoXAgBAwUEAAAAAAIEfBWoXAkBAQEET1cPAAQEAC8NAAsCBAAKBAAAAAAGBAHVTeoHBKaztAQMAQE-; Hm_lvt_022f847c4e3acd44d4a2481d9187f1e6=1554519252,1554519316,1554544920,1554545178; Hm_lpvt_022f847c4e3acd44d4a2481d9187f1e6=1554545178; _gat=1; SERVERID=1fa1f330efedec1559b3abbcb6e30f50|1554545360|1554545092'
12+
};

generaterPdf.js

Lines changed: 62 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

index.js

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
const puppeteer = require('puppeteer');
const devices = require('puppeteer/DeviceDescriptors');
const iPhone = devices['iPhone 6'];

/**
 * Demo script: open one article page while emulating an iPhone 6, wait for
 * it to render, and save it as `example.pdf` in the current working directory.
 */
(async () => {
    // Default (headless) launch. Pass `{ headless: false }` to watch the page
    // while debugging, though PDF generation may then be unavailable.
    const browser = await puppeteer.launch();
    try {
        const page = await browser.newPage();
        await page.emulate(iPhone);
        await page.goto('https://time.geekbang.org/column/article/40261');
        // Fixed 5 s pause, presumably to let client-side rendering finish.
        await new Promise((resolve) => { setTimeout(resolve, 5000); });
        await page.pdf({path: 'example.pdf'});

        console.log('generater pdf success');
    } finally {
        // Always shut the browser down; otherwise the Chromium child process
        // keeps running and the Node process never exits.
        await browser.close();
    }
})().catch((err) => {
    // Report the failure instead of leaving an unhandled rejection.
    console.error('generater pdf failed:', err);
    process.exitCode = 1;
});

0 commit comments

Comments
 (0)