-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathhttpsgc.js
More file actions
133 lines (114 loc) · 3.79 KB
/
httpsgc.js
File metadata and controls
133 lines (114 loc) · 3.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
'use strict';
//
// This file might be the biggest WTF that you've read today. And you are right.
// WTF am I doing here. The reason for spawning requests is that I've discovered
// a memory leak in node's https.get method which causes memory to be retained
// after doing large requests. In our case we've got to fetch a 15 MB file hourly to
// ensure that we've got the latest and greatest of data. This can lead to huge
// node processes which will eventually die because of ENOMEM.
//
var debug = require('diagnostics')('httpgc')
, spawn = require('child_process').spawn
, hash = require('crypto').createHash
, which = require('which').sync
, path = require('path')
, fs = require('fs')
, curl
, wget;
//
// Probe the host system for the external download tools we know how to drive.
// A tool whose lookup fails is left undefined, any error raised while looking
// it up is deliberately ignored.
//

/**
 * Resolve the location of a binary on the host system.
 *
 * @param {String} binary Name of the executable to look for.
 * @returns {String|undefined} Result of the lookup, undefined when it failed.
 * @api private
 */
function locate(binary) {
  try {
    return which(binary);
  } catch (e) {
    return undefined;
  }
}

curl = locate('curl');
wget = locate('wget');
/**
 * Request an HTTP(S) resource and return its contents.
 *
 * The `using` argument is optional, `request(url, fn, disk)` is accepted as
 * well. When no engine is forced we prefer curl, then wget and finally fall
 * back to a spawned node process.
 *
 * @param {String} url The HTTPS URL that we need to fetch.
 * @param {String} using Optionally force which fetch engine we should use.
 * @param {Function} fn Completion callback.
 * @param {Boolean} disk Store file to disk instead of streaming spawns.
 * @throws {TypeError} When the selected engine is not registered on `request`.
 * @api private
 */
function request(url, using, fn, disk) {
  var name;

  if ('function' === typeof using) {
    disk = fn;
    fn = using;
    using = null;
  }

  if (!using) {
    if (curl) using = 'curl';
    else if (wget) using = 'wget';
    else using = 'node';
  }

  //
  // Disk based engines are registered under the engine name suffixed with `d`.
  //
  name = using + (disk ? 'd' : '');

  //
  // Only dispatch to engines that are registered directly on `request`. This
  // prevents inherited function properties such as `call` or `bind` from being
  // invoked by accident and gives a readable error for unknown engines.
  //
  if (
       !Object.prototype.hasOwnProperty.call(request, name)
    || 'function' !== typeof request[name]
  ) {
    throw new TypeError('Unknown fetch engine: '+ name);
  }

  debug('requesting '+ url +' using '+ using);
  request[name](url, fn);
}
/**
 * Minimal factory for creating spawns from hell. The arguments for command can
 * contain the special %url% and %dir% strings which will be replaced with the
 * URL that we're about to fetch and the path of the temporary file.
 *
 * The returned function spawns the command and calls the callback exactly once:
 * with an error when the command could not be spawned, exited with a non zero
 * status or the saved file could not be read, otherwise with the data.
 *
 * @param {String} cmd Name of the command we need to execute.
 * @param {Array|String} arg Arguments for the command.
 * @param {Boolean} read Read out the saved file (as utf-8 string) and unlink it.
 * @returns {Function} hellspawn(url, fn), where fn receives `(err, data)`.
 * @api private
 */
request.factory = function factory(cmd, arg, read) {
  arg = Array.isArray(arg) ? arg : [arg];

  //
  // Simple counter to prevent multiple requests to the same URL from removing
  // and adding the same file when in read mode.
  //
  var index = 0;

  return function hellspawn(url, fn) {
    var file = path.join(__dirname, hash('md5').update(url).digest('hex')) +'-'+ index++
      , values = { url: url, dir: file }
      , chunks = []
      , size = 0
      , done = false
      , child;

    /**
     * Clean up and call the completion callback, but only once. A failed spawn
     * emits `error` and can still be followed by a `close` event.
     *
     * @param {Error} err Optional failure.
     * @param {Buffer|String} data The fetched contents.
     * @api private
     */
    function finish(err, data) {
      if (done) return;
      done = true;

      //
      // Clean up all our things, wrap it in a try catch as the file could
      // already have been removed or might never have been created.
      //
      if (read) {
        try { fs.unlinkSync(file); }
        catch (e) {}
      }

      chunks.length = size = 0;
      fn(err, data);
    }

    //
    // Use a replacer function so `$` sequences in the URL or path are inserted
    // literally instead of being interpreted as replacement patterns.
    //
    child = spawn(cmd, arg.map(function map(value) {
      return String(value).replace(/%(url|dir)%/g, function substitute(match, key) {
        return values[key];
      });
    }));

    child.stdout.on('data', function queue(data) {
      size += data.length;
      chunks.push(data);
    });

    //
    // We don't care about the diagnostics of the command, but the pipe needs to
    // be drained or a chatty child will block once the pipe buffer is full.
    //
    if (child.stderr) child.stderr.resume();

    //
    // Without an error listener a missing binary would crash the process.
    //
    child.on('error', function failed(err) {
      finish(err);
    });

    //
    // Listen for `close` instead of `exit`: the stdio streams can still be open
    // when `exit` fires, which would hand out a truncated response.
    //
    child.on('close', function close(code) {
      var data;

      if (code !== 0) {
        return finish(new Error('Received invalid status code from '+ cmd));
      }

      try {
        data = read ? fs.readFileSync(file, 'utf-8') : Buffer.concat(chunks, size);
      } catch (e) {
        return finish(e);
      }

      finish(undefined, data);
    });
  };
};
//
// Regular requests, the response body is read from the stdout of the command.
// The node engines receive the URL (and file path) as process arguments instead
// of having them spliced into the evaluated source, so quotes or backslashes in
// these values cannot break or alter the script.
//
request.curl = request.factory('curl', ['%url%']);
request.wget = request.factory('wget', ['-qO-', '%url%']);
request.node = request.factory('node', [
  '-e',
  'require("https").get(process.argv[1],function(r){r.pipe(process.stdout)})',
  '%url%'
]);

//
// Write to disk requests, the command stores the response in the %dir% file
// which is read out and removed again once the command has finished. All of
// them need the `read` flag, without it the callback receives an empty Buffer
// and the file is left behind.
//
request.curld = request.factory('curl', ['-o', '%dir%', '%url%'], true);
request.wgetd = request.factory('wget', ['%url%', '-O', '%dir%'], true);
request.noded = request.factory('node', [
  '-e',
  'require("https").get(process.argv[1],function(r){r.pipe(require("fs").createWriteStream(process.argv[2]))})',
  '%url%',
  '%dir%'
], true);

//
// Expose the module.
//
module.exports = request;