Skip to content

Commit 05f38fd

Browse files
authored
Merge pull request #829 from jnunemaker/memory
feat: stream multipart file uploads to reduce memory usage
2 parents 091bd6a + 8901c23 commit 05f38fd

File tree

6 files changed

+469
-1
lines changed

6 files changed

+469
-1
lines changed

lib/httparty/request.rb

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,14 @@ def setup_raw_request
248248
elsif options[:body].respond_to?(:to_hash) && !@raw_request['Content-Type']
249249
@raw_request['Content-Type'] = 'application/x-www-form-urlencoded'
250250
end
251-
@raw_request.body = body.call
251+
252+
if body.streaming? && options[:stream_body] != false
253+
stream = body.to_stream
254+
@raw_request.body_stream = stream
255+
@raw_request['Content-Length'] = stream.size.to_s
256+
else
257+
@raw_request.body = body.call
258+
end
252259
end
253260

254261
@raw_request.instance_variable_set(:@decode_content, decompress_content?)

lib/httparty/request/body.rb

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# frozen_string_literal: true
22

33
require_relative 'multipart_boundary'
4+
require_relative 'streaming_multipart_body'
45

56
module HTTParty
67
class Request
@@ -30,6 +31,22 @@ def multipart?
3031
params.respond_to?(:to_hash) && (force_multipart || has_file?(params))
3132
end
3233

34+
def streaming?
35+
multipart? && has_file?(params)
36+
end
37+
38+
def to_stream
39+
return nil unless streaming?
40+
StreamingMultipartBody.new(prepared_parts, boundary)
41+
end
42+
43+
def prepared_parts
44+
normalized_params = params.flat_map { |key, value| HashConversions.normalize_keys(key, value) }
45+
normalized_params.map do |key, value|
46+
[key, value, file?(value)]
47+
end
48+
end
49+
3350
private
3451

3552
# https://html.spec.whatwg.org/#multipart-form-data
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
# frozen_string_literal: true
2+
3+
module HTTParty
4+
class Request
5+
class StreamingMultipartBody
6+
NEWLINE = "\r\n"
7+
CHUNK_SIZE = 64 * 1024 # 64 KB chunks
8+
9+
def initialize(parts, boundary)
10+
@parts = parts
11+
@boundary = boundary
12+
@part_index = 0
13+
@state = :header
14+
@current_file = nil
15+
@header_buffer = nil
16+
@header_offset = 0
17+
@footer_sent = false
18+
end
19+
20+
def size
21+
@size ||= calculate_size
22+
end
23+
24+
def read(length = nil, outbuf = nil)
25+
outbuf = outbuf ? outbuf.replace(''.b) : ''.b
26+
27+
return read_all(outbuf) if length.nil?
28+
29+
while outbuf.bytesize < length
30+
chunk = read_chunk(length - outbuf.bytesize)
31+
break if chunk.nil?
32+
outbuf << chunk
33+
end
34+
35+
outbuf.empty? ? nil : outbuf
36+
end
37+
38+
def rewind
39+
@part_index = 0
40+
@state = :header
41+
@current_file = nil
42+
@header_buffer = nil
43+
@header_offset = 0
44+
@footer_sent = false
45+
@parts.each do |_key, value, _is_file|
46+
value.rewind if value.respond_to?(:rewind)
47+
end
48+
end
49+
50+
private
51+
52+
def read_all(outbuf)
53+
while (chunk = read_chunk(CHUNK_SIZE))
54+
outbuf << chunk
55+
end
56+
outbuf.empty? ? nil : outbuf
57+
end
58+
59+
def read_chunk(max_length)
60+
loop do
61+
return nil if @part_index >= @parts.size && @footer_sent
62+
63+
if @part_index >= @parts.size
64+
@footer_sent = true
65+
return "--#{@boundary}--#{NEWLINE}".b
66+
end
67+
68+
key, value, is_file = @parts[@part_index]
69+
70+
case @state
71+
when :header
72+
chunk = read_header_chunk(key, value, is_file, max_length)
73+
return chunk if chunk
74+
75+
when :body
76+
chunk = read_body_chunk(value, is_file, max_length)
77+
return chunk if chunk
78+
79+
when :newline
80+
@state = :header
81+
@part_index += 1
82+
return NEWLINE.b
83+
end
84+
end
85+
end
86+
87+
def read_header_chunk(key, value, is_file, max_length)
88+
if @header_buffer.nil?
89+
@header_buffer = build_part_header(key, value, is_file)
90+
@header_offset = 0
91+
end
92+
93+
remaining = @header_buffer.bytesize - @header_offset
94+
if remaining > 0
95+
chunk_size = [remaining, max_length].min
96+
chunk = @header_buffer.byteslice(@header_offset, chunk_size)
97+
@header_offset += chunk_size
98+
return chunk
99+
end
100+
101+
@header_buffer = nil
102+
@header_offset = 0
103+
@state = :body
104+
nil
105+
end
106+
107+
def read_body_chunk(value, is_file, max_length)
108+
if is_file
109+
chunk = read_file_chunk(value, max_length)
110+
if chunk
111+
return chunk
112+
else
113+
@current_file = nil
114+
@state = :newline
115+
return nil
116+
end
117+
else
118+
@state = :newline
119+
return value.to_s.b
120+
end
121+
end
122+
123+
def read_file_chunk(file, max_length)
124+
chunk_size = [max_length, CHUNK_SIZE].min
125+
chunk = file.read(chunk_size)
126+
return nil if chunk.nil?
127+
chunk.force_encoding(Encoding::BINARY) if chunk.respond_to?(:force_encoding)
128+
chunk
129+
end
130+
131+
def build_part_header(key, value, is_file)
132+
header = "--#{@boundary}#{NEWLINE}".b
133+
header << %(Content-Disposition: form-data; name="#{key}").b
134+
if is_file
135+
header << %(; filename="#{file_name(value).gsub(/["\r\n]/, replacement_table)}").b
136+
header << NEWLINE.b
137+
header << "Content-Type: #{content_type(value)}#{NEWLINE}".b
138+
end
139+
header << NEWLINE.b
140+
header
141+
end
142+
143+
def calculate_size
144+
total = 0
145+
@parts.each do |key, value, is_file|
146+
total += build_part_header(key, value, is_file).bytesize
147+
total += content_size(value, is_file)
148+
total += NEWLINE.bytesize
149+
end
150+
total += "--#{@boundary}--#{NEWLINE}".bytesize
151+
total
152+
end
153+
154+
def content_size(value, is_file)
155+
if is_file
156+
if value.respond_to?(:size)
157+
value.size
158+
elsif value.respond_to?(:stat)
159+
value.stat.size
160+
else
161+
value.read.bytesize.tap { value.rewind }
162+
end
163+
else
164+
value.to_s.b.bytesize
165+
end
166+
end
167+
168+
def content_type(object)
169+
return object.content_type if object.respond_to?(:content_type)
170+
require 'mini_mime'
171+
mime = MiniMime.lookup_by_filename(object.path)
172+
mime ? mime.content_type : 'application/octet-stream'
173+
end
174+
175+
def file_name(object)
176+
object.respond_to?(:original_filename) ? object.original_filename : File.basename(object.path)
177+
end
178+
179+
def replacement_table
180+
@replacement_table ||= {
181+
'"' => '%22',
182+
"\r" => '%0D',
183+
"\n" => '%0A'
184+
}.freeze
185+
end
186+
end
187+
end
188+
end

spec/httparty/request/body_spec.rb

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,4 +226,70 @@ def to_ary
226226
end
227227
end
228228
end
229+
230+
describe '#streaming?' do
231+
let(:file) { File.open('spec/fixtures/tiny.gif') }
232+
233+
after { file.close }
234+
235+
context 'when params contains a file' do
236+
let(:params) { { avatar: file } }
237+
subject { described_class.new(params) }
238+
239+
it { expect(subject.streaming?).to be true }
240+
end
241+
242+
context 'when force_multipart but no file' do
243+
let(:params) { { name: 'John' } }
244+
subject { described_class.new(params, force_multipart: true) }
245+
246+
it { expect(subject.streaming?).to be false }
247+
end
248+
249+
context 'when params is a string' do
250+
let(:params) { 'name=John' }
251+
subject { described_class.new(params) }
252+
253+
it { expect(subject.streaming?).to be false }
254+
end
255+
end
256+
257+
describe '#to_stream' do
258+
let(:file) { File.open('spec/fixtures/tiny.gif', 'rb') }
259+
260+
after { file.close }
261+
262+
context 'when streaming is possible' do
263+
let(:params) { { avatar: file } }
264+
subject { described_class.new(params) }
265+
266+
it 'returns a StreamingMultipartBody' do
267+
expect(subject.to_stream).to be_a(HTTParty::Request::StreamingMultipartBody)
268+
end
269+
270+
it 'produces equivalent content to call' do
271+
allow(HTTParty::Request::MultipartBoundary).to receive(:generate).and_return('test-boundary')
272+
273+
stream = subject.to_stream
274+
file.rewind
275+
streamed_content = stream.read
276+
277+
file.rewind
278+
body = described_class.new(params)
279+
allow(HTTParty::Request::MultipartBoundary).to receive(:generate).and_return('test-boundary')
280+
regular_content = body.call
281+
282+
expect(streamed_content).to eq(regular_content)
283+
end
284+
end
285+
286+
context 'when streaming is not possible' do
287+
let(:params) { { name: 'John' } }
288+
subject { described_class.new(params, force_multipart: true) }
289+
290+
it 'returns nil' do
291+
expect(subject.to_stream).to be_nil
292+
end
293+
end
294+
end
229295
end

0 commit comments

Comments
 (0)