Skip to content

Commit 065a928

Browse files
authored
RUBY-3658 Ensure partial writes are retried (mongodb#2927)
* HELP-74319 Ensure partial reads are retried * simplify the socket wait logic * Ubuntu 20.04 has been retired on GitHub Actions * reduce the number of tests run on GH focus on modern servers, recent rubies * well, let's try 22.04, then * respect the reported number of bytes written write() is blocking, but apparently may still not write the entire requested block (for various reasons). It is safest to NOT assume it wrote everything, and instead check the return value for the number of bytes actually written. * different errors being raised
1 parent 09db639 commit 065a928

File tree

4 files changed

+92
-63
lines changed

4 files changed

+92
-63
lines changed

.github/workflows/test.yml

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -10,40 +10,15 @@ jobs:
1010
env:
1111
CI: true
1212
TESTOPTS: "-v"
13-
runs-on: ubuntu-20.04
13+
runs-on: ubuntu-22.04
1414
continue-on-error: true
1515
strategy:
1616
fail-fast: false
1717
matrix:
18-
os: [ ubuntu-20.04 ]
19-
ruby: ["2.7", "3.0", "3.1", "3.2", "3.3"]
20-
mongodb: ["4.4", "5.0", "6.0", "7.0", "8.0"]
21-
topology: [replica_set, sharded_cluster]
22-
include:
23-
- os: macos
24-
ruby: "2.7"
25-
mongodb: "7.0"
26-
topology: server
27-
- os: macos
28-
ruby: "3.0"
29-
mongodb: "7.0"
30-
topology: server
31-
- os: ubuntu-latest
32-
ruby: "2.7"
33-
mongodb: "7.0"
34-
topology: server
35-
- os: ubuntu-latest
36-
ruby: "3.1"
37-
mongodb: "7.0"
38-
topology: server
39-
- os: ubuntu-latest
40-
ruby: "3.2"
41-
mongodb: "7.0"
42-
topology: server
43-
- os: ubuntu-latest
44-
ruby: "3.2"
45-
mongodb: "8.0"
46-
topology: replica_set
18+
os: [ ubuntu-22.04 ]
19+
ruby: [ "3.2" ]
20+
mongodb: [ "7.0", "8.0" ]
21+
topology: [ replica_set, sharded_cluster ]
4722
steps:
4823
- name: repo checkout
4924
uses: actions/checkout@v2

lib/mongo/socket.rb

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -491,9 +491,8 @@ def write_without_timeout(*args)
491491
buf = buf.to_s
492492
i = 0
493493
while i < buf.length
494-
chunk = buf[i...i+WRITE_CHUNK_SIZE]
495-
@socket.write(chunk)
496-
i += WRITE_CHUNK_SIZE
494+
chunk = buf[i, WRITE_CHUNK_SIZE]
495+
i += @socket.write(chunk)
497496
end
498497
end
499498
end
@@ -523,32 +522,40 @@ def write_with_timeout(*args, timeout:)
523522

524523
def write_chunk(chunk, timeout)
525524
deadline = Utils.monotonic_time + timeout
525+
526526
written = 0
527-
begin
528-
written += @socket.write_nonblock(chunk[written..-1])
529-
rescue IO::WaitWritable, Errno::EINTR
530-
select_timeout = deadline - Utils.monotonic_time
531-
rv = Kernel.select(nil, [@socket], nil, select_timeout)
532-
if BSON::Environment.jruby?
533-
# Ignore the return value of Kernel.select.
534-
# On JRuby, select appears to return nil prior to timeout expiration
535-
# (apparently due to a EAGAIN) which then causes us to fail the read
536-
# even though we could have retried it.
537-
# Check the deadline ourselves.
538-
if deadline
539-
select_timeout = deadline - Utils.monotonic_time
540-
if select_timeout <= 0
541-
raise_timeout_error!("Took more than #{timeout} seconds to receive data", true)
542-
end
527+
while written < chunk.length
528+
begin
529+
written += @socket.write_nonblock(chunk[written..-1])
530+
rescue IO::WaitWritable, Errno::EINTR
531+
if !wait_for_socket_to_be_writable(deadline)
532+
raise_timeout_error!("Took more than #{timeout} seconds to receive data", true)
543533
end
544-
elsif rv.nil?
545-
raise_timeout_error!("Took more than #{timeout} seconds to receive data (select call timed out)", true)
534+
535+
retry
546536
end
547-
retry
548537
end
538+
549539
written
550540
end
551541

542+
def wait_for_socket_to_be_writable(deadline)
543+
select_timeout = deadline - Utils.monotonic_time
544+
rv = Kernel.select(nil, [@socket], nil, select_timeout)
545+
546+
if BSON::Environment.jruby?
547+
# Ignore the return value of Kernel.select.
548+
# On JRuby, select appears to return nil prior to timeout expiration
549+
# (apparently due to a EAGAIN) which then causes us to fail the read
550+
# even though we could have retried it.
551+
# Check the deadline ourselves.
552+
select_timeout = deadline - Utils.monotonic_time
553+
return select_timeout > 0
554+
end
555+
556+
!rv.nil?
557+
end
558+
552559
def unix_socket?(sock)
553560
defined?(UNIXSocket) && sock.is_a?(UNIXSocket)
554561
end

spec/integration/client_side_encryption/custom_endpoint_spec.rb

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -94,11 +94,7 @@
9494
end
9595

9696
let(:error_regex) do
97-
if BSON::Environment.jruby?
98-
/SocketError/
99-
else
100-
/Connection refused/
101-
end
97+
/Connection refused|SocketError|SocketTimeoutError/
10298
end
10399

104100
it_behaves_like 'raising a KMS error'

spec/mongo/socket_spec.rb

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,6 @@
6868

6969
let(:raw_socket) { socket.instance_variable_get('@socket') }
7070

71-
let(:wait_readable_class) do
72-
Class.new(Exception) do
73-
include IO::WaitReadable
74-
end
75-
end
76-
7771
context 'timeout' do
7872
clean_slate_for_all
7973

@@ -116,4 +110,61 @@
116110
end
117111
end
118112
end
113+
114+
describe '#write' do
115+
let(:target_host) do
116+
host = ClusterConfig.instance.primary_address_host
117+
# Take ipv4 address
118+
Socket.getaddrinfo(host, 0).detect { |ai| ai.first == 'AF_INET' }[3]
119+
end
120+
121+
let(:socket) do
122+
Mongo::Socket::TCP.new(target_host, ClusterConfig.instance.primary_address_port, 1, Socket::PF_INET)
123+
end
124+
125+
let(:raw_socket) { socket.instance_variable_get('@socket') }
126+
127+
context 'with timeout' do
128+
let(:timeout) { 5_000 }
129+
130+
context 'data is less than WRITE_CHUNK_SIZE' do
131+
let(:data) { "a" * 1024 }
132+
133+
context 'when a partial write occurs' do
134+
before do
135+
expect(raw_socket)
136+
.to receive(:write_nonblock)
137+
.twice
138+
.and_return(data.length / 2)
139+
end
140+
141+
it 'eventually writes everything' do
142+
expect(socket.write(data, timeout: timeout)).
143+
to be === data.length
144+
end
145+
end
146+
end
147+
148+
context 'data is greater than WRITE_CHUNK_SIZE' do
149+
let(:data) { "a" * (2 * Mongo::Socket::WRITE_CHUNK_SIZE + 256) }
150+
151+
context 'when a partial write occurs' do
152+
before do
153+
expect(raw_socket)
154+
.to receive(:write_nonblock)
155+
.exactly(4).times
156+
.and_return(Mongo::Socket::WRITE_CHUNK_SIZE,
157+
128,
158+
Mongo::Socket::WRITE_CHUNK_SIZE - 128,
159+
256)
160+
end
161+
162+
it 'eventually writes everything' do
163+
expect(socket.write(data, timeout: timeout)).
164+
to be === data.length
165+
end
166+
end
167+
end
168+
end
169+
end
119170
end

0 commit comments

Comments
 (0)