Skip to content

Commit 9490d0d

Browse files
authored
CBL-8076: Do not retry replicator connections if WebSocket being clos… (#2479)
The Abnormal Close occurs after the client is already decided to close the socket but fails to receive response from the server. We don't want to retry replicating on this condition. We fix it by a change in WebSocket, that it now treats a connection closure that is not fully complete only due to a pending final server acknowledgement (i.e. CLOSE frame was already sent).
1 parent 0cc98eb commit 9490d0d

2 files changed

Lines changed: 82 additions & 1 deletion

File tree

Networking/WebSockets/WebSocketImpl.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,8 @@ namespace litecore::websocket {
534534

535535
if ( clean ) {
536536
status.reason = kWebSocketClose;
537-
if ( !expected ) status.code = kCodeAbnormal;
537+
// if (!expected) then _closeSent => !_closeReceived
538+
if ( !expected ) status.code = _closeSent ? kCodeNormal : kCodeAbnormal;
538539
else if ( !_closeMessage )
539540
status.code = kCodeNormal;
540541
else {

Replicator/tests/ReplicatorAPITest.cc

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,86 @@ TEST_CASE_METHOD(ReplicatorAPITest, "Stop after transient connect failure", "[C]
816816
waitForStatus(kC4Stopped);
817817
}
818818

819+
// CBL-8074
820+
TEST_CASE_METHOD(ReplicatorAPITest, "WebSocket Peer Going Away", "[C][Push][Pull]") {
821+
bool afterClose = false;
822+
C4SocketFactory factory = {};
823+
C4Socket* c4socket = nullptr;
824+
factory.context = &c4socket;
825+
factory.open = [](C4Socket* socket, const C4Address* addr, C4Slice options, void* context) {
826+
c4socket_opened(socket);
827+
*(C4Socket**)context = socket;
828+
};
829+
830+
factory.close = [](C4Socket* socket) {
831+
// Not invoked
832+
REQUIRE(false);
833+
};
834+
835+
// "peer going away" before CLOSE is sent
836+
// Replicator receives error code 1006, which is transient.
837+
// C4Replicator goes to offline and waiting for retry.
838+
SECTION("CLOSE Not Sent") {
839+
afterClose = false;
840+
_mayGoOffline = true;
841+
factory.write = [](C4Socket* socket, C4SliceResult msg) {
842+
// Simulate Peer-Going-Away before Replicator calling Stop.
843+
// Socket is closed unexpectedly, without the client sending CLOSE
844+
FLSliceResult_Release(msg);
845+
c4socket_closed(socket, {WebSocketDomain, websocket::kCodeGoingAway});
846+
};
847+
}
848+
849+
// "peer going away" after CLOSE frame was already sent
850+
// Since the replicator is already stopped when the peer goes away, WebSocket will
851+
// treat it as Normal Close.
852+
SECTION("CLOSE Has Been Sent") {
853+
afterClose = true;
854+
_mayGoOffline = false;
855+
factory.write = [](C4Socket* socket, C4SliceResult msg) {
856+
// Do nothing
857+
FLSliceResult_Release(msg);
858+
};
859+
}
860+
861+
_socketFactory = &factory;
862+
C4Error err;
863+
importJSONLines(sFixturesDir + "names_100.json");
864+
865+
if ( !afterClose ) {
866+
// WebSocket code 1006, transient error
867+
REQUIRE(startReplicator(kC4Disabled, kC4OneShot, WITH_ERROR(&err)));
868+
_numCallbacksWithLevel[kC4Offline] = 0;
869+
waitForStatus(kC4Offline);
870+
} else {
871+
REQUIRE(startReplicator(kC4Disabled, kC4Continuous, WITH_ERROR(&err)));
872+
// Making sure the WebSocket is open/connected
873+
waitForStatus(kC4Busy);
874+
}
875+
876+
c4repl_stop(_repl);
877+
878+
if ( afterClose ) {
879+
// Give some time for Replicator::_stop to be called, but before timeout in WebSocketImpl
880+
// to not get Timeout error.
881+
std::this_thread::sleep_for(1s);
882+
// WebSocket will treat it as Normal Close
883+
c4socket_closed(c4socket, {WebSocketDomain, websocket::kCodeGoingAway});
884+
}
885+
886+
waitForStatus(kC4Stopped);
887+
888+
auto status = c4repl_getStatus(_repl);
889+
890+
if ( !afterClose ) {
891+
// kCodeAbnormal == 1006
892+
CHECK((status.error.domain == WebSocketDomain && status.error.code == websocket::kCodeAbnormal));
893+
} else {
894+
// "peer going away" after stop results in normal Stop.
895+
CHECK(status.error.code == 0);
896+
}
897+
}
898+
819899
TEST_CASE_METHOD(ReplicatorAPITest, "Calling c4socket_ method after STOP", "[C][Push][Pull]") {
820900
// c.f. the flow with test case "Stop after transient connect failure"
821901
_mayGoOffline = true;

0 commit comments

Comments
 (0)