Skip to content

Commit 1e43f14

Browse files
committed
Retry on another node if you get appsock exhausted error
Signed-off-by: Salil Chandra <[email protected]>
1 parent 69291be commit 1e43f14

File tree

6 files changed

+216
-0
lines changed

6 files changed

+216
-0
lines changed

Diff for: cdb2api/cdb2api.c

+7
Original file line numberDiff line numberDiff line change
@@ -5023,6 +5023,13 @@ static int cdb2_run_statement_typed_int(cdb2_hndl_tp *hndl, const char *sql,
50235023
goto retry_queries;
50245024
}
50255025

5026+
if (hndl->firstresponse->error_code == CDB2__ERROR_CODE__APPSOCK_LIMIT) {
5027+
newsql_disconnect(hndl, hndl->sb, __LINE__);
5028+
hndl->sb = NULL;
5029+
// retry all shouldn't matter here. Can only happen at beginning of transaction on begin?
5030+
goto retry_queries;
5031+
}
5032+
50265033
if (is_begin) {
50275034
debugprint("setting in_trans to 1\n");
50285035
hndl->in_trans = 1;

Diff for: tests/appsock.test/Makefile

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Include the shared testcase.mk: from the parent directory when TESTSROOTDIR
# is empty, otherwise from the directory TESTSROOTDIR names.
ifeq ($(TESTSROOTDIR),)
2+
include ../testcase.mk
3+
else
4+
include $(TESTSROOTDIR)/testcase.mk
5+
endif
6+
# Export a TEST_TIMEOUT of 2m only when no value has been provided.
ifeq ($(TEST_TIMEOUT),)
7+
export TEST_TIMEOUT=2m
8+
endif

Diff for: tests/appsock.test/README

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
This test exercises the appsock-exhausted handling, which retries on different nodes several times before returning an error.
2+
Previously when receiving an appsock exhausted error, cdb2api would not retry on a different node which could accommodate the connection.
3+
This test only queries one node from the cluster and just checks that a connect error is received now instead of an appsock error.
4+
5+
We cannot test this by using CDB2_RANDOM on a cluster and checking whether there is a node that hasn't reached the maximum number of connections, since the old code could also reach the maximum number of connections on each node by connecting to a random node each time. We would need to use CDB2_RANDOMROOM and have a cluster with nodes in different rooms.

Diff for: tests/appsock.test/runit

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#!/usr/bin/env bash
# Syntax-check this script first and stop if it does not parse.
# `||` is required here: with a pipe, `exit 1` runs in a subshell and the
# script carries on regardless of the check's result.
bash -n "$0" || exit 1

# Run the appsock test binary against the database under test.
"${TESTSBUILDDIR}/appsock" "${DBNAME}"

Diff for: tests/tools/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ macro(add_exe exe)
2525
endmacro()
2626

2727
add_exe(api_events api_events.c)
28+
add_exe(appsock appsock.c)
2829
add_exe(blob blob.c)
2930
add_exe(bound bound.cpp)
3031
add_exe(breakloop breakloop.c nemesis.c testutil.c)

Diff for: tests/tools/appsock.c

+191
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
#include <stdint.h>
2+
#include <stdlib.h>
3+
#include <stdio.h>
4+
#include <string.h>
5+
#include <signal.h>
6+
#include <libgen.h>
7+
#include <unistd.h>
8+
#include <errno.h>
9+
10+
#include <cdb2api.h>
11+
12+
// Release every connection stored in `dbs`, then the array itself.
//
// dbs:  array of `freq` handle slots; NULL slots are skipped.
// freq: number of slots in `dbs`.
//
// Each non-NULL handle gets a "rollback" (result ignored) before it is closed.
// `dbs` is freed here, so the caller must not touch it afterwards.
void free_conns(cdb2_hndl_tp **dbs, int freq) {
    int idx = 0;
    while (idx < freq) {
        cdb2_hndl_tp *conn = dbs[idx];
        idx++;
        if (conn != NULL) {
            cdb2_run_statement(conn, "rollback"); // may or may not fail
            cdb2_close(conn);
        }
    }
    free(dbs);
}
23+
24+
// Just check that when using CDB2_DIRECT_CPU you get max retry error or connect error instead of appsock error
25+
// In a cluster when attempting to connect to all nodes, these errors mean that the api has retried on different nodes
26+
int appsock_test(cdb2_hndl_tp **dbs, int freq, char *dbname, char *host) {
27+
cdb2_hndl_tp **db;
28+
int rc;
29+
int i;
30+
for (i = 0; i < freq; i++) {
31+
db = &dbs[i];
32+
rc = cdb2_open(db, dbname, host, CDB2_DIRECT_CPU);
33+
if (rc != CDB2_OK) {
34+
fprintf(stderr, "%s: Error opening %s %d %s\n", __func__, dbname, rc, cdb2_errstr(*db));
35+
return -1;
36+
}
37+
38+
rc = cdb2_run_statement(*db, "begin");
39+
if (rc == CDB2ERR_CONNECT_ERROR && i == 5) // this is expected on the last try
40+
return 0;
41+
if (rc) {
42+
fprintf(stderr, "%s: Error running begin on %s %d %s\n", __func__, dbname, rc, cdb2_errstr(*db));
43+
return -1;
44+
}
45+
rc = cdb2_run_statement(*db, "select comdb2_host()");
46+
if (rc) {
47+
fprintf(stderr, "%s: Error running query on %s %d %s\n", __func__, dbname, rc, cdb2_errstr(*db));
48+
return -1;
49+
}
50+
rc = cdb2_next_record(*db);
51+
if (rc != CDB2_OK) {
52+
fprintf(stderr, "%s: Error reading query on %s %d %s\n", __func__, dbname, rc, cdb2_errstr(*db));
53+
return -1;
54+
}
55+
}
56+
57+
fprintf(stderr, "%s: Didn't get connect error\n", __func__);
58+
return -1;
59+
}
60+
61+
// Set MAXAPPSOCKSLIMIT to 5 through `hndl` and verify the change took effect.
//
// hndl: open handle on which the setting is changed and then read back.
//
// Sends 'bdb setattr MAXAPPSOCKSLIMIT 5' via sys.cmd.send, then selects the
// 'maxappsockslimit' row from comdb2_tunables and requires exactly one record
// whose first column, compared as a string, equals "5".
//
// Returns 0 on success, -1 on any failure (a message is written to stderr).
static int change_appsock_limit(cdb2_hndl_tp *hndl)
{
    const int value = 5;
    char query[500];
    snprintf(query, sizeof(query), "exec procedure sys.cmd.send('bdb setattr MAXAPPSOCKSLIMIT %d')", value);
    int rc = cdb2_run_statement(hndl, query);
    if (rc) {
        fprintf(stderr, "Error setting MAXAPPSOCKSLIMIT to %d %d %s\n", value, rc, cdb2_errstr(hndl));
        return -1;
    }

    // read the tunable back to make sure it now holds the requested value
    rc = cdb2_run_statement(hndl, "select value from comdb2_tunables where name = 'maxappsockslimit'");
    if (rc) {
        fprintf(stderr, "Error running query %d %s\n", rc, cdb2_errstr(hndl));
        return -1;
    }
    rc = cdb2_next_record(hndl);
    if (rc != CDB2_OK) {
        fprintf(stderr, "%s: Expected record %d %s\n", __func__, rc, cdb2_errstr(hndl));
        return -1;
    }

    // A NULL column value must not reach strcmp.
    const char *returned = cdb2_column_value(hndl, 0);
    if (returned == NULL) {
        fprintf(stderr, "%s: tunable value is NULL\n", __func__);
        return -1;
    }

    char expected[16]; // roomy enough for any int, not just the current value
    snprintf(expected, sizeof(expected), "%d", value);
    if (strcmp(returned, expected)) {
        fprintf(stderr, "Expected tunable %s, got %s\n", expected, returned);
        return -1;
    }

    // there must be exactly one matching row
    rc = cdb2_next_record(hndl);
    if (rc != CDB2_OK_DONE) {
        fprintf(stderr, "Expected done %d %s\n", rc, cdb2_errstr(hndl));
        return -1;
    }

    return 0;
}
98+
99+
static int tunable_driver(char *dbname, char **rhost)
100+
{
101+
cdb2_hndl_tp *hndl;
102+
int rc = cdb2_open(&hndl, dbname, "default", 0);
103+
if (rc) {
104+
fprintf(stderr, "%s: Error opening %s %d %s\n", __func__, dbname, rc, cdb2_errstr(hndl));
105+
return -1;
106+
}
107+
rc = cdb2_run_statement(hndl, "select host from comdb2_cluster order by is_master limit 1");
108+
if (rc) {
109+
fprintf(stderr, "Error running %s %d %s\n", __func__, rc, cdb2_errstr(hndl));
110+
return -1;
111+
}
112+
cdb2_hndl_tp *hndl2;
113+
char *host;
114+
*rhost = NULL;
115+
while ((rc = cdb2_next_record(hndl)) == CDB2_OK) {
116+
if (*rhost) {
117+
fprintf(stderr, "Did not expect another record\n");
118+
return -1;
119+
}
120+
host = (char *)cdb2_column_value(hndl, 0);
121+
*rhost = strdup(host);
122+
rc = cdb2_open(&hndl2, dbname, host, CDB2_DIRECT_CPU);
123+
if (rc) {
124+
fprintf(stderr, "Error opening %s on host %s %d %s\n", dbname, host, rc, cdb2_errstr(hndl2));
125+
return -1;
126+
}
127+
rc = cdb2_run_statement(hndl2, "select comdb2_host()");
128+
if (rc) {
129+
fprintf(stderr, "can't run select comdb2_host on host %s %d %s\n", host, rc, cdb2_errstr(hndl2));
130+
return -1;
131+
}
132+
rc = cdb2_next_record(hndl2);
133+
if (rc != CDB2_OK) {
134+
fprintf(stderr, "can't read\n");
135+
return -1;
136+
}
137+
char *host2 = (char *)cdb2_column_value(hndl2, 0);
138+
if (strcmp(host, host2) != 0) {
139+
fprintf(stderr, "Should be connected to %s, actually connected to %s\n", host, host2);
140+
return -1;
141+
}
142+
rc = cdb2_next_record(hndl2);
143+
if (rc != CDB2_OK_DONE) {
144+
fprintf(stderr, "can't finish reading\n");
145+
return -1;
146+
}
147+
rc = change_appsock_limit(hndl2);
148+
if (rc)
149+
return -1;
150+
cdb2_close(hndl2);
151+
}
152+
if (!*rhost) {
153+
fprintf(stderr, "Could not find a host\n");
154+
return -1;
155+
}
156+
if (rc != CDB2_OK_DONE) {
157+
fprintf(stderr, "Error reading %s %d %s\n", __func__, rc, cdb2_errstr(hndl));
158+
return -1;
159+
}
160+
rc = cdb2_close(hndl);
161+
if (rc) {
162+
fprintf(stderr, "%s: Error closing %s %d %s\n", __func__, dbname, rc, cdb2_errstr(hndl));
163+
return -1;
164+
}
165+
return 0;
166+
}
167+
168+
int main(int argc, char **argv)
169+
{
170+
signal(SIGPIPE, SIG_IGN);
171+
cdb2_disable_sockpool();
172+
setenv("COMDB2_CONFIG_MAX_LOCAL_CONNECTION_CACHE_ENTRIES", "0", 1);
173+
174+
char *dbname = argv[1];
175+
char *conf = getenv("CDB2_CONFIG");
176+
if (conf) cdb2_set_comdb2db_config(conf);
177+
char *host = NULL;
178+
if (tunable_driver(dbname, &host))
179+
abort();
180+
181+
int freq = 6;
182+
cdb2_hndl_tp **dbs = (cdb2_hndl_tp **)calloc(freq, sizeof(cdb2_hndl_tp *));
183+
if (appsock_test(dbs, freq, dbname, host) != 0) {
184+
free_conns(dbs, freq);
185+
abort();
186+
}
187+
free_conns(dbs, freq);
188+
189+
printf("%s - pass\n", basename(argv[0]));
190+
return 0;
191+
}

0 commit comments

Comments
 (0)