Skip to content

Commit 1de2a5b

Browse files
authored
sources/sighandler.c: perform graceful shutdown for SIGTERM
and several fixes for tests to live with it Signed-off-by: roman khapov <[email protected]>
1 parent c24389f commit 1de2a5b

File tree

15 files changed

+110
-73
lines changed

15 files changed

+110
-73
lines changed

docker/functional/bin/ody-stop

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,33 +4,37 @@ import os
44
import time
55
import signal
66
import subprocess
7+
import sys
78

89

910
def get_odyssey_pids():
10-
print(subprocess.check_output('ps aux | grep odyssey', shell=True).decode('utf-8'))
11+
print(subprocess.check_output(
12+
'ps aux | grep odyssey', shell=True).decode('utf-8'))
1113

1214
try:
1315
return list(map(int, subprocess.check_output(['pgrep', 'odyssey']).split()))
1416
except subprocess.CalledProcessError:
1517
# a non-zero exit code means that there are no odyssey pids
1618
return []
1719

20+
1821
def terminate_gracefully(pid, timeout):
1922
try:
2023
os.kill(pid, signal.SIGTERM)
2124
except ProcessLookupError:
2225
print(f'Process {pid} already finished or doesnt ever existed')
2326
return
2427

25-
print(f'Waiting {timeout} seconds to {pid} finish after SIGTERM...', end='')
28+
print(f'Waiting {timeout} seconds to {
29+
pid} finish after SIGTERM...', end='')
2630

2731
start = time.time()
2832
finished = False
2933

3034
while time.time() - start < timeout:
3135
try:
3236
output = subprocess.check_output(['ps', '-p', str(pid)], )
33-
print(output)
37+
print(output.decode('utf-8'))
3438
if 'odyssey'.encode('utf-8') not in output:
3539
finished = True
3640
break
@@ -50,16 +54,21 @@ def terminate_gracefully(pid, timeout):
5054
def main():
5155
timeout = 5
5256

53-
pids = get_odyssey_pids()
54-
print('Found odyssey pids:', ', '.join(list(map(str, pids))))
57+
if len(sys.argv) > 1:
58+
pids = [int(sys.argv[1])]
59+
else:
60+
pids = get_odyssey_pids()
61+
print('Odyssey pids to stop:', ', '.join(list(map(str, pids))))
5562

5663
for pid in pids:
5764
if not terminate_gracefully(pid, timeout):
5865
print(f'Process {pid} didnt finish within {timeout} seconds')
59-
print(subprocess.check_output(['gdb', '-p', str(pid), '--batch', '-ex', 't a a bt', '-ex', 'source /gdb.py', 'mmcoro all bt']))
66+
print(subprocess.check_output(['gdb', '-p', str(pid), '--batch', '-ex',
67+
't a a bt', '-ex', 'source /gdb.py', '-ex', 'mmcoro all bt']).decode('utf-8'))
6068
exit(1)
6169

6270
exit(0)
6371

72+
6473
if __name__ == "__main__":
6574
main()

docker/functional/tests/cascade/runner.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,12 @@ mkdir -p /tmp/gateway
2626
mkdir -p /tmp/root1
2727
mkdir -p /tmp/root2
2828

29+
/usr/bin/odyssey /tests/cascade/odyssey-gateway.conf
30+
sleep 1
31+
gateway_pid=$(pidof odyssey)
32+
2933
/usr/bin/odyssey /tests/cascade/odyssey-root1.conf
3034
/usr/bin/odyssey /tests/cascade/odyssey-root2.conf
31-
/usr/bin/odyssey /tests/cascade/odyssey-gateway.conf
3235

3336
sleep 1
3437

@@ -80,4 +83,5 @@ exit(0 if diff < threshold else 1)' $root1_client_processed $root2_client_proces
8083

8184
sleep 1
8285

86+
ody-stop $gateway_pid
8387
ody-stop

docker/functional/tests/ody_integration_test/pkg/client-server.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,8 @@ func usrReadResultWhilesigusr2Test(
3535
ch := make(chan error, 1)
3636

3737
go func(chan error) {
38-
_, err := t.Queryx("select pg_sleep(100)")
38+
rows, err := t.Queryx("select pg_sleep(10)")
39+
rows.Close()
3940
ch <- err
4041
}(ch)
4142

@@ -47,7 +48,12 @@ func usrReadResultWhilesigusr2Test(
4748
close(ch)
4849

4950
if err != nil || !ok {
50-
return fmt.Errorf("connection closed or reset\n")
51+
return fmt.Errorf("connection closed or reset")
52+
}
53+
54+
err = t.Commit()
55+
if err != nil {
56+
return fmt.Errorf("commit failed: %w", err)
5157
}
5258

5359
return nil
@@ -64,11 +70,13 @@ func select42(ctx context.Context, ch chan error, wg *sync.WaitGroup) {
6470
}
6571
defer db.Close()
6672

67-
if _, err := db.Query("Select 42"); err != nil {
73+
rows, err := db.Query("Select 42")
74+
if err != nil {
6875
ch <- err
6976
fmt.Println(err)
7077
return
7178
}
79+
rows.Close()
7280

7381
fmt.Printf("select 42 OK\n")
7482
}
@@ -133,7 +141,7 @@ func selectSleep(ctx context.Context, i int, ch chan error, wg *sync.WaitGroup,
133141
}
134142

135143
const (
136-
sleepInterval = 10
144+
sleepInterval = 5
137145
maxCoroutineFailOk = 4
138146
)
139147

@@ -162,6 +170,10 @@ func onlineRestartTest(ctx context.Context) error {
162170
return err
163171
}
164172

173+
if err := stopOdyssey(ctx); err != nil {
174+
return err
175+
}
176+
165177
if err := ensureOdysseyRunning(ctx); err != nil {
166178
return err
167179
}
@@ -190,7 +202,9 @@ func onlineRestartTest(ctx context.Context) error {
190202
// to make sure previous select was in old ody
191203
time.Sleep(1 * time.Second)
192204

193-
restartOdyssey(ctx)
205+
if err = restartOdyssey(ctx); err != nil {
206+
return err
207+
}
194208

195209
for i := 0; i < coroutineSleepCnt*2; i++ {
196210
wg.Add(1)
@@ -214,11 +228,14 @@ func onlineRestartTest(ctx context.Context) error {
214228
return err
215229
}
216230
time.Sleep(1 * time.Second)
217-
fmt.Println("Iter complete")
231+
fmt.Printf("Iter %d complete\n", j)
218232
}
219-
if _, err := signalToProc(syscall.SIGINT, "odyssey"); err != nil {
233+
234+
if err = stopOdyssey(ctx); err != nil {
235+
time.Sleep(time.Second * 1000)
220236
return err
221237
}
238+
222239
return nil
223240
}
224241

@@ -259,7 +276,7 @@ func sigusr2Test(
259276
return err
260277
}
261278

262-
if _, err := signalToProc(syscall.SIGINT, "odyssey"); err != nil {
279+
if err := stopOdyssey(ctx); err != nil {
263280
return err
264281
}
265282
return nil
@@ -272,18 +289,21 @@ func odyClientServerInteractionsTestSet(ctx context.Context) error {
272289
fmt.Println(err)
273290
return err
274291
}
292+
logTestDone("usrReadResultWhilesigusr2Test")
275293

276294
if err := onlineRestartTest(ctx); err != nil {
277295
err = fmt.Errorf("online restart error %w", err)
278296
fmt.Println(err)
279297
return err
280298
}
299+
logTestDone("onlineRestartTest")
281300

282301
if err := sigusr2Test(ctx); err != nil {
283302
err = fmt.Errorf("sigusr2 error %w", err)
284303
fmt.Println(err)
285304
return err
286305
}
306+
logTestDone("sigusr2Test")
287307

288308
fmt.Println("odyClientServerInteractionsTestSet: Ok")
289309

docker/functional/tests/ody_integration_test/pkg/cores.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ const benchTimeSec = 10
1818
const timeSleep = 5
1919
const procName = "odyssey"
2020
const signal = syscall.SIGTERM
21-
const testCount = 100
21+
const testCount = 10
2222

2323
func bunchProcess(ctx context.Context, wg *sync.WaitGroup) {
2424
defer wg.Done()
@@ -82,6 +82,7 @@ func odyCoresTestSet(ctx context.Context) error {
8282
fmt.Println(err)
8383
return err
8484
}
85+
logTestDone("SigTermAfterHighLoad")
8586

8687
fmt.Println("odyCoresTestSet: Ok")
8788

docker/functional/tests/ody_integration_test/pkg/main.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ func main() {
1515
odyClientServerInteractionsTestSet,
1616
odyPkgSyncTestSet,
1717
odyShowErrsTestSet,
18-
odySignalsTestSet,
1918
odyCoresTestSet,
2019
} {
2120
err := f(ctx)

docker/functional/tests/ody_integration_test/pkg/pkg-sync.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ func odyPkgSyncTestSet(ctx context.Context) error {
5252
fmt.Println(err)
5353
return err
5454
}
55+
logTestDone("syncPackets")
5556

5657
fmt.Println("odyPkgSyncTestSet: Ok")
5758

docker/functional/tests/ody_integration_test/pkg/showerrs.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ import (
44
"context"
55
"database/sql"
66
"fmt"
7-
"syscall"
87
"time"
98

109
"github.com/jmoiron/sqlx"
@@ -44,7 +43,7 @@ func getErrs(ctx context.Context, db *sqlx.DB) (map[string]int, error) {
4443
func showErrors(ctx context.Context) error {
4544
// restarting odyssey drops the show errs view, but we still have a chance to send show errors to the old instance
4645
// so we explicitly kill old od
47-
if _, err := signalToProc(syscall.SIGINT, "odyssey"); err != nil {
46+
if err := stopOdyssey(ctx); err != nil {
4847
return err
4948
}
5049

@@ -91,7 +90,7 @@ func showErrors(ctx context.Context) error {
9190
func showErrorsAfterPgRestart(ctx context.Context) error {
9291
// restarting odyssey drops the show errs view, but we still have a chance to send show errors to the old instance
9392
// so we explicitly kill old od
94-
if _, err := signalToProc(syscall.SIGINT, "odyssey"); err != nil {
93+
if err := stopOdyssey(ctx); err != nil {
9594
return err
9695
}
9796

@@ -132,12 +131,14 @@ func odyShowErrsTestSet(ctx context.Context) error {
132131
fmt.Println(err)
133132
return err
134133
}
134+
logTestDone("showErrors")
135135

136136
if err := showErrorsAfterPgRestart(ctx); err != nil {
137137
err = fmt.Errorf("show errors failed: %w", err)
138138
fmt.Println(err)
139139
return err
140140
}
141+
logTestDone("showErrorsAfterPgRestart")
141142

142143
fmt.Println("odyShowErrsTestSet: Ok")
143144

docker/functional/tests/ody_integration_test/pkg/signals.go

Lines changed: 0 additions & 48 deletions
This file was deleted.

docker/functional/tests/ody_integration_test/pkg/util.go

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717

1818
const pgCtlcluster = "/usr/lib/postgresql/16/bin/pg_ctl"
1919
const restartOdysseyCmd = "/usr/bin/ody-restart"
20+
const stopOdysseyCmd = "/usr/bin/ody-stop"
2021
const startOdysseyCmd = "/usr/bin/ody-start"
2122

2223
func restartPg(ctx context.Context) error {
@@ -63,10 +64,11 @@ func ensureOdysseyRunning(ctx context.Context) error {
6364

6465
func restartOdyssey(ctx context.Context,
6566
) error {
66-
_, err := exec.CommandContext(ctx, restartOdysseyCmd).Output()
67+
output, err := exec.CommandContext(ctx, restartOdysseyCmd).Output()
6768
if err != nil {
6869
err = fmt.Errorf("error due odyssey restarting %w", err)
6970
fmt.Println(err)
71+
fmt.Println(string(output))
7072
return err
7173
}
7274
fmt.Print("command restart odyssey executed\n")
@@ -75,6 +77,24 @@ func restartOdyssey(ctx context.Context,
7577
return nil
7678
}
7779

80+
func stopOdyssey(ctx context.Context) error {
81+
output, err := exec.CommandContext(ctx, stopOdysseyCmd).Output()
82+
if err != nil {
83+
err = fmt.Errorf("error due odyssey stop %w", err)
84+
fmt.Println(err)
85+
fmt.Println(string(output))
86+
return err
87+
}
88+
fmt.Print("command stop odyssey executed\n")
89+
90+
fmt.Print("stop odyssey: OK\n")
91+
return nil
92+
}
93+
94+
func logTestDone(name string) {
95+
fmt.Printf("==== done test %s ====\n", name)
96+
}
97+
7898
func pidNyName(procName string) (int, error) {
7999
d, err := ioutil.ReadFile(fmt.Sprintf("/var/run/%s.pid", procName))
80100
if err != nil {
@@ -91,7 +111,7 @@ func signalToProc(sig syscall.Signal, procName string) (*os.Process, error) {
91111
fmt.Println(err)
92112
return nil, err
93113
}
94-
fmt.Println(fmt.Sprintf("signalToProc: using pid %d", pid))
114+
fmt.Printf("signalToProc: %v to %s using pid %d\n", sig, procName, pid)
95115

96116
p, err := os.FindProcess(pid)
97117
if err != nil {

odyssey.conf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,11 @@ keepalive_usr_timeout 0
312312
#
313313
backend_connect_timeout_ms 20000
314314

315+
#
316+
# Maximum SIGTERM count before hard exit
317+
#
318+
max_sigterms_to_die 3
319+
315320
###
316321
### GLOBAL LIMITS
317322
###

0 commit comments

Comments
 (0)