Skip to content

Commit f991aca

Browse files
committed
Fix deadlock in refresh debouncer stop
Deadlock could happen when closing the refresh debouncer (i.e. when closing the session). This would result in either a panic or the goroutine calling session.Close() to hang. patch by João Reis; reviewed by James Hartig, Kevin Kyyro for CASSGO-41
1 parent d32a392 commit f991aca

File tree

3 files changed

+35
-28
lines changed

3 files changed

+35
-28
lines changed

CHANGELOG.md

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,65 +5,43 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## [Unreleased]
8+
## [2.0.0] - Unreleased
99

1010
### Added
1111

12-
- Support vector type [CASSGO-11](https://issues.apache.org/jira/browse/CASSGO-11)
13-
14-
- Allow SERIAL and LOCAL_SERIAL on SELECT statements [CASSGO-26](https://issues.apache.org/jira/browse/CASSGO-26)
15-
12+
- Support vector type (CASSGO-11)
13+
- Allow SERIAL and LOCAL_SERIAL on SELECT statements (CASSGO-26)
1614
- Support of sending queries to the specific node with Query.SetHostID() (CASSGO-4)
17-
18-
- Support for Native Protocol 5. Following protocol changes exposed new API
19-
Query.SetKeyspace(), Query.WithNowInSeconds(), Batch.SetKeyspace(), Batch.WithNowInSeconds() (CASSGO-1)
15+
- Support for Native Protocol 5 (CASSGO-1)
2016

2117
### Changed
2218

2319
- Move lz4 compressor to lz4 package within the gocql module (CASSGO-32)
24-
2520
- Don't restrict server authenticator unless PasswordAuthentictor.AllowedAuthenticators is provided (CASSGO-19)
26-
2721
- Cleanup of deprecated elements (CASSGO-12)
28-
2922
- Remove global NewBatch function (CASSGO-15)
30-
3123
- Detailed description for NumConns (CASSGO-3)
32-
3324
- Change Batch API to be consistent with Query() (CASSGO-7)
34-
3525
- Added Cassandra 4.0 table options support(CASSGO-13)
36-
3726
- Remove deprecated global logger (CASSGO-24)
38-
3927
- Bumped actions/upload-artifact and actions/cache versions to v4 in CI workflow (CASSGO-48)
40-
4128
- Keep nil slices in MapScan (CASSGO-44)
42-
4329
- Improve error messages for marshalling (CASSGO-38)
44-
4530
- Remove HostPoolHostPolicy from gocql package (CASSGO-21)
46-
4731
- Standardized spelling of datacenter (CASSGO-35)
48-
4932
- Refactor HostInfo creation and ConnectAddress() method (CASSGO-45)
50-
5133
- gocql.Compressor interface changes to follow append-like design. Bumped Go version to 1.19 (CASSGO-1)
52-
5334
- Refactoring hostpool package test and Expose HostInfo creation (CASSGO-59)
5435

5536
### Fixed
5637
- Cassandra version unmarshal fix (CASSGO-49)
57-
5838
- Retry policy now takes into account query idempotency (CASSGO-27)
59-
6039
- Don't return error to caller with RetryType Ignore (CASSGO-28)
6140
- The marshalBigInt return 8 bytes slice in all cases except for big.Int,
6241
which returns a variable length slice, but should be 8 bytes slice as well (CASSGO-2)
63-
6442
- Skip metadata only if the prepared result includes metadata (CASSGO-40)
65-
6643
- Don't panic in MapExecuteBatchCAS if no `[applied]` column is returned (CASSGO-42)
44+
- Fix deadlock in refresh debouncer stop (CASSGO-41)
6745

6846
## [1.7.0] - 2024-09-23
6947

host_source.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -807,6 +807,7 @@ type refreshDebouncer struct {
807807
timer *time.Timer
808808
refreshNowCh chan struct{}
809809
quit chan struct{}
810+
done chan struct{}
810811
refreshFn func() error
811812
}
812813

@@ -816,6 +817,7 @@ func newRefreshDebouncer(interval time.Duration, refreshFn func() error) *refres
816817
broadcaster: nil,
817818
refreshNowCh: make(chan struct{}, 1),
818819
quit: make(chan struct{}),
820+
done: make(chan struct{}),
819821
interval: interval,
820822
timer: time.NewTimer(interval),
821823
refreshFn: refreshFn,
@@ -851,6 +853,7 @@ func (d *refreshDebouncer) refreshNow() <-chan error {
851853
}
852854

853855
func (d *refreshDebouncer) flusher() {
856+
defer close(d.done)
854857
for {
855858
select {
856859
case <-d.refreshNowCh:
@@ -899,8 +902,8 @@ func (d *refreshDebouncer) stop() {
899902
}
900903
d.stopped = true
901904
d.mu.Unlock()
902-
d.quit <- struct{}{} // sync with flusher
903905
close(d.quit)
906+
<-d.done
904907
}
905908

906909
// broadcasts an error to multiple channels (listeners)

host_source_test.go

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,32 @@ func TestRefreshDebouncer_EventsAfterRefreshNow(t *testing.T) {
307307
}
308308
}
309309

310+
// https://github.com/gocql/gocql/issues/1752
311+
func TestRefreshDebouncer_DeadlockOnStop(t *testing.T) {
312+
// there's no way to guarantee this bug manifests because it depends on which `case` is picked from the `select`
313+
// with 4 iterations of this test the deadlock would be hit pretty consistently
314+
const iterations = 4
315+
for i := 0; i < iterations; i++ {
316+
refreshCalledCh := make(chan int, 5)
317+
refreshDuration := 500 * time.Millisecond
318+
fn := func() error {
319+
refreshCalledCh <- 0
320+
time.Sleep(refreshDuration)
321+
return nil
322+
}
323+
d := newRefreshDebouncer(50*time.Millisecond, fn)
324+
timeBeforeRefresh := time.Now()
325+
_ = d.refreshNow()
326+
<-refreshCalledCh
327+
d.debounce()
328+
d.stop()
329+
timeAfterRefresh := time.Now()
330+
if timeAfterRefresh.Sub(timeBeforeRefresh) < refreshDuration {
331+
t.Errorf("refresh debouncer stop() didn't wait until flusher stopped")
332+
}
333+
}
334+
}
335+
310336
func TestErrorBroadcaster_MultipleListeners(t *testing.T) {
311337
b := newErrorBroadcaster()
312338
defer b.stop()

0 commit comments

Comments
 (0)