Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .chloggen/fix-async-error-channel-deadlock.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. receiver/otlp)
component: pkg/otelcol

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Fix shutdown deadlock when async error channel blocks during fatal error reporting

# One or more tracking issues or pull requests related to the change
issues: [14452]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
1 change: 1 addition & 0 deletions .github/workflows/utils/cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@
"extensionz",
"fanout",
"fanoutconsumer",
"fatalonshutdown",
"featureflags",
"featuregate",
"featuregates",
Expand Down
2 changes: 1 addition & 1 deletion otelcol/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func NewCollector(set CollectorSettings) (*Collector, error) {
// Per the signal.Notify documentation, the recommended channel size is
// equal to the number of signals being notified on.
signalsChannel: make(chan os.Signal, 3),
asyncErrorChannel: make(chan error),
asyncErrorChannel: make(chan error, 1),
configProvider: configProvider,
bc: bc,
updateConfigProviderLogger: cc.SetCore,
Expand Down
64 changes: 64 additions & 0 deletions otelcol/collector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,70 @@ func TestCollectorReportError(t *testing.T) {
assert.Equal(t, StateClosed, col.GetState())
}

// TestCollectorShutdownWithFatalError checks that Collector.Shutdown completes
// (rather than hanging) when a configured extension reports a fatal error
// event from its own Shutdown method. The test fails if the collector has not
// stopped within ten seconds.
func TestCollectorShutdownWithFatalError(t *testing.T) {
	factories, err := nopFactories()
	require.NoError(t, err)

	// Register the extension that reports a fatal error event while shutting down.
	extFactory := newFatalOnShutdownExtensionFactory()
	factories.Extensions[extFactory.Type()] = extFactory

	settings := CollectorSettings{
		BuildInfo:              component.NewDefaultBuildInfo(),
		Factories:              func() (Factories, error) { return factories, nil },
		ConfigProviderSettings: newDefaultConfigProviderSettings(t, []string{filepath.Join("testdata", "otelcol-fatalerror.yaml")}),
	}
	col, err := NewCollector(settings)
	require.NoError(t, err)

	collectorWG := startCollector(context.Background(), t, col)

	// Give the collector up to two seconds to reach the running state.
	isRunning := func() bool { return col.GetState() == StateRunning }
	assert.Eventually(t, isRunning, 2*time.Second, 200*time.Millisecond)

	col.Shutdown()

	// Turn the blocking Wait into a channel so it can be raced against a timeout.
	stopped := make(chan struct{})
	go func() {
		collectorWG.Wait()
		close(stopped)
	}()

	select {
	case <-time.After(10 * time.Second):
		t.Fatal("collector shutdown deadlocked")
	case <-stopped:
		assert.Equal(t, StateClosed, col.GetState())
	}
}

// fatalOnShutdownExtension is a test extension that reports a fatal error
// event to its host while it is being shut down.
//
// Start and Shutdown are both declared explicitly on the type, so no
// component.StartFunc embedding is needed: an embedded StartFunc would be
// shadowed by the explicit Start method and never invoked.
type fatalOnShutdownExtension struct {
	// host is the component.Host received in Start; Shutdown reports its
	// status event through it.
	host component.Host
}

// Start stores the supplied host on the extension and always returns nil.
// The context argument is ignored.
func (e *fatalOnShutdownExtension) Start(_ context.Context, host component.Host) error {
	// Keep the host so that Shutdown can report a status event through it.
	e.host = host
	return nil
}

// Shutdown reports a fatal error event, wrapping the error "shutdown error",
// through the host stored by Start. It always returns nil, so the fatal
// condition is signalled only via the status event and not via the return
// value. The context argument is ignored.
func (e *fatalOnShutdownExtension) Shutdown(context.Context) error {
	shutdownErr := errors.New("shutdown error")
	fatalEvent := componentstatus.NewFatalErrorEvent(shutdownErr)
	componentstatus.ReportStatus(e.host, fatalEvent)
	return nil
}

// newFatalOnShutdownExtensionFactory builds an extension.Factory for the
// "fatalonshutdown" component type. The factory's default config is an empty
// struct pointer, and every created extension is a new
// fatalOnShutdownExtension. The factory is registered with
// component.StabilityLevelStable.
func newFatalOnShutdownExtensionFactory() extension.Factory {
	// The extension takes no configuration, so an empty struct is enough.
	defaultConfig := func() component.Config {
		return &struct{}{}
	}

	// All arguments are ignored; each call yields a new extension instance.
	create := func(context.Context, extension.Settings, component.Config) (extension.Extension, error) {
		return &fatalOnShutdownExtension{}, nil
	}

	return extension.NewFactory(
		component.MustNewType("fatalonshutdown"),
		defaultConfig,
		create,
		component.StabilityLevelStable,
	)
}

// NewStatusWatcherExtensionFactory returns a component.ExtensionFactory to construct a status watcher extension.
func NewStatusWatcherExtensionFactory(
onStatusChanged func(source *componentstatus.InstanceID, event *componentstatus.Event),
Expand Down
15 changes: 15 additions & 0 deletions otelcol/testdata/otelcol-fatalerror.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
receivers:
nop:

exporters:
nop:

extensions:
fatalonshutdown:

service:
extensions: [fatalonshutdown]
pipelines:
traces:
receivers: [nop]
exporters: [nop]
Loading