-
Notifications
You must be signed in to change notification settings - Fork 132
pkg/docker: Add event-based Docker container info caching #1990
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
florianl
wants to merge
21
commits into
open-telemetry:main
Choose a base branch
from
florianl:docker-metadata-cache
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
21 commits
Select commit
Hold shift + click to select a range
398570b
pkg/docker: Add event-based Docker container info caching
florianl d8e6b4b
Merge branch 'main' into docker-metadata-cache
florianl 31d6ae9
use full ID for ContainerMeta.ID
florianl d516805
don't drop comment
florianl 3cbd10b
Apply suggestions from code review
florianl b243ba4
use short ID in metadata
florianl 0ab215b
expose and use InvalidatePID
florianl 5f4e4d5
Apply suggestions from code review
florianl 105df71
Merge branch 'main' into docker-metadata-cache
florianl 99cffdd
merge pidToID into byPID
florianl 2e4dbef
refactor around abbreviationLength
florianl aaad9dd
fix linting
florianl 56db551
Merge branch 'main' into docker-metadata-cache
florianl 5450ba4
introduce type ContainerID
florianl ff8a4c1
fix tests
florianl 6647265
extend byID to byContainerID and include metadata
florianl 61b0c40
Update pkg/docker/docker_api_client.go
florianl d00ddfa
drop custom docker info cache
florianl dd98be7
Merge branch 'main' into docker-metadata-cache
florianl 0f7865a
apply feedback
florianl 600f825
Merge branch 'main' into docker-metadata-cache
florianl File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,11 +5,15 @@ package docker // import "go.opentelemetry.io/obi/pkg/docker" | |
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "io" | ||
| "log/slog" | ||
| "maps" | ||
| "strings" | ||
| "sync" | ||
| "time" | ||
|
|
||
| "github.com/moby/moby/api/types/events" | ||
| "github.com/moby/moby/client" | ||
|
|
||
| "go.opentelemetry.io/obi/pkg/appolly/app" | ||
|
|
@@ -18,21 +22,42 @@ import ( | |
| "go.opentelemetry.io/obi/pkg/internal/helpers/container" | ||
| ) | ||
|
|
||
| const composeServiceLabelKey = "com.docker.compose.service" | ||
| const ( | ||
| composeServiceLabelKey = "com.docker.compose.service" | ||
| // abbreviationLength defines the length for the short ID form | ||
| abbreviationLength = 12 | ||
| ) | ||
|
|
||
// cmlog returns a logger derived from the default slog logger, tagged with
// the "component" attribute that identifies the Docker container store.
func cmlog() *slog.Logger {
	return slog.Default().With("component", "docker.ContainerStore")
}
|
|
||
| var osInfoForPID = container.InfoForPID | ||
|
|
||
| // ContainerID is the full-length container ID as provided by the Docker API. | ||
| type ContainerID string | ||
|
|
||
| type ContainerMeta struct { | ||
| // TODO: add other fields https://opentelemetry.io/docs/specs/semconv/resource/container/ | ||
| ID string | ||
| ID string // short form ID limited to abbreviationLength | ||
| FullID ContainerID | ||
| Name string | ||
| ComposeService string | ||
| } | ||
|
|
||
| // containerEntry groups container metadata with the PIDs known to belong to it. | ||
| // This allows a single map lookup to both retrieve metadata and support PID-based invalidation. | ||
| type containerEntry struct { | ||
| meta ContainerMeta | ||
| pids []app.PID | ||
| } | ||
|
|
||
| // dockerClient defines the Docker API methods needed by ContainerStore. | ||
| type dockerClient interface { | ||
| ContainerInspect(ctx context.Context, container string, options client.ContainerInspectOptions) (client.ContainerInspectResult, error) | ||
| Events(ctx context.Context, options client.EventsListOptions) client.EventsResult | ||
| } | ||
|
|
||
| // ContainerStore caches access to the Docker container API. | ||
| // The behavior can be overridden via environment variables: | ||
| // - DOCKER_HOST to set the URL to the docker server. | ||
|
|
@@ -43,14 +68,21 @@ type ContainerMeta struct { | |
| // - DOCKER_TLS_VERIFY to enable or disable TLS verification | ||
| // (off by default). | ||
| type ContainerStore struct { | ||
| initMutex sync.Mutex | ||
| docker client.ContainerAPIClient | ||
| log *slog.Logger | ||
| initMutex sync.Mutex | ||
| docker dockerClient | ||
| log *slog.Logger | ||
| watcherStarted sync.Once | ||
|
|
||
| cacheMu sync.RWMutex | ||
| byPID map[app.PID]ContainerMeta | ||
| byContainerID map[ContainerID]containerEntry // metadata + PIDs keyed by full container ID | ||
| } | ||
|
|
||
| func NewStore() *ContainerStore { | ||
| return &ContainerStore{ | ||
| log: cmlog(), | ||
| log: cmlog(), | ||
| byPID: make(map[app.PID]ContainerMeta), | ||
| byContainerID: make(map[ContainerID]containerEntry), | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -93,11 +125,49 @@ func (s *ContainerStore) initialize(ctx context.Context) { | |
| // ContainerInfo returns the ContainerMeta that is associated to the provided PID. | ||
| // It also returns true if the ContainerMeta was found for the provided PID, and false otherwise. | ||
| func (s *ContainerStore) ContainerInfo(ctx context.Context, pid app.PID) (ContainerMeta, bool) { | ||
| s.cacheMu.RLock() | ||
| if ci, ok := s.byPID[pid]; ok { | ||
| s.cacheMu.RUnlock() | ||
| return ci, true | ||
| } | ||
| s.cacheMu.RUnlock() | ||
|
|
||
| osCntInfo, err := osInfoForPID(pid) | ||
| if err != nil { | ||
| s.log.Debug("failed to get OS container info for pid", "pid", pid, "error", err) | ||
| return ContainerMeta{}, false | ||
| } | ||
|
|
||
| // Reuse metadata if another PID from the same container is already cached. | ||
| // We acquire the write lock directly to avoid a TOCTOU race: if the container | ||
| // is invalidated between the read check and the write, we must not cache stale metadata. | ||
| fullContainerID := ContainerID(osCntInfo.ContainerID) | ||
| s.cacheMu.Lock() | ||
| if entry, ok := s.byContainerID[fullContainerID]; ok { | ||
| // Re-validate that the PID still belongs to this container while holding the lock. | ||
| currentInfo, err := osInfoForPID(pid) | ||
| if err != nil || ContainerID(currentInfo.ContainerID) != fullContainerID { | ||
| s.cacheMu.Unlock() | ||
| return ContainerMeta{}, false | ||
| } | ||
| meta := entry.meta | ||
| seen := false | ||
| for _, cachedPID := range entry.pids { | ||
| if cachedPID == pid { | ||
| seen = true | ||
| break | ||
| } | ||
| } | ||
| if !seen { | ||
| entry.pids = append(entry.pids, pid) | ||
| s.byContainerID[fullContainerID] = entry | ||
| } | ||
| s.byPID[pid] = meta | ||
| s.cacheMu.Unlock() | ||
| return meta, true | ||
| } | ||
| s.cacheMu.Unlock() | ||
|
|
||
| inspectResult, err := s.docker.ContainerInspect(ctx, osCntInfo.ContainerID, client.ContainerInspectOptions{}) | ||
| if err != nil { | ||
| s.log.Debug("failed to inspect docker container", | ||
|
|
@@ -108,7 +178,6 @@ func (s *ContainerStore) ContainerInfo(ctx context.Context, pid app.PID) (Contai | |
| } | ||
|
|
||
| inspectInfo := inspectResult.Container | ||
| const abbreviationLength = 12 | ||
| containerID := inspectInfo.ID | ||
| if len(containerID) > abbreviationLength { | ||
| containerID = containerID[:abbreviationLength] | ||
|
|
@@ -119,12 +188,45 @@ func (s *ContainerStore) ContainerInfo(ctx context.Context, pid app.PID) (Contai | |
| composeSvcName = inspectInfo.Config.Labels[composeServiceLabelKey] | ||
| } | ||
|
|
||
| return ContainerMeta{ | ||
| meta := ContainerMeta{ | ||
| // some containers start with '/'. Removing it | ||
| Name: strings.Trim(inspectInfo.Name, "/"), | ||
| ID: containerID, | ||
| FullID: ContainerID(inspectInfo.ID), | ||
| ComposeService: composeSvcName, | ||
| }, true | ||
| } | ||
|
|
||
| s.cacheMu.Lock() | ||
| // Re-validate that the PID still belongs to the inspected container: the process | ||
| // may have exited while ContainerInspect was in flight, causing InvalidatePID to | ||
| // be a no-op (byPID entry didn't exist yet), and we would cache stale metadata. | ||
| currentInfo, err := osInfoForPID(pid) | ||
| if err != nil || ContainerID(currentInfo.ContainerID) != meta.FullID { | ||
| s.cacheMu.Unlock() | ||
| return ContainerMeta{}, false | ||
| } | ||
| if entry, ok := s.byContainerID[meta.FullID]; ok { | ||
| meta = entry.meta | ||
| seen := false | ||
| for _, cachedPID := range entry.pids { | ||
| if cachedPID == pid { | ||
| seen = true | ||
| break | ||
| } | ||
| } | ||
| if !seen { | ||
| entry.pids = append(entry.pids, pid) | ||
| } | ||
| s.byPID[pid] = meta | ||
| s.byContainerID[meta.FullID] = entry | ||
| s.cacheMu.Unlock() | ||
| return meta, true | ||
| } | ||
| s.byPID[pid] = meta | ||
| s.byContainerID[meta.FullID] = containerEntry{meta: meta, pids: []app.PID{pid}} | ||
| s.cacheMu.Unlock() | ||
|
florianl marked this conversation as resolved.
florianl marked this conversation as resolved.
|
||
|
|
||
| return meta, true | ||
| } | ||
|
|
||
| func (ci *ContainerMeta) DecorateService(s *svc.Attrs) { | ||
|
|
@@ -167,3 +269,102 @@ func ContainerMetadata[T ~string](dst map[T]string, ci *ContainerMeta, stringer | |
| out[stringer(attr.ContainerID)] = ci.ID | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. for data consistency, I'd remove |
||
| return out | ||
| } | ||
|
|
||
| // Start begins the event watcher goroutine to invalidate and remove | ||
| // metadata of destroyed containers. | ||
| func (s *ContainerStore) Start(ctx context.Context) { | ||
| s.watcherStarted.Do(func() { | ||
| s.initMutex.Lock() | ||
| s.initialize(ctx) | ||
| s.initMutex.Unlock() | ||
| go s.watchContainerEvents(ctx) | ||
| }) | ||
| } | ||
|
|
||
| func (s *ContainerStore) watchContainerEvents(ctx context.Context) { | ||
| for { | ||
| s.initMutex.Lock() | ||
| s.initialize(ctx) | ||
| docker := s.docker | ||
| s.initMutex.Unlock() | ||
|
|
||
| if docker == nil { | ||
| select { | ||
| case <-time.After(time.Second): | ||
| case <-ctx.Done(): | ||
| return | ||
| } | ||
| continue | ||
| } | ||
|
|
||
| fltrs := make(client.Filters). | ||
| Add("type", string(events.ContainerEventType)). | ||
| Add("event", string(events.ActionDie), string(events.ActionDestroy)) | ||
|
|
||
| if err := s.eventsLoop(ctx, fltrs); err != nil && !errors.Is(err, context.Canceled) { | ||
| s.log.Debug("docker event stream error", "error", err) | ||
| } | ||
|
|
||
| select { | ||
| case <-time.After(time.Second): | ||
| case <-ctx.Done(): | ||
| return | ||
| } | ||
| } | ||
|
florianl marked this conversation as resolved.
|
||
| } | ||
|
|
||
| func (s *ContainerStore) eventsLoop(ctx context.Context, fltrs client.Filters) error { | ||
| result := s.docker.Events(ctx, client.EventsListOptions{Filters: fltrs}) | ||
| for { | ||
| select { | ||
| case msg, ok := <-result.Messages: | ||
| if !ok { | ||
| return nil | ||
| } | ||
| if msg.Actor.ID != "" { | ||
| s.invalidateContainer(msg.Actor.ID) | ||
| } | ||
| case err, ok := <-result.Err: | ||
| if !ok || errors.Is(err, io.EOF) { | ||
| return nil | ||
| } | ||
| return err | ||
| case <-ctx.Done(): | ||
| return context.Canceled | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func (s *ContainerStore) InvalidatePID(pid app.PID) { | ||
| s.cacheMu.Lock() | ||
| defer s.cacheMu.Unlock() | ||
|
|
||
| meta, ok := s.byPID[pid] | ||
| if !ok { | ||
| return | ||
| } | ||
| delete(s.byPID, pid) | ||
|
|
||
| entry := s.byContainerID[meta.FullID] | ||
| newPIDs := entry.pids[:0] | ||
| for _, cachedPID := range entry.pids { | ||
| if cachedPID != pid { | ||
| newPIDs = append(newPIDs, cachedPID) | ||
| } | ||
| } | ||
|
|
||
| if len(newPIDs) == 0 { | ||
| delete(s.byContainerID, meta.FullID) | ||
| return | ||
| } | ||
| s.byContainerID[meta.FullID] = containerEntry{meta: entry.meta, pids: newPIDs} | ||
| } | ||
|
|
||
| func (s *ContainerStore) invalidateContainer(containerID string) { | ||
| s.cacheMu.Lock() | ||
| defer s.cacheMu.Unlock() | ||
| for _, pid := range s.byContainerID[ContainerID(containerID)].pids { | ||
| delete(s.byPID, pid) | ||
| } | ||
| delete(s.byContainerID, ContainerID(containerID)) | ||
| } | ||
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.