Skip to content

feat: add opentelemetry tracer to record the sync trace #697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ require (
github.com/google/uuid v1.6.0
github.com/spf13/cobra v1.9.1
github.com/stretchr/testify v1.10.0
go.opentelemetry.io/otel v1.28.0
go.opentelemetry.io/otel/trace v1.28.0
go.uber.org/mock v0.5.0
golang.org/x/sync v0.12.0
google.golang.org/protobuf v1.36.5
Expand Down
3 changes: 2 additions & 1 deletion pkg/engine/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/argoproj/gitops-engine/pkg/sync"
"github.com/argoproj/gitops-engine/pkg/sync/common"
"github.com/argoproj/gitops-engine/pkg/utils/kube"
"github.com/argoproj/gitops-engine/pkg/utils/tracing"
)

const (
Expand Down Expand Up @@ -84,7 +85,7 @@ func (e *gitOpsEngine) Sync(ctx context.Context,
return nil, err
}
opts = append(opts, sync.WithSkipHooks(!diffRes.Modified))
syncCtx, cleanup, err := sync.NewSyncContext(revision, result, e.config, e.config, e.kubectl, namespace, e.cache.GetOpenAPISchema(), opts...)
syncCtx, cleanup, err := sync.NewSyncContext(revision, result, e.config, e.config, e.kubectl, namespace, e.cache.GetOpenAPISchema(), tracing.NopTracer{}, "", "", opts...)
if err != nil {
return nil, err
}
Expand Down
55 changes: 53 additions & 2 deletions pkg/sync/sync_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"sort"
"strconv"
"strings"
"sync"
"time"
Expand Down Expand Up @@ -32,6 +33,7 @@ import (
"github.com/argoproj/gitops-engine/pkg/sync/hook"
resourceutil "github.com/argoproj/gitops-engine/pkg/sync/resource"
kubeutil "github.com/argoproj/gitops-engine/pkg/utils/kube"
"github.com/argoproj/gitops-engine/pkg/utils/tracing"
)

type reconciledResource struct {
Expand Down Expand Up @@ -221,6 +223,8 @@ func NewSyncContext(
kubectl kubeutil.Kubectl,
namespace string,
openAPISchema openapi.Resources,
syncTracer tracing.Tracer,
syncTraceID, syncTraceRootSpanID string,
opts ...SyncOpt,
) (SyncContext, func(), error) {
dynamicIf, err := dynamic.NewForConfig(restConfig)
Expand Down Expand Up @@ -258,6 +262,9 @@ func NewSyncContext(
permissionValidator: func(_ *unstructured.Unstructured, _ *metav1.APIResource) error {
return nil
},
syncTracer: syncTracer,
syncTraceID: syncTraceID,
syncTraceRootSpanID: syncTraceRootSpanID,
}
for _, opt := range opts {
opt(ctx)
Expand Down Expand Up @@ -371,6 +378,11 @@ type syncContext struct {
// lock to protect concurrent updates of the result list
lock sync.Mutex

// tracer for tracing the sync operation
syncTraceID string
syncTraceRootSpanID string
syncTracer tracing.Tracer

// syncNamespace is a function that will determine if the managed
// namespace should be synced
syncNamespace func(*unstructured.Unstructured, *unstructured.Unstructured) (bool, error)
Expand Down Expand Up @@ -1291,6 +1303,8 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState {
ss.Go(func(state runState) runState {
logCtx := sc.log.WithValues("dryRun", dryRun, "task", t)
logCtx.V(1).Info("Pruning")
span := sc.createSpan("pruneObject", dryRun)
defer span.Finish()
result, message := sc.pruneObject(t.liveObj, sc.prune, dryRun)
if result == common.ResultCodeSyncFailed {
state = failed
Expand All @@ -1299,6 +1313,7 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState {
if !dryRun || sc.dryRun || result == common.ResultCodeSyncFailed {
sc.setResourceResult(t, result, operationPhases[result], message)
}
sc.setBaggageItemForTasks(&span, t, message, result, operationPhases[result])
return state
})
}
Expand All @@ -1317,20 +1332,29 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState {
t := task
ss.Go(func(state runState) runState {
sc.log.WithValues("dryRun", dryRun, "task", t).V(1).Info("Deleting")
span := sc.createSpan("hooksDeletion", dryRun)
defer span.Finish()
message := "deleted"
operationPhase := common.OperationRunning
if !dryRun {
err := sc.deleteResource(t)
if err != nil {
// it is possible to get a race condition here, such that the resource does not exist when
// delete is requested, we treat this as a nop
if !apierrors.IsNotFound(err) {
state = failed
sc.setResourceResult(t, "", common.OperationError, fmt.Sprintf("failed to delete resource: %v", err))
message = fmt.Sprintf("failed to delete resource: %v", err)
operationPhase = common.OperationError
sc.setResourceResult(t, "", operationPhase, message)
}
} else {
message = "deleted(dry-run)"
// if there is anything that needs deleting, we are at best now in pending and
// want to return and wait for sync to be invoked again
state = pending
operationPhase = common.OperationSucceeded
}
sc.setBaggageItemForTasks(&span, t, message, "", operationPhase)
}
return state
})
Expand Down Expand Up @@ -1359,6 +1383,29 @@ func (sc *syncContext) runTasks(tasks syncTasks, dryRun bool) runState {
return state
}

func (sc *syncContext) createSpan(operation string, dryrun bool) tracing.Span {
// NOTE: we use context.Background() here because we don't want to inherit any trace context from the parent
var span tracing.Span
ctx := context.Background()
if sc.syncTracer == nil {
span = tracing.NopTracer{}.StartSpan(ctx, operation)
} else {
span = sc.syncTracer.StartSpanFromTraceParent(ctx, operation, sc.syncTraceID, sc.syncTraceRootSpanID)
}
span.SetBaggageItem("dryrun", strconv.FormatBool(dryrun))
return span
}

func (sc *syncContext) setBaggageItemForTasks(span *tracing.Span, t *syncTask, message string, result common.ResultCode, operationPhase common.OperationPhase) {
resourceKey := t.resourceKey()
(*span).SetBaggageItem("resource", resourceKey.String())
(*span).SetBaggageItem("result", string(result))
(*span).SetBaggageItem("operationPhase", string(operationPhase))
(*span).SetBaggageItem("message", message)
(*span).SetBaggageItem("phase", string(t.phase))
(*span).SetBaggageItem("wave", strconv.Itoa(t.wave()))
}

func (sc *syncContext) processCreateTasks(state runState, tasks syncTasks, dryRun bool) runState {
ss := newStateSync(state)
for _, task := range tasks {
Expand All @@ -1370,20 +1417,24 @@ func (sc *syncContext) processCreateTasks(state runState, tasks syncTasks, dryRu
logCtx := sc.log.WithValues("dryRun", dryRun, "task", t)
logCtx.V(1).Info("Applying")
validate := sc.validate && !resourceutil.HasAnnotationOption(t.targetObj, common.AnnotationSyncOptions, common.SyncOptionsDisableValidation)
span := sc.createSpan("applyObject", dryRun)
defer span.Finish()
result, message := sc.applyObject(t, dryRun, validate)
if result == common.ResultCodeSyncFailed {
logCtx.WithValues("message", message).Info("Apply failed")
state = failed
}
var phase common.OperationPhase
if !dryRun || sc.dryRun || result == common.ResultCodeSyncFailed {
phase := operationPhases[result]
phase = operationPhases[result]
// no resources are created in dry-run, so running phase means validation was
// successful and sync operation succeeded
if sc.dryRun && phase == common.OperationRunning {
phase = common.OperationSucceeded
}
sc.setResourceResult(t, result, phase, message)
}
sc.setBaggageItemForTasks(&span, t, message, result, phase)
return state
})
}
Expand Down
14 changes: 7 additions & 7 deletions pkg/utils/kube/ctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func (k *KubectlCmd) newGVKParser(oapiGetter discovery.OpenAPISchemaInterface) (
}

func (k *KubectlCmd) GetAPIResources(config *rest.Config, preferred bool, resourceFilter ResourceFilter) ([]APIResourceInfo, error) {
span := k.Tracer.StartSpan("GetAPIResources")
span := k.Tracer.StartSpan(context.Background(), "GetAPIResources")
defer span.Finish()
apiResIfs, err := k.filterAPIResources(config, preferred, resourceFilter, func(apiResource *metav1.APIResource) bool {
return isSupportedVerb(apiResource, listVerb) && isSupportedVerb(apiResource, watchVerb)
Expand All @@ -176,7 +176,7 @@ func (k *KubectlCmd) GetAPIResources(config *rest.Config, preferred bool, resour

// GetResource returns resource
func (k *KubectlCmd) GetResource(ctx context.Context, config *rest.Config, gvk schema.GroupVersionKind, name string, namespace string) (*unstructured.Unstructured, error) {
span := k.Tracer.StartSpan("GetResource")
span := k.Tracer.StartSpan(context.Background(), "GetResource")
span.SetBaggageItem("kind", gvk.Kind)
span.SetBaggageItem("name", name)
defer span.Finish()
Expand All @@ -199,7 +199,7 @@ func (k *KubectlCmd) GetResource(ctx context.Context, config *rest.Config, gvk s

// CreateResource creates resource
func (k *KubectlCmd) CreateResource(ctx context.Context, config *rest.Config, gvk schema.GroupVersionKind, name string, namespace string, obj *unstructured.Unstructured, createOptions metav1.CreateOptions, subresources ...string) (*unstructured.Unstructured, error) {
span := k.Tracer.StartSpan("CreateResource")
span := k.Tracer.StartSpan(context.Background(), "CreateResource")
span.SetBaggageItem("kind", gvk.Kind)
span.SetBaggageItem("name", name)
defer span.Finish()
Expand All @@ -222,7 +222,7 @@ func (k *KubectlCmd) CreateResource(ctx context.Context, config *rest.Config, gv

// PatchResource patches resource
func (k *KubectlCmd) PatchResource(ctx context.Context, config *rest.Config, gvk schema.GroupVersionKind, name string, namespace string, patchType types.PatchType, patchBytes []byte, subresources ...string) (*unstructured.Unstructured, error) {
span := k.Tracer.StartSpan("PatchResource")
span := k.Tracer.StartSpan(context.Background(), "PatchResource")
span.SetBaggageItem("kind", gvk.Kind)
span.SetBaggageItem("name", name)
defer span.Finish()
Expand All @@ -245,7 +245,7 @@ func (k *KubectlCmd) PatchResource(ctx context.Context, config *rest.Config, gvk

// DeleteResource deletes resource
func (k *KubectlCmd) DeleteResource(ctx context.Context, config *rest.Config, gvk schema.GroupVersionKind, name string, namespace string, deleteOptions metav1.DeleteOptions) error {
span := k.Tracer.StartSpan("DeleteResource")
span := k.Tracer.StartSpan(context.Background(), "DeleteResource")
span.SetBaggageItem("kind", gvk.Kind)
span.SetBaggageItem("name", name)
defer span.Finish()
Expand Down Expand Up @@ -323,7 +323,7 @@ func ManageServerSideDiffDryRuns(config *rest.Config, openAPISchema openapi.Reso

// ConvertToVersion converts an unstructured object into the specified group/version
func (k *KubectlCmd) ConvertToVersion(obj *unstructured.Unstructured, group string, version string) (*unstructured.Unstructured, error) {
span := k.Tracer.StartSpan("ConvertToVersion")
span := k.Tracer.StartSpan(context.Background(), "ConvertToVersion")
from := obj.GroupVersionKind().GroupVersion()
span.SetBaggageItem("from", from.String())
span.SetBaggageItem("to", schema.GroupVersion{Group: group, Version: version}.String())
Expand All @@ -335,7 +335,7 @@ func (k *KubectlCmd) ConvertToVersion(obj *unstructured.Unstructured, group stri
}

func (k *KubectlCmd) GetServerVersion(config *rest.Config) (string, error) {
span := k.Tracer.StartSpan("GetServerVersion")
span := k.Tracer.StartSpan(context.Background(), "GetServerVersion")
defer span.Finish()
client, err := discovery.NewDiscoveryClientForConfig(config)
if err != nil {
Expand Down
10 changes: 5 additions & 5 deletions pkg/utils/kube/resource_ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ func kubeCmdFactory(kubeconfig, ns string, config *rest.Config) cmdutil.Factory
}

func (k *kubectlResourceOperations) ReplaceResource(ctx context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy, force bool) (string, error) {
span := k.tracer.StartSpan("ReplaceResource")
span := k.tracer.StartSpan(context.Background(), "ReplaceResource")
span.SetBaggageItem("kind", obj.GetKind())
span.SetBaggageItem("name", obj.GetName())
defer span.Finish()
Expand All @@ -243,7 +243,7 @@ func (k *kubectlResourceOperations) ReplaceResource(ctx context.Context, obj *un

func (k *kubectlResourceOperations) CreateResource(ctx context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy, validate bool) (string, error) {
gvk := obj.GroupVersionKind()
span := k.tracer.StartSpan("CreateResource")
span := k.tracer.StartSpan(context.Background(), "CreateResource")
span.SetBaggageItem("kind", gvk.Kind)
span.SetBaggageItem("name", obj.GetName())
defer span.Finish()
Expand Down Expand Up @@ -273,7 +273,7 @@ func (k *kubectlResourceOperations) CreateResource(ctx context.Context, obj *uns

func (k *kubectlResourceOperations) UpdateResource(ctx context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy) (*unstructured.Unstructured, error) {
gvk := obj.GroupVersionKind()
span := k.tracer.StartSpan("UpdateResource")
span := k.tracer.StartSpan(context.Background(), "UpdateResource")
span.SetBaggageItem("kind", gvk.Kind)
span.SetBaggageItem("name", obj.GetName())
defer span.Finish()
Expand Down Expand Up @@ -302,7 +302,7 @@ func (k *kubectlResourceOperations) UpdateResource(ctx context.Context, obj *uns

// ApplyResource performs an apply of a unstructured resource
func (k *kubectlServerSideDiffDryRunApplier) ApplyResource(_ context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy, force bool, validate bool, serverSideApply bool, manager string) (string, error) {
span := k.tracer.StartSpan("ApplyResource")
span := k.tracer.StartSpan(context.Background(), "ApplyResource")
span.SetBaggageItem("kind", obj.GetKind())
span.SetBaggageItem("name", obj.GetName())
defer span.Finish()
Expand All @@ -328,7 +328,7 @@ func (k *kubectlServerSideDiffDryRunApplier) ApplyResource(_ context.Context, ob

// ApplyResource performs an apply of a unstructured resource
func (k *kubectlResourceOperations) ApplyResource(ctx context.Context, obj *unstructured.Unstructured, dryRunStrategy cmdutil.DryRunStrategy, force, validate, serverSideApply bool, manager string) (string, error) {
span := k.tracer.StartSpan("ApplyResource")
span := k.tracer.StartSpan(context.Background(), "ApplyResource")
span.SetBaggageItem("kind", obj.GetKind())
span.SetBaggageItem("name", obj.GetName())
defer span.Finish()
Expand Down
7 changes: 6 additions & 1 deletion pkg/utils/tracing/api.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
package tracing

import "context"

/*
Poor Mans OpenTracing.

Standardizes logging of operation duration.
*/

type Tracer interface {
StartSpan(operationName string) Span
StartSpan(_ context.Context, operationName string) Span
StartSpanFromTraceParent(ctx context.Context, operationName string, parentTraceId, parentSpanId string) Span
}

type Span interface {
SetBaggageItem(key string, value any)
Finish()
SpanID() string
TraceID() string
}
17 changes: 16 additions & 1 deletion pkg/utils/tracing/logging.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package tracing

import (
"context"
"time"

"github.com/go-logr/logr"
Expand All @@ -21,7 +22,7 @@ func NewLoggingTracer(logger logr.Logger) *LoggingTracer {
}
}

func (l LoggingTracer) StartSpan(operationName string) Span {
func (l LoggingTracer) StartSpan(_ context.Context, operationName string) Span {
return loggingSpan{
logger: l.logger,
operationName: operationName,
Expand All @@ -30,6 +31,12 @@ func (l LoggingTracer) StartSpan(operationName string) Span {
}
}

// loggingSpan is not a real distributed tracing system.
// so no need to implement real StartSpanFromTraceParent method.
func (l LoggingTracer) StartSpanFromTraceParent(ctx context.Context, operationName string, _, _ string) Span {
return l.StartSpan(ctx, operationName)
}

type loggingSpan struct {
logger logr.Logger
operationName string
Expand All @@ -54,3 +61,11 @@ func baggageToVals(baggage map[string]any) []any {
}
return result
}

func (s loggingSpan) TraceID() string {
return ""
}

func (s loggingSpan) SpanID() string {
return ""
}
3 changes: 2 additions & 1 deletion pkg/utils/tracing/logging_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package tracing

import (
"context"
"testing"

"github.com/go-logr/logr"
Expand All @@ -22,7 +23,7 @@ func TestLoggingTracer(t *testing.T) {

tr := NewLoggingTracer(logr.New(l))

span := tr.StartSpan("my-operation")
span := tr.StartSpan(context.Background(), "my-operation")
span.SetBaggageItem("my-key", "my-value")
span.Finish()
}
16 changes: 15 additions & 1 deletion pkg/utils/tracing/nop.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
package tracing

import "context"

var (
_ Tracer = NopTracer{}
_ Span = nopSpan{}
)

type NopTracer struct{}

func (n NopTracer) StartSpan(_ string) Span {
func (n NopTracer) StartSpan(_ context.Context, _ string) Span {
return nopSpan{}
}

func (n NopTracer) StartSpanFromTraceParent(_ context.Context, _, _, _ string) Span {
return nopSpan{}
}

Expand All @@ -18,3 +24,11 @@ func (n nopSpan) SetBaggageItem(_ string, _ any) {

func (n nopSpan) Finish() {
}

func (n nopSpan) TraceID() string {
return ""
}

func (n nopSpan) SpanID() string {
return ""
}
Loading
Loading