smartcontractkit · patrickhuie19 · Apr 4, 2025 · Apr 1, 2025 · Apr 1, 2025 · Apr 2, 2025
@@ -12,9 +12,15 @@ const (
 	KeyWorkflowID          = "workflowID"
 	KeyWorkflowExecutionID = "workflowExecutionID"
 	KeyWorkflowName        = "workflowName"
+	KeyWorkflowVersion     = "workflowVersion"
 	KeyWorkflowOwner       = "workflowOwner"
 	KeyStepID              = "stepID"
 	KeyStepRef             = "stepRef"
+	KeyDonID               = "DonID"
+	KeyDonF                = "F"
+	KeyDonN                = "N"
+	KeyDonQ                = "Q"
+	KeyP2PID               = "p2pID"
 )
 
 func LabelKeysSorted() iter.Seq[string] {

@@ -6,11 +6,15 @@ import (
 	"encoding/hex"
 	"errors"
 	"fmt"
+	"strconv"
 	"sync"
 	"time"
 
 	"github.com/jonboulle/clockwork"
+	"google.golang.org/protobuf/proto"
 
+	"github.com/smartcontractkit/chainlink-common/pkg/aggregation"
+	"github.com/smartcontractkit/chainlink-common/pkg/beholder"
 	"github.com/smartcontractkit/chainlink-common/pkg/capabilities"
 	"github.com/smartcontractkit/chainlink-common/pkg/custmsg"
 	"github.com/smartcontractkit/chainlink-common/pkg/metrics"
@@ -20,10 +24,10 @@ import (
 	"github.com/smartcontractkit/chainlink-common/pkg/workflows"
 	"github.com/smartcontractkit/chainlink-common/pkg/workflows/exec"
 	"github.com/smartcontractkit/chainlink-common/pkg/workflows/sdk"
-
 	"github.com/smartcontractkit/chainlink/v2/core/capabilities/transmission"
 	"github.com/smartcontractkit/chainlink/v2/core/logger"
 	"github.com/smartcontractkit/chainlink/v2/core/platform"
+	"github.com/smartcontractkit/chainlink/v2/core/services/workflows/pb"
 	"github.com/smartcontractkit/chainlink/v2/core/services/workflows/ratelimiter"
 	"github.com/smartcontractkit/chainlink/v2/core/services/workflows/store"
 	"github.com/smartcontractkit/chainlink/v2/core/services/workflows/syncerlimiter"
@@ -502,9 +506,14 @@ func generateExecutionID(workflowID, eventID string) (string, error) {
 }
 
 // startExecution kicks off a new workflow execution when a trigger event is received.
-func (e *Engine) startExecution(ctx context.Context, executionID string, event *values.Map) error {
+func (e *Engine) startExecution(ctx context.Context, executionID string, triggerID string, event *values.Map) error {
 	e.meterReports.Add(executionID, NewMeteringReport())
 
+	err := emitExecutionStartedEvent(ctx, e.cma, triggerID)
+	if err != nil {
+		e.logger.Errorf("failed to emit execution started event: %+v", err)
+	}
+
 	lggr := e.logger.With("event", event, platform.KeyWorkflowExecutionID, executionID)
 	lggr.Debug("executing on a trigger event")
 	workflowExecution, err := e.executionsStore.Add(ctx, map[string]*store.WorkflowExecutionStep{
@@ -676,6 +685,10 @@ func (e *Engine) finishExecution(ctx context.Context, cma custmsg.MessageEmitter
 	}
 	logCustMsg(ctx, cma, fmt.Sprintf("execution duration: %d (seconds)", executionDuration), l)
 	l.Infof("execution duration: %d (seconds)", executionDuration)
+	err = emitExecutionFinishedEvent(ctx, cma, status)
+	if err != nil {
+		e.logger.Errorf("failed to emit execution finished event: %+v", err)
+	}
 	e.onExecutionFinished(executionID)
 	return nil
 }
@@ -731,7 +744,7 @@ func (e *Engine) worker(ctx context.Context) {
 			}
 
 			cma := e.cma.With(platform.KeyWorkflowExecutionID, executionID)
-			err = e.startExecution(ctx, executionID, resp.Event.Outputs)
+			err = e.startExecution(ctx, executionID, te.ID, resp.Event.Outputs)
 			if err != nil {
 				e.logger.With(platform.KeyWorkflowExecutionID, executionID).Errorf("failed to start execution: %v", err)
 				logCustMsg(ctx, cma, fmt.Sprintf("failed to start execution: %s", err), e.logger)
@@ -997,13 +1010,32 @@ func (e *Engine) executeStep(ctx context.Context, lggr logger.Logger, msg stepRe
 	defer cancel()
 
 	e.metrics.with(platform.KeyCapabilityID, curStep.ID).incrementCapabilityInvocationCounter(ctx)
-	output, err := curStep.capability.Execute(stepCtx, tr)
+	err = emitCapabilityStartedEvent(ctx, e.cma, curStep.ID, msg.stepRef)
 	if err != nil {
+		e.logger.Errorf("failed to emit capability event: %v", err)
+	}
+	output, capErr := curStep.capability.Execute(stepCtx, tr)
+	status := store.StatusCompleted
+
+	if capErr != nil {
+		status = store.StatusErrored
+		if capabilities.ErrStopExecution.Is(capErr) {
+			status = store.StatusCompletedEarlyExit
+		}
+	}
+
+	defer func() {
+		if err := emitCapabilityFinishedEvent(ctx, e.cma, curStep.ID, msg.stepRef, status); err != nil {
+			e.logger.Errorf("failed to emit capability event: %v", err)
+		}
+	}()
+
+	if capErr != nil {
 		e.metrics.with(platform.KeyStepRef, msg.stepRef, platform.KeyCapabilityID, curStep.ID).incrementCapabilityFailureCounter(ctx)
-		return inputsMap, capabilities.CapabilityResponse{}, err
+		return inputsMap, capabilities.CapabilityResponse{}, capErr
 	}
 
-	return inputsMap, output, err
+	return inputsMap, output, nil
 }
 
 func (e *Engine) deregisterTrigger(ctx context.Context, t *triggerCapability, triggerIdx int) error {
@@ -1355,7 +1387,22 @@ func NewEngine(ctx context.Context, cfg Config) (engine *Engine, err error) {
 		return nil, fmt.Errorf("could not initialize monitoring resources: %w", err)
 	}
 
-	cma := custmsg.NewLabeler().With(platform.KeyWorkflowID, cfg.WorkflowID, platform.KeyWorkflowOwner, cfg.WorkflowOwner, platform.KeyWorkflowName, cfg.WorkflowName.String())
+	nodeState, err := cfg.Registry.LocalNode(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("could not get local node state: %w", err)
+	}
+	cma := custmsg.NewLabeler().With(platform.KeyWorkflowID, cfg.WorkflowID,
+		platform.KeyWorkflowOwner, cfg.WorkflowOwner,
+		platform.KeyWorkflowName, cfg.WorkflowName.String(),
+		platform.KeyDonID, strconv.Itoa(int(nodeState.WorkflowDON.ID)),
+		platform.KeyDonF, strconv.Itoa(int(nodeState.WorkflowDON.F)),
+		platform.KeyDonN, strconv.Itoa(len(nodeState.WorkflowDON.Members)),
+		platform.KeyDonQ, strconv.Itoa(aggregation.ByzantineQuorum(
+			len(nodeState.WorkflowDON.Members),
+			int(nodeState.WorkflowDON.F),
+		)),
+		platform.KeyP2PID, nodeState.PeerID.String(),
+	)
 	workflow, err := Parse(cfg.Workflow)
 	if err != nil {
 		logCustMsg(ctx, cma, fmt.Sprintf("failed to parse workflow: %s", err), cfg.Lggr)
@@ -1440,3 +1487,125 @@ func logCustMsg(ctx context.Context, cma custmsg.MessageEmitter, msg string, log
 		log.Errorf("failed to send custom message with msg: %s, err: %v", msg, err)
 	}
 }
+
+// buildWorkflowMetadata populates a WorkflowMetadata from kvs (map[string]string).
+func buildWorkflowMetadata(kvs map[string]string) *pb.WorkflowMetadata {
+	m := &pb.WorkflowMetadata{}
+
+	m.WorkflowName = kvs[platform.KeyWorkflowName]
+	m.Version = kvs[platform.KeyWorkflowVersion]
+	m.WorkflowID = kvs[platform.KeyWorkflowID]
+	m.WorkflowExecutionID = kvs[platform.KeyWorkflowExecutionID]
+
+	if donIDStr, ok := kvs[platform.KeyDonID]; ok {
+		if id, err := strconv.ParseInt(donIDStr, 10, 32); err == nil {
+			m.DonF = int32(id)
+		}
+	}
+
+	m.P2PID = kvs[platform.KeyP2PID]
+
+	if donFStr, ok := kvs[platform.KeyDonF]; ok {
+		if id, err := strconv.ParseInt(donFStr, 10, 32); err == nil {
+			m.DonF = int32(id)
+		}
+	}
+	if donNStr, ok := kvs[platform.KeyDonN]; ok {
+		if id, err := strconv.ParseInt(donNStr, 10, 32); err == nil {
+			m.DonN = int32(id)
+		}
+	}
+	if donQStr, ok := kvs[platform.KeyDonQ]; ok {
+		if id, err := strconv.ParseInt(donQStr, 10, 32); err == nil {
+			m.DonQ = int32(id)
+		}
+	}
+
+	return m
+}
+
+// emitProtoMessage marshals a proto.Message and emits it via beholder.
+func emitProtoMessage(ctx context.Context, msg proto.Message) error {
+	b, err := proto.Marshal(msg)
+	if err != nil {
+		return err
+	}
+
+	// Determine the schema and entity based on the message type
+	var schema, entity string
+	switch msg.(type) {
+	case *pb.WorkflowExecutionStarted:
+		schema = SchemaWorkflowStarted
+		entity = EventWorkflowExecutionStarted
+	case *pb.WorkflowExecutionFinished:
+		schema = SchemaWorkflowFinished
+		entity = EventWorkflowExecutionFinished
+	case *pb.CapabilityExecutionStarted:
+		schema = SchemaCapabilityStarted
+		entity = EventCapabilityExecutionStarted
+	case *pb.CapabilityExecutionFinished:
+		schema = SchemaCapabilityFinished
+		entity = EventCapabilityExecutionFinished
+	default:
+		return fmt.Errorf("unknown message type: %T", msg)
+	}
+
+	// entity must be prefixed with the proto package name
+	entity = fmt.Sprintf("%s.%s", EventsProtoPkg, entity)
+
+	return beholder.GetEmitter().Emit(ctx, b,
+		"beholder_data_schema", schema, // required
+		"beholder_domain", "platform", // required
+		"beholder_entity", entity) // required
+}
+
+func emitExecutionStartedEvent(ctx context.Context, cma custmsg.MessageEmitter, triggerID string) error {
+	metadata := buildWorkflowMetadata(cma.Labels())
+
+	event := &pb.WorkflowExecutionStarted{
+		M:         metadata,
+		Timestamp: time.Now().String(),
+		TriggerID: triggerID,
+	}
+
+	return emitProtoMessage(ctx, event)
+}
+
+func emitExecutionFinishedEvent(ctx context.Context, cma custmsg.MessageEmitter, status string) error {
+	metadata := buildWorkflowMetadata(cma.Labels())
+
+	event := &pb.WorkflowExecutionFinished{
+		M:         metadata,
+		Timestamp: time.Now().String(),
+		Status:    status,
+	}
+
+	return emitProtoMessage(ctx, event)
+}
+
+func emitCapabilityStartedEvent(ctx context.Context, cma custmsg.MessageEmitter, capabilityID, stepRef string) error {
+	metadata := buildWorkflowMetadata(cma.Labels())
+
+	event := &pb.CapabilityExecutionStarted{
+		M:            metadata,
+		Timestamp:    time.Now().String(),
+		CapabilityID: capabilityID,
+		StepRef:      stepRef,
+	}
+
+	return emitProtoMessage(ctx, event)
+}
+
+func emitCapabilityFinishedEvent(ctx context.Context, cma custmsg.MessageEmitter, capabilityID, stepRef, status string) error {
+	metadata := buildWorkflowMetadata(cma.Labels())
+
+	event := &pb.CapabilityExecutionFinished{
+		M:            metadata,
+		Timestamp:    time.Now().String(),
+		CapabilityID: capabilityID,
+		StepRef:      stepRef,
+		Status:       status,
+	}
+
+	return emitProtoMessage(ctx, event)
+}
@@ -259,7 +259,8 @@ func newTestEngine(t *testing.T, reg *coreCap.Registry, sdkSpec sdk.WorkflowSpec
 			detail := report.Description()
 			bClient := beholder.GetClient()
 			// kvAttrs is what the test client matches on, view pkg/utils/test in common for more detail
-			kvAttrs := []any{"beholder_data_schema", detail.Schema, "beholder_domain", detail.Domain, "beholder_entity", detail.Entity,
+			kvAttrs := []any{"beholder_data_schema", detail.Schema, "beholder_domain", detail.Domain,
+				"beholder_entity", fmt.Sprintf("%s.%s", MeteringProtoPkg, detail.Entity),
 				platform.KeyWorkflowName, name, platform.KeyWorkflowID, ID, platform.KeyWorkflowExecutionID, execID}
 
 			data, mErr := proto.Marshal(report.Message())
@@ -398,7 +399,12 @@ func TestEngineWithHardcodedWorkflow(t *testing.T) {
 	require.NoError(t, err)
 
 	assert.Equal(t, store.StatusCompleted, state.Status)
-	assert.Equal(t, 1, beholderTester.Len(t, "beholder_entity", MeteringReportEntity))
+
+	assert.Equal(t, 1, beholderTester.Len(t, "beholder_entity", fmt.Sprintf("%s.%s", MeteringProtoPkg, MeteringReportEntity)))
+	assert.Equal(t, 1, beholderTester.Len(t, "beholder_entity", fmt.Sprintf("%s.%s", EventsProtoPkg, EventWorkflowExecutionStarted)))
+	assert.Equal(t, 1, beholderTester.Len(t, "beholder_entity", fmt.Sprintf("%s.%s", EventsProtoPkg, EventWorkflowExecutionFinished)))
+	assert.Equal(t, 3, beholderTester.Len(t, "beholder_entity", fmt.Sprintf("%s.%s", EventsProtoPkg, EventCapabilityExecutionStarted)))
+	assert.Equal(t, 3, beholderTester.Len(t, "beholder_entity", fmt.Sprintf("%s.%s", EventsProtoPkg, EventCapabilityExecutionFinished)))
 }
 
 const (
@@ -2274,7 +2280,7 @@ func TestEngine_ConcurrentExecutions(t *testing.T) {
 	eid2 := getExecutionID(t, eng, testHooks)
 
 	assert.Equal(t, store.StatusCompleted, state.Status)
-	assert.Equal(t, 2, beholderTester.Len(t, "beholder_entity", MeteringReportEntity))
+	assert.Equal(t, 2, beholderTester.Len(t, "beholder_entity", fmt.Sprintf("%s.%s", MeteringProtoPkg, MeteringReportEntity)))
 	assert.Equal(t, 1, beholderTester.Len(t, platform.KeyWorkflowExecutionID, eid))
 	assert.Equal(t, 1, beholderTester.Len(t, platform.KeyWorkflowExecutionID, eid2))
 }
@@ -0,0 +1,22 @@
+package workflows
+
+const (
+	EventsProtoPkg = "pb"
+	// EventWorkflowExecutionStarted represents a workflow execution started event
+	EventWorkflowExecutionStarted string = "WorkflowExecutionStarted"
+	// EventWorkflowExecutionFinished represents a workflow execution finished event
+	EventWorkflowExecutionFinished string = "WorkflowExecutionFinished"
+	// EventCapabilityExecutionStarted represents a capability execution started event
+	EventCapabilityExecutionStarted string = "CapabilityExecutionStarted"
+	// EventCapabilityExecutionFinished represents a capability execution finished event
+	EventCapabilityExecutionFinished string = "CapabilityExecutionFinished"
+
+	// SchemaWorkflowStarted represents the schema for workflow started events
+	SchemaWorkflowStarted string = "/cre-events-workflow-started/v1"
+	// SchemaWorkflowFinished represents the schema for workflow finished events
+	SchemaWorkflowFinished string = "/cre-events-workflow-finished/v1"
+	// SchemaCapabilityStarted represents the schema for capability started events
+	SchemaCapabilityStarted string = "/cre-events-capability-started/v1"
+	// SchemaCapabilityFinished represents the schema for capability finished events
+	SchemaCapabilityFinished string = "/cre-events-capability-finished/v1"
+)
@@ -15,6 +15,7 @@ const (
 	MeteringReportSchema string = "github.com/smartcontractkit/chainlink-common/pkg/capabilities/pb/capabilities.proto"
 	MeteringReportDomain string = "platform"
 	MeteringReportEntity string = "MeteringReport"
+	MeteringProtoPkg     string = "pb"
 )
 
 type MeteringReportStepRef string