From 7840bdca0ff8a93221f21dc880dc12ddc3347a88 Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Wed, 11 Mar 2026 13:10:41 -0300 Subject: [PATCH 01/11] feat(eventing): implement data stream processing for GroupsIO entities - Add event handler for processing DynamoDB change events related to GroupsIO services, subgroups, and members. - Introduce mapping functions to track processed entities and manage idempotency. - Implement message publishing for indexer and access control messages. - Create a data stream consumer to route JetStream KV messages to the appropriate event handlers. - Enhance error handling with transient error detection for retry logic. - Update main application to start the data stream processor based on environment configuration. Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- cmd/mailing-list-api/data_stream.go | 150 ++++++++++++++++++ .../eventing/datastream/handler.go | 93 +++++++++++ .../eventing/datastream/mapping.go | 67 ++++++++ .../eventing/datastream/member_handler.go | 107 +++++++++++++ .../eventing/datastream/service_handler.go | 110 +++++++++++++ .../eventing/datastream/subgroup_handler.go | 121 ++++++++++++++ .../eventing/event_processor.go | 130 +++++++++++++++ cmd/mailing-list-api/main.go | 6 + .../domain/port/data_stream_event_handler.go | 21 +++ .../nats/data_stream_consumer.go | 111 +++++++++++++ pkg/constants/storage.go | 18 +++ pkg/errors/transient.go | 20 +++ pkg/mapconv/field_extract.go | 123 ++++++++++++++ pkg/mapconv/field_extract_test.go | 116 ++++++++++++++ 14 files changed, 1193 insertions(+) create mode 100644 cmd/mailing-list-api/data_stream.go create mode 100644 cmd/mailing-list-api/eventing/datastream/handler.go create mode 100644 cmd/mailing-list-api/eventing/datastream/mapping.go create mode 100644 cmd/mailing-list-api/eventing/datastream/member_handler.go create mode 100644 cmd/mailing-list-api/eventing/datastream/service_handler.go create mode 100644 cmd/mailing-list-api/eventing/datastream/subgroup_handler.go create mode 100644 cmd/mailing-list-api/eventing/event_processor.go create mode 100644 internal/domain/port/data_stream_event_handler.go create mode 100644 internal/infrastructure/nats/data_stream_consumer.go create mode 100644 pkg/errors/transient.go create mode 100644 pkg/mapconv/field_extract.go create mode 100644 pkg/mapconv/field_extract_test.go diff --git a/cmd/mailing-list-api/data_stream.go b/cmd/mailing-list-api/data_stream.go new file mode 100644 index 0000000..3421690 --- /dev/null +++ b/cmd/mailing-list-api/data_stream.go @@ -0,0 +1,150 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package main + +import ( + "context" + "fmt" + "log/slog" + "os" + "strconv" + "sync" + "time" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/cmd/mailing-list-api/eventing" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/cmd/mailing-list-api/eventing/datastream" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/cmd/mailing-list-api/service" + infraNATS "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/nats" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + "github.com/nats-io/nats.go" + "github.com/nats-io/nats.go/jetstream" +) + +// handleDataStream starts the durable JetStream consumer that processes DynamoDB KV +// change events for GroupsIO entities (service, subgroup, member). +// +// Enabled only when MAILING_LIST_EVENTING_ENABLED=true. If disabled, the function +// is a no-op and returns nil. +func handleDataStream(ctx context.Context, wg *sync.WaitGroup) error { + if !dataStreamEnabled() { + slog.InfoContext(ctx, "data stream processor disabled (MAILING_LIST_EVENTING_ENABLED not set to true)") + return nil + } + + cfg := dataStreamConfig() + natsURL := dataStreamNATSURL() + + conn, err := nats.Connect(natsURL, + nats.DrainTimeout(30*time.Second), + nats.ErrorHandler(func(_ *nats.Conn, sub *nats.Subscription, err error) { + attrs := []any{"error", err} + if sub != nil { + attrs = append(attrs, "subject", sub.Subject) + } + slog.With(attrs...).Error("NATS async error (data stream eventing)") + }), + nats.ClosedHandler(func(_ *nats.Conn) { + slog.Warn("NATS data stream eventing connection closed") + }), + ) + if err != nil { + return fmt.Errorf("failed to connect to NATS for data stream eventing: %w", err) + } + + js, err := jetstream.New(conn) + if err != nil { + conn.Close() + return fmt.Errorf("failed to create JetStream context: %w", err) + } + + mappingsKV, err := js.KeyValue(ctx, constants.KVBucketNameV1Mappings) + if err != nil { + conn.Close() + return fmt.Errorf("failed to access %s KV bucket: %w", constants.KVBucketNameV1Mappings, err) + } + + publisher := service.MessagePublisher(ctx) + handler := datastream.NewEventHandler(publisher, mappingsKV) + streamConsumer := infraNATS.NewDataStreamConsumer(handler) + + processor, err := eventing.NewEventProcessor(ctx, cfg, conn, streamConsumer) + if err != nil { + conn.Close() + return fmt.Errorf("failed to create data stream processor: %w", err) + } + + slog.InfoContext(ctx, "data stream processor created", + "consumer_name", cfg.ConsumerName, + "stream_name", cfg.StreamName, + ) + + wg.Add(1) + go func() { + defer wg.Done() + if err := processor.Start(ctx); err != nil { + slog.ErrorContext(ctx, "data stream processor exited with error", "error", err) + } + }() + + wg.Add(1) + go func() { + defer wg.Done() + <-ctx.Done() + stopCtx, cancel := context.WithTimeout(context.Background(), gracefulShutdownSeconds*time.Second) + defer cancel() + if err := processor.Stop(stopCtx); err != nil { + slog.ErrorContext(stopCtx, "error stopping data stream processor", "error", err) + } + }() + + return nil +} + +// dataStreamEnabled reports whether the data stream processor has been opted into. +func dataStreamEnabled() bool { + return os.Getenv("MAILING_LIST_EVENTING_ENABLED") == "true" +} + +// dataStreamNATSURL returns the NATS URL for the data stream connection. +func dataStreamNATSURL() string { + if url := os.Getenv("NATS_URL"); url != "" { + return url + } + return "nats://localhost:4222" +} + +// dataStreamConfig builds eventing.Config from environment variables with +// sensible defaults. +func dataStreamConfig() eventing.Config { + consumerName := os.Getenv("MAILING_LIST_EVENTING_CONSUMER_NAME") + if consumerName == "" { + consumerName = "mailing-list-service-kv-consumer" + } + + maxDeliver := envInt("MAILING_LIST_EVENTING_MAX_DELIVER", 3) + maxAckPending := envInt("MAILING_LIST_EVENTING_MAX_ACK_PENDING", 1000) + ackWaitSecs := envInt("MAILING_LIST_EVENTING_ACK_WAIT_SECS", 30) + + return eventing.Config{ + ConsumerName: consumerName, + StreamName: "KV_" + constants.KVBucketV1Objects, + MaxDeliver: maxDeliver, + AckWait: time.Duration(ackWaitSecs) * time.Second, + MaxAckPending: maxAckPending, + } +} + +// envInt reads an integer environment variable, returning defaultVal if the +// variable is absent or cannot be parsed. +func envInt(key string, defaultVal int) int { + s := os.Getenv(key) + if s == "" { + return defaultVal + } + n, err := strconv.Atoi(s) + if err != nil { + return defaultVal + } + return n +} diff --git a/cmd/mailing-list-api/eventing/datastream/handler.go b/cmd/mailing-list-api/eventing/datastream/handler.go new file mode 100644 index 0000000..aab1934 --- /dev/null +++ b/cmd/mailing-list-api/eventing/datastream/handler.go @@ -0,0 +1,93 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +// Package datastream implements port.DataEventHandler for GroupsIO v1 DynamoDB +// change events sourced from the lfx-v1-sync-helper pipeline. +package datastream + +import ( + "context" + "log/slog" + "strings" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" + "github.com/nats-io/nats.go/jetstream" +) + +const ( + // KV key prefixes matching lfx-v1-sync-helper's naming convention. + kvPrefixService = "itx-groupsio-v2-service." + kvPrefixSubgroup = "itx-groupsio-v2-subgroup." + kvPrefixMember = "itx-groupsio-v2-member." + + // sdcDeletedAt is the field injected by lfx-v1-sync-helper on DynamoDB REMOVE events. + sdcDeletedAt = "_sdc_deleted_at" +) + +// eventHandler implements port.DataEventHandler and routes KV events to the +// appropriate per-entity handler based on the key prefix. +type eventHandler struct { + publisher port.MessagePublisher + mappingsKV jetstream.KeyValue +} + +// NewEventHandler constructs a DataEventHandler for GroupsIO entities. +// publisher is used to emit indexer and access control messages. +// mappingsKV is the v1-mappings KV bucket used for idempotency tracking. +func NewEventHandler(publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) port.DataEventHandler { + return &eventHandler{ + publisher: publisher, + mappingsKV: mappingsKV, + } +} + +// HandleChange dispatches a PUT event to the correct entity handler. +// Payloads containing _sdc_deleted_at are treated as soft-deletes. +func (h *eventHandler) HandleChange(ctx context.Context, key string, data map[string]any) bool { + _, isSoftDelete := data[sdcDeletedAt] + + switch { + case strings.HasPrefix(key, kvPrefixService): + uid := key[len(kvPrefixService):] + if isSoftDelete { + return handleServiceDelete(ctx, uid, h.publisher, h.mappingsKV) + } + return handleServiceUpdate(ctx, uid, data, h.publisher, h.mappingsKV) + + case strings.HasPrefix(key, kvPrefixSubgroup): + uid := key[len(kvPrefixSubgroup):] + if isSoftDelete { + return handleSubgroupDelete(ctx, uid, h.publisher, h.mappingsKV) + } + return handleSubgroupUpdate(ctx, uid, data, h.publisher, h.mappingsKV) + + case strings.HasPrefix(key, kvPrefixMember): + uid := key[len(kvPrefixMember):] + if isSoftDelete { + return handleMemberDelete(ctx, uid, h.publisher, h.mappingsKV) + } + return handleMemberUpdate(ctx, uid, data, h.publisher, h.mappingsKV) + + default: + slog.WarnContext(ctx, "unrecognized KV key prefix in HandleChange, ACKing", "key", key) + return false + } +} + +// HandleRemoval dispatches a hard DELETE or PURGE event to the correct entity handler. +func (h *eventHandler) HandleRemoval(ctx context.Context, key string) bool { + switch { + case strings.HasPrefix(key, kvPrefixService): + return handleServiceDelete(ctx, key[len(kvPrefixService):], h.publisher, h.mappingsKV) + + case strings.HasPrefix(key, kvPrefixSubgroup): + return handleSubgroupDelete(ctx, key[len(kvPrefixSubgroup):], h.publisher, h.mappingsKV) + + case strings.HasPrefix(key, kvPrefixMember): + return handleMemberDelete(ctx, key[len(kvPrefixMember):], h.publisher, h.mappingsKV) + + default: + slog.WarnContext(ctx, "unrecognized KV key prefix in HandleRemoval, ACKing", "key", key) + return false + } +} diff --git a/cmd/mailing-list-api/eventing/datastream/mapping.go b/cmd/mailing-list-api/eventing/datastream/mapping.go new file mode 100644 index 0000000..e7375b5 --- /dev/null +++ b/cmd/mailing-list-api/eventing/datastream/mapping.go @@ -0,0 +1,67 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package datastream + +import ( + "context" + "fmt" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + "github.com/nats-io/nats.go/jetstream" +) + +// buildMappingKey returns the v1-mappings KV key for a given prefix and UID. +func buildMappingKey(prefix, uid string) string { + return fmt.Sprintf("%s.%s", prefix, uid) +} + +// resolveAction queries the v1-mappings KV bucket to decide whether this event +// represents a create or an update. +// +// A missing key or a tombstone ("!del") entry both yield ActionCreated — the +// former because the record has never been seen, the latter because the entity +// was previously deleted and is now being re-created. +func resolveAction(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) model.MessageAction { + entry, err := mappingsKV.Get(ctx, mappingKey) + if err != nil || entry == nil { + return model.ActionCreated + } + if string(entry.Value()) == constants.KVTombstoneMarker { + return model.ActionCreated + } + return model.ActionUpdated +} + +// isMappingPresent reports whether a mapping key exists and is not tombstoned. +// Used for parent dependency checks (service before subgroup, subgroup before member). +func isMappingPresent(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) bool { + entry, err := mappingsKV.Get(ctx, mappingKey) + if err != nil || entry == nil { + return false + } + return string(entry.Value()) != constants.KVTombstoneMarker +} + +// isTombstoned reports whether the v1-mappings entry for mappingKey has already +// been marked as deleted, so that duplicate delete events can be skipped. +func isTombstoned(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) bool { + entry, err := mappingsKV.Get(ctx, mappingKey) + if err != nil || entry == nil { + return false + } + return string(entry.Value()) == constants.KVTombstoneMarker +} + +// putMapping records that uid has been successfully processed so subsequent +// events for the same key are treated as updates rather than creates. +func putMapping(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey, uid string) { + _, _ = mappingsKV.Put(ctx, mappingKey, []byte(uid)) +} + +// putTombstone writes the deletion marker into v1-mappings to prevent duplicate +// processing of the same delete on consumer redelivery. +func putTombstone(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) { + _, _ = mappingsKV.Put(ctx, mappingKey, []byte(constants.KVTombstoneMarker)) +} diff --git a/cmd/mailing-list-api/eventing/datastream/member_handler.go b/cmd/mailing-list-api/eventing/datastream/member_handler.go new file mode 100644 index 0000000..0594a81 --- /dev/null +++ b/cmd/mailing-list-api/eventing/datastream/member_handler.go @@ -0,0 +1,107 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package datastream + +import ( + "context" + "log/slog" + "time" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + pkgerrors "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/errors" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/mapconv" + "github.com/nats-io/nats.go/jetstream" +) + +// handleMemberUpdate transforms the v1 payload into a GrpsIOMember and publishes an +// indexer message. Returns true to NAK when the parent subgroup mapping is absent +// (ordering guarantee) or on transient errors. +// +// No FGA access message is published — member access is inherited from the parent +// mailing list's access record. +func handleMemberUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { + member := transformToGrpsIOMember(uid, data) + + // Parent dependency check: ensure the parent mailing list is already indexed. + subgroupKey := buildMappingKey(constants.KVMappingPrefixSubgroup, member.MailingListUID) + if !isMappingPresent(ctx, mappingsKV, subgroupKey) { + slog.WarnContext(ctx, "parent subgroup not yet processed, NAKing member for retry", + "uid", uid, "mailing_list_uid", member.MailingListUID) + return true // NAK — retry with backoff + } + + mKey := buildMappingKey(constants.KVMappingPrefixMember, uid) + action := resolveAction(ctx, mappingsKV, mKey) + + msg := &model.IndexerMessage{Action: action, Tags: member.Tags()} + built, err := msg.Build(ctx, member) + if err != nil { + slog.ErrorContext(ctx, "failed to build member indexer message", "uid", uid, "error", err) + return false + } + + if err := publisher.Indexer(ctx, constants.IndexGroupsIOMemberSubject, built); err != nil { + slog.ErrorContext(ctx, "failed to publish member indexer message", "uid", uid, "error", err) + return pkgerrors.IsTransient(err) + } + + putMapping(ctx, mappingsKV, mKey, uid) + return false +} + +// handleMemberDelete publishes a delete indexer message and tombstones the mapping. +func handleMemberDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { + mKey := buildMappingKey(constants.KVMappingPrefixMember, uid) + + if isTombstoned(ctx, mappingsKV, mKey) { + slog.InfoContext(ctx, "member already deleted, ACKing duplicate", "uid", uid) + return false + } + + msg := &model.IndexerMessage{Action: model.ActionDeleted} + built, err := msg.Build(ctx, uid) + if err != nil { + slog.ErrorContext(ctx, "failed to build member delete indexer message", "uid", uid, "error", err) + return false + } + + if err := publisher.Indexer(ctx, constants.IndexGroupsIOMemberSubject, built); err != nil { + slog.ErrorContext(ctx, "failed to publish member delete indexer message", "uid", uid, "error", err) + return pkgerrors.IsTransient(err) + } + + putTombstone(ctx, mappingsKV, mKey) + return false +} + +// transformToGrpsIOMember maps v1 DynamoDB fields to the GrpsIOMember domain model. +func transformToGrpsIOMember(uid string, data map[string]any) *model.GrpsIOMember { + member := &model.GrpsIOMember{ + UID: uid, + MemberID: mapconv.Int64Ptr(data, "member_id"), + GroupID: mapconv.Int64Ptr(data, "group_id"), + MailingListUID: mapconv.StringVal(data, "mailing_list_uid"), + Username: mapconv.StringVal(data, "username"), + FirstName: mapconv.StringVal(data, "first_name"), + LastName: mapconv.StringVal(data, "last_name"), + Email: mapconv.StringVal(data, "email"), + Organization: mapconv.StringVal(data, "organization"), + JobTitle: mapconv.StringVal(data, "job_title"), + MemberType: mapconv.StringVal(data, "member_type"), + DeliveryMode: mapconv.StringVal(data, "delivery_mode"), + ModStatus: mapconv.StringVal(data, "mod_status"), + Status: mapconv.StringVal(data, "status"), + Source: "v1-sync", + } + + if ts := mapconv.StringVal(data, "last_modified_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + member.UpdatedAt = t + } + } + + return member +} diff --git a/cmd/mailing-list-api/eventing/datastream/service_handler.go b/cmd/mailing-list-api/eventing/datastream/service_handler.go new file mode 100644 index 0000000..967cd07 --- /dev/null +++ b/cmd/mailing-list-api/eventing/datastream/service_handler.go @@ -0,0 +1,110 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package datastream + +import ( + "context" + "log/slog" + "time" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + pkgerrors "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/errors" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/mapconv" + "github.com/nats-io/nats.go/jetstream" +) + +// handleServiceUpdate transforms the v1 payload into a GrpsIOService and publishes +// indexer + access control messages. Returns true to NAK on transient errors. +func handleServiceUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { + svc := transformToGrpsIOService(uid, data) + mKey := buildMappingKey(constants.KVMappingPrefixService, uid) + action := resolveAction(ctx, mappingsKV, mKey) + + msg := &model.IndexerMessage{Action: action, Tags: svc.Tags()} + built, err := msg.Build(ctx, svc) + if err != nil { + slog.ErrorContext(ctx, "failed to build service indexer message", "uid", uid, "error", err) + return false + } + + if err := publisher.Indexer(ctx, constants.IndexGroupsIOServiceSubject, built); err != nil { + slog.ErrorContext(ctx, "failed to publish service indexer message", "uid", uid, "error", err) + return pkgerrors.IsTransient(err) + } + + accessMsg := &model.AccessMessage{ + UID: uid, + ObjectType: "groupsio_service", + Public: svc.Public, + References: map[string][]string{"project": {svc.ProjectUID}}, + } + if err := publisher.Access(ctx, constants.UpdateAccessGroupsIOServiceSubject, accessMsg); err != nil { + slog.WarnContext(ctx, "failed to publish service access message", "uid", uid, "error", err) + } + + putMapping(ctx, mappingsKV, mKey, uid) + return false +} + +// handleServiceDelete publishes a delete indexer message and tombstones the mapping. +// Returns true to NAK on transient errors. +func handleServiceDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { + mKey := buildMappingKey(constants.KVMappingPrefixService, uid) + + if isTombstoned(ctx, mappingsKV, mKey) { + slog.InfoContext(ctx, "service already deleted, ACKing duplicate", "uid", uid) + return false + } + + msg := &model.IndexerMessage{Action: model.ActionDeleted} + built, err := msg.Build(ctx, uid) + if err != nil { + slog.ErrorContext(ctx, "failed to build service delete indexer message", "uid", uid, "error", err) + return false + } + + if err := publisher.Indexer(ctx, constants.IndexGroupsIOServiceSubject, built); err != nil { + slog.ErrorContext(ctx, "failed to publish service delete indexer message", "uid", uid, "error", err) + return pkgerrors.IsTransient(err) + } + + accessMsg := &model.AccessMessage{UID: uid, ObjectType: "groupsio_service"} + if err := publisher.Access(ctx, constants.DeleteAllAccessGroupsIOServiceSubject, accessMsg); err != nil { + slog.WarnContext(ctx, "failed to publish service delete access message", "uid", uid, "error", err) + } + + putTombstone(ctx, mappingsKV, mKey) + return false +} + +// transformToGrpsIOService maps v1 DynamoDB fields to the GrpsIOService domain model. +// Source is always "v1-sync" to distinguish these from API-created records. +func transformToGrpsIOService(uid string, data map[string]any) *model.GrpsIOService { + svc := &model.GrpsIOService{ + UID: uid, + Type: mapconv.StringVal(data, "type"), + Domain: mapconv.StringVal(data, "domain"), + GroupID: mapconv.Int64Ptr(data, "group_id"), + Status: mapconv.StringVal(data, "status"), + Source: "v1-sync", + GlobalOwners: mapconv.StringSliceVal(data, "global_owners"), + Prefix: mapconv.StringVal(data, "prefix"), + ParentServiceUID: mapconv.StringVal(data, "parent_service_uid"), + ProjectSlug: mapconv.StringVal(data, "project_slug"), + ProjectUID: mapconv.StringVal(data, "project_uid"), + URL: mapconv.StringVal(data, "url"), + GroupName: mapconv.StringVal(data, "group_name"), + Public: mapconv.BoolVal(data, "public"), + } + + if ts := mapconv.StringVal(data, "last_modified_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + svc.UpdatedAt = t + } + } + + return svc +} diff --git a/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go b/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go new file mode 100644 index 0000000..1df6edd --- /dev/null +++ b/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go @@ -0,0 +1,121 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package datastream + +import ( + "context" + "log/slog" + "time" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + pkgerrors "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/errors" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/mapconv" + "github.com/nats-io/nats.go/jetstream" +) + +// handleSubgroupUpdate transforms the v1 payload into a GrpsIOMailingList and publishes +// indexer + access control messages. Returns true to NAK when the parent service mapping +// is absent (ordering guarantee) or on transient errors. +func handleSubgroupUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { + list := transformToGrpsIOMailingList(uid, data) + + // Parent dependency check: the indexer must have the parent service record before + // the child mailing list to avoid orphaned documents in OpenSearch. + serviceKey := buildMappingKey(constants.KVMappingPrefixService, list.ServiceUID) + if !isMappingPresent(ctx, mappingsKV, serviceKey) { + slog.WarnContext(ctx, "parent service not yet processed, NAKing subgroup for retry", + "uid", uid, "service_uid", list.ServiceUID) + return true // NAK — retry with backoff + } + + mKey := buildMappingKey(constants.KVMappingPrefixSubgroup, uid) + action := resolveAction(ctx, mappingsKV, mKey) + + msg := &model.IndexerMessage{Action: action, Tags: list.Tags()} + built, err := msg.Build(ctx, list) + if err != nil { + slog.ErrorContext(ctx, "failed to build subgroup indexer message", "uid", uid, "error", err) + return false + } + + if err := publisher.Indexer(ctx, constants.IndexGroupsIOMailingListSubject, built); err != nil { + slog.ErrorContext(ctx, "failed to publish subgroup indexer message", "uid", uid, "error", err) + return pkgerrors.IsTransient(err) + } + + accessMsg := &model.AccessMessage{ + UID: uid, + ObjectType: "groupsio_mailing_list", + Public: list.Public, + References: map[string][]string{ + "project": {list.ProjectUID}, + "groupsio_service": {list.ServiceUID}, + }, + } + if err := publisher.Access(ctx, constants.UpdateAccessGroupsIOMailingListSubject, accessMsg); err != nil { + slog.WarnContext(ctx, "failed to publish subgroup access message", "uid", uid, "error", err) + } + + putMapping(ctx, mappingsKV, mKey, uid) + return false +} + +// handleSubgroupDelete publishes a delete indexer message and tombstones the mapping. +func handleSubgroupDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { + mKey := buildMappingKey(constants.KVMappingPrefixSubgroup, uid) + + if isTombstoned(ctx, mappingsKV, mKey) { + slog.InfoContext(ctx, "subgroup already deleted, ACKing duplicate", "uid", uid) + return false + } + + msg := &model.IndexerMessage{Action: model.ActionDeleted} + built, err := msg.Build(ctx, uid) + if err != nil { + slog.ErrorContext(ctx, "failed to build subgroup delete indexer message", "uid", uid, "error", err) + return false + } + + if err := publisher.Indexer(ctx, constants.IndexGroupsIOMailingListSubject, built); err != nil { + slog.ErrorContext(ctx, "failed to publish subgroup delete indexer message", "uid", uid, "error", err) + return pkgerrors.IsTransient(err) + } + + accessMsg := &model.AccessMessage{UID: uid, ObjectType: "groupsio_mailing_list"} + if err := publisher.Access(ctx, constants.DeleteAllAccessGroupsIOMailingListSubject, accessMsg); err != nil { + slog.WarnContext(ctx, "failed to publish subgroup delete access message", "uid", uid, "error", err) + } + + putTombstone(ctx, mappingsKV, mKey) + return false +} + +// transformToGrpsIOMailingList maps v1 DynamoDB fields to the GrpsIOMailingList domain model. +func transformToGrpsIOMailingList(uid string, data map[string]any) *model.GrpsIOMailingList { + list := &model.GrpsIOMailingList{ + UID: uid, + GroupID: mapconv.Int64Ptr(data, "group_id"), + GroupName: mapconv.StringVal(data, "group_name"), + Public: mapconv.BoolVal(data, "public"), + AudienceAccess: mapconv.StringVal(data, "audience_access"), + Type: mapconv.StringVal(data, "type"), + Description: mapconv.StringVal(data, "description"), + Title: mapconv.StringVal(data, "title"), + SubjectTag: mapconv.StringVal(data, "subject_tag"), + ServiceUID: mapconv.StringVal(data, "service_uid"), + ProjectUID: mapconv.StringVal(data, "project_uid"), + SubscriberCount: mapconv.IntVal(data, "subscriber_count"), + Source: "v1-sync", + } + + if ts := mapconv.StringVal(data, "last_modified_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + list.UpdatedAt = t + } + } + + return list +} diff --git a/cmd/mailing-list-api/eventing/event_processor.go b/cmd/mailing-list-api/eventing/event_processor.go new file mode 100644 index 0000000..dbf5c82 --- /dev/null +++ b/cmd/mailing-list-api/eventing/event_processor.go @@ -0,0 +1,130 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package eventing + +import ( + "context" + "fmt" + "log/slog" + "time" + + infraNATS "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/nats" + "github.com/nats-io/nats.go" + "github.com/nats-io/nats.go/jetstream" +) + +// Config holds the configuration for an EventProcessor. +type Config struct { + // ConsumerName is the durable consumer name (survives restarts). + ConsumerName string + // StreamName is the JetStream stream to consume from (e.g. "KV_v1-objects"). + StreamName string + // MaxDeliver is the maximum number of delivery attempts before giving up. + MaxDeliver int + // AckWait is how long the server waits for an ACK before redelivering. + AckWait time.Duration + // MaxAckPending is the maximum number of unacknowledged messages in flight. + MaxAckPending int +} + +// EventProcessor is the interface for JetStream KV bucket event consumers. +// Start blocks until ctx is cancelled; Stop performs a graceful shutdown. +type EventProcessor interface { + Start(ctx context.Context) error + Stop(ctx context.Context) error +} + +// natsEventProcessor is the NATS JetStream implementation of EventProcessor. +// It takes ownership of conn — draining and closing it on Stop. +type natsEventProcessor struct { + natsConn *nats.Conn + jsInstance jetstream.JetStream + consumer jetstream.Consumer + consumeCtx jetstream.ConsumeContext + streamConsumer infraNATS.DataStreamProcessor + config Config +} + +// NewEventProcessor creates an EventProcessor from an existing NATS connection and handler. +// conn ownership is transferred — the processor drains and closes it on Stop. +func NewEventProcessor(ctx context.Context, cfg Config, conn *nats.Conn, streamConsumer infraNATS.DataStreamProcessor) (EventProcessor, error) { + js, err := jetstream.New(conn) + if err != nil { + return nil, fmt.Errorf("failed to create JetStream context: %w", err) + } + + return &natsEventProcessor{ + natsConn: conn, + jsInstance: js, + streamConsumer: streamConsumer, + config: cfg, + }, nil +} + +// Start creates (or resumes) the durable JetStream consumer and processes messages +// until ctx is cancelled. +func (ep *natsEventProcessor) Start(ctx context.Context) error { + slog.InfoContext(ctx, "starting data stream processor", "consumer_name", ep.config.ConsumerName) + + consumer, err := ep.jsInstance.CreateOrUpdateConsumer(ctx, ep.config.StreamName, jetstream.ConsumerConfig{ + Name: ep.config.ConsumerName, + Durable: ep.config.ConsumerName, + // DeliverLastPerSubjectPolicy resumes from the last seen record per KV key after a + // restart, avoiding a full replay while ensuring no in-flight event is dropped. + DeliverPolicy: jetstream.DeliverLastPerSubjectPolicy, + AckPolicy: jetstream.AckExplicitPolicy, + FilterSubjects: []string{ + "$KV.v1-objects.itx-groupsio-v2-service.>", + "$KV.v1-objects.itx-groupsio-v2-subgroup.>", + "$KV.v1-objects.itx-groupsio-v2-member.>", + }, + MaxDeliver: ep.config.MaxDeliver, + AckWait: ep.config.AckWait, + MaxAckPending: ep.config.MaxAckPending, + Description: "Durable KV watcher for mailing-list-service GroupsIO entities", + }) + if err != nil { + return fmt.Errorf("failed to create or update consumer: %w", err) + } + ep.consumer = consumer + + consumeCtx, err := consumer.Consume( + func(msg jetstream.Msg) { + ep.streamConsumer.Process(ctx, msg) + }, + jetstream.ConsumeErrHandler(func(_ jetstream.ConsumeContext, err error) { + slog.With("error", err).Error("data stream KV consumer error") + }), + ) + if err != nil { + return fmt.Errorf("failed to start consuming messages: %w", err) + } + ep.consumeCtx = consumeCtx + + slog.InfoContext(ctx, "data stream processor started successfully") + <-ctx.Done() + slog.InfoContext(ctx, "data stream processor context cancelled") + return nil +} + +// Stop drains and closes the dedicated NATS connection. +func (ep *natsEventProcessor) Stop(ctx context.Context) error { + slog.InfoContext(ctx, "stopping data stream processor") + + if ep.consumeCtx != nil { + ep.consumeCtx.Stop() + slog.InfoContext(ctx, "data stream consumer stopped") + } + + if ep.natsConn != nil { + if err := ep.natsConn.Drain(); err != nil { + slog.ErrorContext(ctx, "error draining data stream NATS connection", "error", err) + } + ep.natsConn.Close() + slog.InfoContext(ctx, "data stream NATS connection closed") + } + + slog.InfoContext(ctx, "data stream processor stopped") + return nil +} diff --git a/cmd/mailing-list-api/main.go b/cmd/mailing-list-api/main.go index 1f3f986..c9a47cb 100644 --- a/cmd/mailing-list-api/main.go +++ b/cmd/mailing-list-api/main.go @@ -153,6 +153,12 @@ func main() { os.Exit(1) } + // Start data stream processor for v1 DynamoDB KV events (optional — enabled via env var) + if err := handleDataStream(ctx, &wg); err != nil { + slog.ErrorContext(ctx, "FATAL: failed to start data stream processor", "error", err) + os.Exit(1) + } + // Wait for signal. slog.InfoContext(ctx, "received shutdown signal, stopping servers", "signal", <-errc, diff --git a/internal/domain/port/data_stream_event_handler.go b/internal/domain/port/data_stream_event_handler.go new file mode 100644 index 0000000..84e122c --- /dev/null +++ b/internal/domain/port/data_stream_event_handler.go @@ -0,0 +1,21 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package port + +import "context" + +// DataEventHandler handles keyed data change events from an external data source +// (e.g. a NATS JetStream KV bucket sourced from DynamoDB via lfx-v1-sync-helper). +// +// HandleChange is called when a record is created, updated, or soft-deleted (the +// caller is responsible for detecting soft-deletes from the payload fields). +// HandleRemoval is called when a record is hard-deleted or purged. +// +// Both methods return true to signal a transient error (retry with exponential +// backoff) or false to acknowledge the event (success, or a permanent error that +// should not be retried). +type DataEventHandler interface { + HandleChange(ctx context.Context, key string, data map[string]any) bool + HandleRemoval(ctx context.Context, key string) bool +} diff --git a/internal/infrastructure/nats/data_stream_consumer.go b/internal/infrastructure/nats/data_stream_consumer.go new file mode 100644 index 0000000..0231666 --- /dev/null +++ b/internal/infrastructure/nats/data_stream_consumer.go @@ -0,0 +1,111 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package nats + +import ( + "context" + "encoding/json" + "log/slog" + "strings" + "time" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" + "github.com/nats-io/nats.go/jetstream" +) + +// DataStreamProcessor routes JetStream KV messages to a DataEventHandler and +// manages ACK/NAK with exponential backoff. It is the interface used by the +// eventing layer to decouple message dispatch from NATS internals. +type DataStreamProcessor interface { + Process(ctx context.Context, msg jetstream.Msg) +} + +// dataStreamConsumer is the NATS JetStream implementation of DataStreamProcessor. +type dataStreamConsumer struct { + handler port.DataEventHandler +} + +// Process routes a single JetStream KV message to the appropriate DataEventHandler +// method (HandleChange or HandleRemoval), then ACKs or NAKs with exponential backoff +// based on the handler's return value. +// +// Unrecoverable parse errors (bad metadata, invalid JSON) are always ACKed to +// prevent a poison-pill loop. +func (c *dataStreamConsumer) Process(ctx context.Context, msg jetstream.Msg) { + meta, err := msg.Metadata() + if err != nil { + slog.ErrorContext(ctx, "failed to read stream message metadata, ACKing to avoid poison pill", + "subject", msg.Subject(), "error", err) + c.ack(ctx, msg) + return + } + + key := keyFromSubject(msg.Subject()) + + var nak bool + if isRemoval(msg) { + nak = c.handler.HandleRemoval(ctx, key) + } else { + var data map[string]any + if jsonErr := json.Unmarshal(msg.Data(), &data); jsonErr != nil { + slog.ErrorContext(ctx, "failed to unmarshal stream message payload, ACKing to avoid poison pill", + "key", key, "error", jsonErr) + c.ack(ctx, msg) + return + } + nak = c.handler.HandleChange(ctx, key, data) + } + + if nak { + delay := nakDelay(meta.NumDelivered) + if nakErr := msg.NakWithDelay(delay); nakErr != nil { + slog.ErrorContext(ctx, "failed to NAK stream message", + "key", key, "delay", delay, "error", nakErr) + } + return + } + + c.ack(ctx, msg) +} + +func (c *dataStreamConsumer) ack(ctx context.Context, msg jetstream.Msg) { + if err := msg.Ack(); err != nil { + slog.ErrorContext(ctx, "failed to ACK stream message", "subject", msg.Subject(), "error", err) + } +} + +// keyFromSubject strips the "$KV.." prefix from a JetStream KV subject, +// returning the bare key. Subject format: $KV.. +func keyFromSubject(subject string) string { + idx := strings.Index(subject, ".") + if idx == -1 { + return subject + } + idx2 := strings.Index(subject[idx+1:], ".") + if idx2 == -1 { + return subject + } + return subject[idx+idx2+2:] +} + +func isRemoval(msg jetstream.Msg) bool { + op := msg.Headers().Get("Kv-Operation") + return op == "DEL" || op == "PURGE" +} + +func nakDelay(numDelivered uint64) time.Duration { + switch numDelivered { + case 1: + return 2 * time.Second + case 2: + return 10 * time.Second + default: + return 20 * time.Second + } +} + +// NewDataStreamConsumer creates a DataStreamProcessor that dispatches messages to handler. +func NewDataStreamConsumer(handler port.DataEventHandler) DataStreamProcessor { + return &dataStreamConsumer{handler: handler} +} diff --git a/pkg/constants/storage.go b/pkg/constants/storage.go index dc4a46e..da7831a 100644 --- a/pkg/constants/storage.go +++ b/pkg/constants/storage.go @@ -51,6 +51,24 @@ const ( // KVLookupGroupsIOMemberByGroupIDPrefix is the key pattern for GroupsIOGroupID index (lookup by Groups.io group ID) KVLookupGroupsIOMemberByGroupIDPrefix = "lookup/groupsio-member-groupid/%d" + // KVBucketNameV1Mappings is the shared KV bucket used by v1 eventing consumers to track + // processed entities (idempotency, created-vs-updated, tombstone markers for deletes). + KVBucketNameV1Mappings = "v1-mappings" + + // KVBucketV1Objects is the NATS KV bucket that lfx-v1-sync-helper writes DynamoDB records into. + KVBucketV1Objects = "v1-objects" + + // KVTombstoneMarker is the value written to v1-mappings after a successful delete, + // preventing duplicate delete processing on consumer redelivery. + KVTombstoneMarker = "!del" + + // KVMappingPrefixService is the v1-mappings key prefix for GroupsIO services. + KVMappingPrefixService = "groupsio-service" + // KVMappingPrefixSubgroup is the v1-mappings key prefix for GroupsIO subgroups (mailing lists). + KVMappingPrefixSubgroup = "groupsio-subgroup" + // KVMappingPrefixMember is the v1-mappings key prefix for GroupsIO members. + KVMappingPrefixMember = "groupsio-member" + // Key prefixes for bucket detection // GroupsIOMailingListKeyPrefix is the common prefix for all mailing list related keys GroupsIOMailingListKeyPrefix = "lookup/groupsio-mailing-list/" diff --git a/pkg/errors/transient.go b/pkg/errors/transient.go new file mode 100644 index 0000000..7cc93a7 --- /dev/null +++ b/pkg/errors/transient.go @@ -0,0 +1,20 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package errors + +import "strings" + +// IsTransient reports whether err is likely to resolve on retry. +// It matches against common keywords indicating network or availability failures: +// timeout, connection, unavailable, and deadline. +func IsTransient(err error) bool { + if err == nil { + return false + } + s := strings.ToLower(err.Error()) + return strings.Contains(s, "timeout") || + strings.Contains(s, "connection") || + strings.Contains(s, "unavailable") || + strings.Contains(s, "deadline") +} diff --git a/pkg/mapconv/field_extract.go b/pkg/mapconv/field_extract.go new file mode 100644 index 0000000..ffedb58 --- /dev/null +++ b/pkg/mapconv/field_extract.go @@ -0,0 +1,123 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +// Package mapconv provides typed field extraction from map[string]any payloads. +// +// JSON decoded with json.Unmarshal produces map[string]any where numeric values +// are float64 and compound values are []any or map[string]any. Each function +// handles these standard representations so callers do not need to branch on +// the raw type. +package mapconv + +import ( + "fmt" + "strings" +) + +// StringVal extracts a string value from data[key]. +// Numeric values are formatted with %g (no unnecessary trailing zeros). +// Returns "" if the key is absent or nil. +func StringVal(data map[string]any, key string) string { + v, ok := data[key] + if !ok || v == nil { + return "" + } + switch t := v.(type) { + case string: + return t + case float64: + return fmt.Sprintf("%g", t) + default: + return fmt.Sprintf("%v", t) + } +} + +// Int64Ptr extracts a nullable int64 from data[key]. +// Accepts float64 (standard JSON number) or string representations. +// Returns nil if the key is absent, nil, or unparseable. +func Int64Ptr(data map[string]any, key string) *int64 { + v, ok := data[key] + if !ok || v == nil { + return nil + } + var n int64 + switch t := v.(type) { + case float64: + n = int64(t) + case string: + if t == "" { + return nil + } + if _, err := fmt.Sscanf(t, "%d", &n); err != nil { + return nil + } + default: + return nil + } + return &n +} + +// IntVal extracts an int from data[key]. +// Accepts float64 or string representations. +// Returns 0 if the key is absent, nil, or unparseable. +func IntVal(data map[string]any, key string) int { + v, ok := data[key] + if !ok || v == nil { + return 0 + } + switch t := v.(type) { + case float64: + return int(t) + case string: + var n int + fmt.Sscanf(t, "%d", &n) //nolint:errcheck + return n + default: + return 0 + } +} + +// BoolVal extracts a bool from data[key]. +// Accepts JSON boolean or the strings "true" / "false" (case-insensitive). +// Returns false if the key is absent or the value cannot be interpreted. +func BoolVal(data map[string]any, key string) bool { + v, ok := data[key] + if !ok || v == nil { + return false + } + switch t := v.(type) { + case bool: + return t + case string: + return strings.EqualFold(t, "true") + default: + return false + } +} + +// StringSliceVal extracts a []string from data[key]. +// Accepts a JSON array of strings or a bare string (returned as a one-element slice). +// Returns nil if the key is absent or the value is an empty string. +func StringSliceVal(data map[string]any, key string) []string { + v, ok := data[key] + if !ok || v == nil { + return nil + } + switch t := v.(type) { + case []any: + out := make([]string, 0, len(t)) + for _, item := range t { + if s, ok := item.(string); ok { + out = append(out, s) + } + } + return out + case string: + if t == "" { + return nil + } + return []string{t} + default: + return nil + } +} diff --git a/pkg/mapconv/field_extract_test.go b/pkg/mapconv/field_extract_test.go new file mode 100644 index 0000000..417a306 --- /dev/null +++ b/pkg/mapconv/field_extract_test.go @@ -0,0 +1,116 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package mapconv_test + +import ( + "testing" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/mapconv" + "github.com/stretchr/testify/assert" +) + +func TestStringVal(t *testing.T) { + tests := []struct { + name string + data map[string]any + key string + expected string + }{ + {"string value", map[string]any{"k": "hello"}, "k", "hello"}, + {"float64 whole number", map[string]any{"k": float64(42)}, "k", "42"}, + {"float64 with decimals", map[string]any{"k": float64(3.14)}, "k", "3.14"}, + {"nil value", map[string]any{"k": nil}, "k", ""}, + {"missing key", map[string]any{}, "k", ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, mapconv.StringVal(tt.data, tt.key)) + }) + } +} + +func TestInt64Ptr(t *testing.T) { + ptr := func(n int64) *int64 { return &n } + + tests := []struct { + name string + data map[string]any + key string + expected *int64 + }{ + {"float64 value", map[string]any{"k": float64(99)}, "k", ptr(99)}, + {"string value", map[string]any{"k": "12345"}, "k", ptr(12345)}, + {"empty string", map[string]any{"k": ""}, "k", nil}, + {"nil value", map[string]any{"k": nil}, "k", nil}, + {"missing key", map[string]any{}, "k", nil}, + {"unparseable string", map[string]any{"k": "abc"}, "k", nil}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, mapconv.Int64Ptr(tt.data, tt.key)) + }) + } +} + +func TestIntVal(t *testing.T) { + tests := []struct { + name string + data map[string]any + key string + expected int + }{ + {"float64 value", map[string]any{"k": float64(7)}, "k", 7}, + {"string value", map[string]any{"k": "250"}, "k", 250}, + {"nil value", map[string]any{"k": nil}, "k", 0}, + {"missing key", map[string]any{}, "k", 0}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, mapconv.IntVal(tt.data, tt.key)) + }) + } +} + +func TestBoolVal(t *testing.T) { + tests := []struct { + name string + data map[string]any + key string + expected bool + }{ + {"bool true", map[string]any{"k": true}, "k", true}, + {"bool false", map[string]any{"k": false}, "k", false}, + {"string true", map[string]any{"k": "true"}, "k", true}, + {"string TRUE uppercase", map[string]any{"k": "TRUE"}, "k", true}, + {"string false", map[string]any{"k": "false"}, "k", false}, + {"nil value", map[string]any{"k": nil}, "k", false}, + {"missing key", map[string]any{}, "k", false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, mapconv.BoolVal(tt.data, tt.key)) + }) + } +} + +func TestStringSliceVal(t *testing.T) { + tests := []struct { + name string + data map[string]any + key string + expected []string + }{ + {"array of strings", map[string]any{"k": []any{"a", "b", "c"}}, "k", []string{"a", "b", "c"}}, + {"single string", map[string]any{"k": "only"}, "k", []string{"only"}}, + {"empty string", map[string]any{"k": ""}, "k", nil}, + {"nil value", map[string]any{"k": nil}, "k", nil}, + {"missing key", map[string]any{}, "k", nil}, + {"non-string items in array are skipped", map[string]any{"k": []any{"a", float64(1), "b"}}, "k", []string{"a", "b"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, mapconv.StringSliceVal(tt.data, tt.key)) + }) + } +} From f2bfb193f4d006d13b59a65a4a34b1688ebd1a1c Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Wed, 11 Mar 2026 14:06:53 -0300 Subject: [PATCH 02/11] feat(eventing): refactor data stream processing to use NATS client and improve error handling Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- cmd/mailing-list-api/data_stream.go | 45 ++-------- .../eventing/datastream/mapping.go | 13 ++- .../eventing/event_processor.go | 85 +++++++++++------- internal/domain/model/stream_message.go | 23 +++++ internal/domain/port/data_stream_processor.go | 15 ++++ internal/infrastructure/nats/client.go | 31 ++++--- .../nats/data_stream_consumer.go | 90 ++++++------------- 7 files changed, 155 insertions(+), 147 deletions(-) create mode 100644 internal/domain/model/stream_message.go create mode 100644 internal/domain/port/data_stream_processor.go diff --git a/cmd/mailing-list-api/data_stream.go b/cmd/mailing-list-api/data_stream.go index 3421690..891e9ee 100644 --- a/cmd/mailing-list-api/data_stream.go +++ b/cmd/mailing-list-api/data_stream.go @@ -17,8 +17,6 @@ import ( "github.com/linuxfoundation/lfx-v2-mailing-list-service/cmd/mailing-list-api/service" infraNATS "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/nats" "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" - "github.com/nats-io/nats.go" - "github.com/nats-io/nats.go/jetstream" ) // handleDataStream starts the durable JetStream consumer that processes DynamoDB KV @@ -32,35 +30,10 @@ func handleDataStream(ctx context.Context, wg *sync.WaitGroup) error { return nil } - cfg := dataStreamConfig() - natsURL := dataStreamNATSURL() - - conn, err := nats.Connect(natsURL, - nats.DrainTimeout(30*time.Second), - nats.ErrorHandler(func(_ *nats.Conn, sub *nats.Subscription, err error) { - attrs := []any{"error", err} - if sub != nil { - attrs = append(attrs, "subject", sub.Subject) - } - slog.With(attrs...).Error("NATS async error (data stream eventing)") - }), - nats.ClosedHandler(func(_ *nats.Conn) { - slog.Warn("NATS data stream eventing connection closed") - }), - ) - if err != nil { - return fmt.Errorf("failed to connect to NATS for data stream eventing: %w", err) - } + natsClient := service.GetNATSClient(ctx) - js, err := jetstream.New(conn) + mappingsKV, err := natsClient.KeyValue(ctx, constants.KVBucketNameV1Mappings) if err != nil { - conn.Close() - return fmt.Errorf("failed to create JetStream context: %w", err) - } - - mappingsKV, err := js.KeyValue(ctx, constants.KVBucketNameV1Mappings) - if err != nil { - conn.Close() return fmt.Errorf("failed to access %s KV bucket: %w", constants.KVBucketNameV1Mappings, err) } @@ -68,9 +41,9 @@ func handleDataStream(ctx context.Context, wg *sync.WaitGroup) error { handler := datastream.NewEventHandler(publisher, mappingsKV) streamConsumer := infraNATS.NewDataStreamConsumer(handler) - processor, err := eventing.NewEventProcessor(ctx, cfg, conn, streamConsumer) + cfg := dataStreamConfig() + processor, err := eventing.NewEventProcessor(ctx, cfg, natsClient) if err != nil { - conn.Close() return fmt.Errorf("failed to create data stream processor: %w", err) } @@ -82,7 +55,7 @@ func handleDataStream(ctx context.Context, wg *sync.WaitGroup) error { wg.Add(1) go func() { defer wg.Done() - if err := processor.Start(ctx); err != nil { + if err := processor.Start(ctx, streamConsumer); err != nil { slog.ErrorContext(ctx, "data stream processor exited with error", "error", err) } }() @@ -106,14 +79,6 @@ func dataStreamEnabled() bool { return os.Getenv("MAILING_LIST_EVENTING_ENABLED") == "true" } -// dataStreamNATSURL returns the NATS URL for the data stream connection. -func dataStreamNATSURL() string { - if url := os.Getenv("NATS_URL"); url != "" { - return url - } - return "nats://localhost:4222" -} - // dataStreamConfig builds eventing.Config from environment variables with // sensible defaults. func dataStreamConfig() eventing.Config { diff --git a/cmd/mailing-list-api/eventing/datastream/mapping.go b/cmd/mailing-list-api/eventing/datastream/mapping.go index e7375b5..53a0194 100644 --- a/cmd/mailing-list-api/eventing/datastream/mapping.go +++ b/cmd/mailing-list-api/eventing/datastream/mapping.go @@ -6,6 +6,7 @@ package datastream import ( "context" "fmt" + "log/slog" "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" @@ -39,6 +40,7 @@ func resolveAction(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKe func isMappingPresent(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) bool { entry, err := mappingsKV.Get(ctx, mappingKey) if err != nil || entry == nil { + slog.WarnContext(ctx, "mapping key not found in v1-mappings", "mapping_key", mappingKey) return false } return string(entry.Value()) != constants.KVTombstoneMarker @@ -49,6 +51,7 @@ func isMappingPresent(ctx context.Context, mappingsKV jetstream.KeyValue, mappin func isTombstoned(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) bool { entry, err := mappingsKV.Get(ctx, mappingKey) if err != nil || entry == nil { + slog.WarnContext(ctx, "mapping key not found in v1-mappings - treating as not tombstoned", "mapping_key", mappingKey) return false } return string(entry.Value()) == constants.KVTombstoneMarker @@ -57,11 +60,17 @@ func isTombstoned(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey // putMapping records that uid has been successfully processed so subsequent // events for the same key are treated as updates rather than creates. func putMapping(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey, uid string) { - _, _ = mappingsKV.Put(ctx, mappingKey, []byte(uid)) + _, errPut := mappingsKV.Put(ctx, mappingKey, []byte(uid)) + if errPut != nil { + slog.ErrorContext(ctx, "failed to put mapping key", "mapping_key", mappingKey, "error", errPut) + } } // putTombstone writes the deletion marker into v1-mappings to prevent duplicate // processing of the same delete on consumer redelivery. func putTombstone(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) { - _, _ = mappingsKV.Put(ctx, mappingKey, []byte(constants.KVTombstoneMarker)) + _, errPut := mappingsKV.Put(ctx, mappingKey, []byte(constants.KVTombstoneMarker)) + if errPut != nil { + slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mappingKey, "error", errPut) + } } diff --git a/cmd/mailing-list-api/eventing/event_processor.go b/cmd/mailing-list-api/eventing/event_processor.go index dbf5c82..55ae80b 100644 --- a/cmd/mailing-list-api/eventing/event_processor.go +++ b/cmd/mailing-list-api/eventing/event_processor.go @@ -7,10 +7,12 @@ import ( "context" "fmt" "log/slog" + "strings" "time" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" infraNATS "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/nats" - "github.com/nats-io/nats.go" "github.com/nats-io/nats.go/jetstream" ) @@ -31,43 +33,32 @@ type Config struct { // EventProcessor is the interface for JetStream KV bucket event consumers. // Start blocks until ctx is cancelled; Stop performs a graceful shutdown. type EventProcessor interface { - Start(ctx context.Context) error + Start(ctx context.Context, streamConsumer port.DataStreamProcessor) error Stop(ctx context.Context) error } // natsEventProcessor is the NATS JetStream implementation of EventProcessor. -// It takes ownership of conn — draining and closing it on Stop. type natsEventProcessor struct { - natsConn *nats.Conn - jsInstance jetstream.JetStream - consumer jetstream.Consumer - consumeCtx jetstream.ConsumeContext - streamConsumer infraNATS.DataStreamProcessor - config Config + natsClient *infraNATS.NATSClient + consumer jetstream.Consumer + consumeCtx jetstream.ConsumeContext + config Config } -// NewEventProcessor creates an EventProcessor from an existing NATS connection and handler. -// conn ownership is transferred — the processor drains and closes it on Stop. -func NewEventProcessor(ctx context.Context, cfg Config, conn *nats.Conn, streamConsumer infraNATS.DataStreamProcessor) (EventProcessor, error) { - js, err := jetstream.New(conn) - if err != nil { - return nil, fmt.Errorf("failed to create JetStream context: %w", err) - } - +// NewEventProcessor creates an EventProcessor backed by the given NATSClient. +func NewEventProcessor(_ context.Context, cfg Config, natsClient *infraNATS.NATSClient) (EventProcessor, error) { return &natsEventProcessor{ - natsConn: conn, - jsInstance: js, - streamConsumer: streamConsumer, - config: cfg, + natsClient: natsClient, + config: cfg, }, nil } // Start creates (or resumes) the durable JetStream consumer and processes messages // until ctx is cancelled. -func (ep *natsEventProcessor) Start(ctx context.Context) error { +func (ep *natsEventProcessor) Start(ctx context.Context, streamConsumer port.DataStreamProcessor) error { slog.InfoContext(ctx, "starting data stream processor", "consumer_name", ep.config.ConsumerName) - consumer, err := ep.jsInstance.CreateOrUpdateConsumer(ctx, ep.config.StreamName, jetstream.ConsumerConfig{ + consumer, err := ep.natsClient.CreateOrUpdateConsumer(ctx, ep.config.StreamName, jetstream.ConsumerConfig{ Name: ep.config.ConsumerName, Durable: ep.config.ConsumerName, // DeliverLastPerSubjectPolicy resumes from the last seen record per KV key after a @@ -90,8 +81,22 @@ func (ep *natsEventProcessor) Start(ctx context.Context) error { ep.consumer = consumer consumeCtx, err := consumer.Consume( - func(msg jetstream.Msg) { - ep.streamConsumer.Process(ctx, msg) + func(jMsg jetstream.Msg) { + meta, err := jMsg.Metadata() + if err != nil { + slog.ErrorContext(ctx, "failed to read stream message metadata, ACKing to avoid poison pill", + "subject", jMsg.Subject(), "error", err) + _ = jMsg.Ack() + return + } + streamConsumer.Process(ctx, model.StreamMessage{ + Key: kvKey(jMsg.Subject()), + Data: jMsg.Data(), + IsRemoval: isKVRemoval(jMsg), + DeliveryCount: meta.NumDelivered, + Ack: jMsg.Ack, + Nak: jMsg.NakWithDelay, + }) }, jetstream.ConsumeErrHandler(func(_ jetstream.ConsumeContext, err error) { slog.With("error", err).Error("data stream KV consumer error") @@ -108,7 +113,8 @@ func (ep *natsEventProcessor) Start(ctx context.Context) error { return nil } -// Stop drains and closes the dedicated NATS connection. +// Stop halts the JetStream consumer. The NATS connection lifecycle is managed +// by the caller (NATSClient). func (ep *natsEventProcessor) Stop(ctx context.Context) error { slog.InfoContext(ctx, "stopping data stream processor") @@ -117,14 +123,25 @@ func (ep *natsEventProcessor) Stop(ctx context.Context) error { slog.InfoContext(ctx, "data stream consumer stopped") } - if ep.natsConn != nil { - if err := ep.natsConn.Drain(); err != nil { - slog.ErrorContext(ctx, "error draining data stream NATS connection", "error", err) - } - ep.natsConn.Close() - slog.InfoContext(ctx, "data stream NATS connection closed") - } - slog.InfoContext(ctx, "data stream processor stopped") return nil } + +// kvKey strips the "$KV.." prefix from a JetStream KV subject, +// returning the bare key. Subject format: $KV.. +func kvKey(subject string) string { + idx := strings.Index(subject, ".") + if idx == -1 { + return subject + } + idx2 := strings.Index(subject[idx+1:], ".") + if idx2 == -1 { + return subject + } + return subject[idx+idx2+2:] +} + +func isKVRemoval(msg jetstream.Msg) bool { + op := msg.Headers().Get("Kv-Operation") + return op == "DEL" || op == "PURGE" +} diff --git a/internal/domain/model/stream_message.go b/internal/domain/model/stream_message.go new file mode 100644 index 0000000..d584c93 --- /dev/null +++ b/internal/domain/model/stream_message.go @@ -0,0 +1,23 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package model + +import "time" + +// StreamMessage represents a single message from a JetStream KV change stream, +// decoupled from any NATS-specific types. +type StreamMessage struct { + // Key is the bare KV key (subject prefix stripped). + Key string + // Data is the raw JSON payload. Nil for removal operations. + Data []byte + // IsRemoval is true for DEL/PURGE operations. + IsRemoval bool + // DeliveryCount is the number of times this message has been delivered. + DeliveryCount uint64 + // Ack acknowledges successful processing. + Ack func() error + // Nak requeues the message with the given backoff delay. + Nak func(delay time.Duration) error +} diff --git a/internal/domain/port/data_stream_processor.go b/internal/domain/port/data_stream_processor.go new file mode 100644 index 0000000..f60f796 --- /dev/null +++ b/internal/domain/port/data_stream_processor.go @@ -0,0 +1,15 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package port + +import ( + "context" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" +) + +// DataStreamProcessor processes individual messages dispatched by an EventProcessor. +type DataStreamProcessor interface { + Process(ctx context.Context, msg model.StreamMessage) +} diff --git a/internal/infrastructure/nats/client.go b/internal/infrastructure/nats/client.go index 01d997c..16a24d3 100644 --- a/internal/infrastructure/nats/client.go +++ b/internal/infrastructure/nats/client.go @@ -19,11 +19,23 @@ import ( // NATSClient wraps the NATS connection and provides access control operations type NATSClient struct { conn *nats.Conn + js jetstream.JetStream config Config kvStore map[string]jetstream.KeyValue timeout time.Duration } +// KeyValue opens the named KV bucket. The bucket is not cached — use KeyValueStore +// for buckets that are accessed repeatedly via the storage adapters. +func (c *NATSClient) KeyValue(ctx context.Context, bucketName string) (jetstream.KeyValue, error) { + return c.js.KeyValue(ctx, bucketName) +} + +// CreateOrUpdateConsumer creates or updates a durable JetStream consumer. +func (c *NATSClient) CreateOrUpdateConsumer(ctx context.Context, streamName string, cfg jetstream.ConsumerConfig) (jetstream.Consumer, error) { + return c.js.CreateOrUpdateConsumer(ctx, streamName, cfg) +} + // NATSClientInterface defines the interface for NATS operations // This allows for easy mocking and testing type NATSClientInterface interface { @@ -68,17 +80,9 @@ func (c *NATSClient) QueueSubscribe(subject, queue string, handler nats.MsgHandl return c.conn.QueueSubscribe(subject, queue, handler) } -// KeyValueStore creates a JetStream client and gets the key-value store for projects. +// KeyValueStore opens the named KV bucket and caches it on the client. func (c *NATSClient) KeyValueStore(ctx context.Context, bucketName string) error { - js, err := jetstream.New(c.conn) - if err != nil { - slog.ErrorContext(ctx, "error creating NATS JetStream client", - "error", err, - "nats_url", c.conn.ConnectedUrl(), - ) - return err - } - kvStore, err := js.KeyValue(ctx, bucketName) + kvStore, err := c.js.KeyValue(ctx, bucketName) if err != nil { slog.ErrorContext(ctx, "error getting NATS JetStream key-value store", "error", err, @@ -144,8 +148,15 @@ func NewClient(ctx context.Context, config Config) (*NATSClient, error) { return nil, errors.NewServiceUnavailable("failed to connect to NATS", err) } + js, err := jetstream.New(conn) + if err != nil { + conn.Close() + return nil, errors.NewServiceUnavailable("failed to create JetStream context", err) + } + client := &NATSClient{ conn: conn, + js: js, config: config, timeout: config.Timeout, } diff --git a/internal/infrastructure/nats/data_stream_consumer.go b/internal/infrastructure/nats/data_stream_consumer.go index 0231666..ff63eb9 100644 --- a/internal/infrastructure/nats/data_stream_consumer.go +++ b/internal/infrastructure/nats/data_stream_consumer.go @@ -7,91 +7,59 @@ import ( "context" "encoding/json" "log/slog" - "strings" "time" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" - "github.com/nats-io/nats.go/jetstream" ) -// DataStreamProcessor routes JetStream KV messages to a DataEventHandler and -// manages ACK/NAK with exponential backoff. It is the interface used by the -// eventing layer to decouple message dispatch from NATS internals. -type DataStreamProcessor interface { - Process(ctx context.Context, msg jetstream.Msg) -} - -// dataStreamConsumer is the NATS JetStream implementation of DataStreamProcessor. +// dataStreamConsumer is the NATS JetStream implementation of port.DataStreamProcessor. type dataStreamConsumer struct { handler port.DataEventHandler } -// Process routes a single JetStream KV message to the appropriate DataEventHandler -// method (HandleChange or HandleRemoval), then ACKs or NAKs with exponential backoff -// based on the handler's return value. +// Process routes a single stream message to the appropriate DataEventHandler method +// (HandleChange or HandleRemoval), then ACKs or NAKs with exponential backoff based +// on the handler's return value. // -// Unrecoverable parse errors (bad metadata, invalid JSON) are always ACKed to -// prevent a poison-pill loop. -func (c *dataStreamConsumer) Process(ctx context.Context, msg jetstream.Msg) { - meta, err := msg.Metadata() - if err != nil { - slog.ErrorContext(ctx, "failed to read stream message metadata, ACKing to avoid poison pill", - "subject", msg.Subject(), "error", err) +// Unrecoverable parse errors (invalid JSON) are always ACKed to prevent a poison-pill loop. +func (c *dataStreamConsumer) Process(ctx context.Context, msg model.StreamMessage) { + if msg.IsRemoval { + if nak := c.handler.HandleRemoval(ctx, msg.Key); nak { + c.nak(ctx, msg) + return + } c.ack(ctx, msg) return } - key := keyFromSubject(msg.Subject()) - - var nak bool - if isRemoval(msg) { - nak = c.handler.HandleRemoval(ctx, key) - } else { - var data map[string]any - if jsonErr := json.Unmarshal(msg.Data(), &data); jsonErr != nil { - slog.ErrorContext(ctx, "failed to unmarshal stream message payload, ACKing to avoid poison pill", - "key", key, "error", jsonErr) - c.ack(ctx, msg) - return - } - nak = c.handler.HandleChange(ctx, key, data) + var data map[string]any + if err := json.Unmarshal(msg.Data, &data); err != nil { + slog.ErrorContext(ctx, "failed to unmarshal stream message payload, ACKing to avoid poison pill", + "key", msg.Key, "error", err) + c.ack(ctx, msg) + return } - if nak { - delay := nakDelay(meta.NumDelivered) - if nakErr := msg.NakWithDelay(delay); nakErr != nil { - slog.ErrorContext(ctx, "failed to NAK stream message", - "key", key, "delay", delay, "error", nakErr) - } + if nak := c.handler.HandleChange(ctx, msg.Key, data); nak { + c.nak(ctx, msg) return } - c.ack(ctx, msg) } -func (c *dataStreamConsumer) ack(ctx context.Context, msg jetstream.Msg) { +func (c *dataStreamConsumer) ack(ctx context.Context, msg model.StreamMessage) { if err := msg.Ack(); err != nil { - slog.ErrorContext(ctx, "failed to ACK stream message", "subject", msg.Subject(), "error", err) + slog.ErrorContext(ctx, "failed to ACK stream message", "key", msg.Key, "error", err) } } -// keyFromSubject strips the "$KV.." prefix from a JetStream KV subject, -// returning the bare key. Subject format: $KV.. -func keyFromSubject(subject string) string { - idx := strings.Index(subject, ".") - if idx == -1 { - return subject +func (c *dataStreamConsumer) nak(ctx context.Context, msg model.StreamMessage) { + delay := nakDelay(msg.DeliveryCount) + if err := msg.Nak(delay); err != nil { + slog.ErrorContext(ctx, "failed to NAK stream message", + "key", msg.Key, "delay", delay, "error", err) } - idx2 := strings.Index(subject[idx+1:], ".") - if idx2 == -1 { - return subject - } - return subject[idx+idx2+2:] -} - -func isRemoval(msg jetstream.Msg) bool { - op := msg.Headers().Get("Kv-Operation") - return op == "DEL" || op == "PURGE" } func nakDelay(numDelivered uint64) time.Duration { @@ -105,7 +73,7 @@ func nakDelay(numDelivered uint64) time.Duration { } } -// NewDataStreamConsumer creates a DataStreamProcessor that dispatches messages to handler. -func NewDataStreamConsumer(handler port.DataEventHandler) DataStreamProcessor { +// NewDataStreamConsumer creates a port.DataStreamProcessor that dispatches messages to handler. +func NewDataStreamConsumer(handler port.DataEventHandler) port.DataStreamProcessor { return &dataStreamConsumer{handler: handler} } From b9c9acb3b9ee3c35569bf7e5c590948ee90e2398 Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Wed, 11 Mar 2026 18:17:23 -0300 Subject: [PATCH 03/11] feat(eventing): update environment variable names and enhance member handling logic Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- cmd/mailing-list-api/data_stream.go | 12 ++--- .../eventing/datastream/member_handler.go | 54 +++++++++++++++---- .../eventing/datastream/service_handler.go | 21 +++----- .../eventing/datastream/subgroup_handler.go | 38 ++++++++----- pkg/constants/storage.go | 3 ++ 5 files changed, 85 insertions(+), 43 deletions(-) diff --git a/cmd/mailing-list-api/data_stream.go b/cmd/mailing-list-api/data_stream.go index 891e9ee..4f84583 100644 --- a/cmd/mailing-list-api/data_stream.go +++ b/cmd/mailing-list-api/data_stream.go @@ -26,7 +26,7 @@ import ( // is a no-op and returns nil. func handleDataStream(ctx context.Context, wg *sync.WaitGroup) error { if !dataStreamEnabled() { - slog.InfoContext(ctx, "data stream processor disabled (MAILING_LIST_EVENTING_ENABLED not set to true)") + slog.InfoContext(ctx, "data stream processor disabled (EVENTING_ENABLED not set to true)") return nil } @@ -76,20 +76,20 @@ func handleDataStream(ctx context.Context, wg *sync.WaitGroup) error { // dataStreamEnabled reports whether the data stream processor has been opted into. func dataStreamEnabled() bool { - return os.Getenv("MAILING_LIST_EVENTING_ENABLED") == "true" + return os.Getenv("EVENTING_ENABLED") == "true" } // dataStreamConfig builds eventing.Config from environment variables with // sensible defaults. func dataStreamConfig() eventing.Config { - consumerName := os.Getenv("MAILING_LIST_EVENTING_CONSUMER_NAME") + consumerName := os.Getenv("EVENTING_CONSUMER_NAME") if consumerName == "" { consumerName = "mailing-list-service-kv-consumer" } - maxDeliver := envInt("MAILING_LIST_EVENTING_MAX_DELIVER", 3) - maxAckPending := envInt("MAILING_LIST_EVENTING_MAX_ACK_PENDING", 1000) - ackWaitSecs := envInt("MAILING_LIST_EVENTING_ACK_WAIT_SECS", 30) + maxDeliver := envInt("EVENTING_MAX_DELIVER", 3) + maxAckPending := envInt("EVENTING_MAX_ACK_PENDING", 1000) + ackWaitSecs := envInt("EVENTING_ACK_WAIT_SECS", 30) return eventing.Config{ ConsumerName: consumerName, diff --git a/cmd/mailing-list-api/eventing/datastream/member_handler.go b/cmd/mailing-list-api/eventing/datastream/member_handler.go index 0594a81..f5cfb3e 100644 --- a/cmd/mailing-list-api/eventing/datastream/member_handler.go +++ b/cmd/mailing-list-api/eventing/datastream/member_handler.go @@ -5,7 +5,9 @@ package datastream import ( "context" + "fmt" "log/slog" + "strings" "time" "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" @@ -23,15 +25,24 @@ import ( // No FGA access message is published — member access is inherited from the parent // mailing list's access record. func handleMemberUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { - member := transformToGrpsIOMember(uid, data) + // Members carry group_id (Groups.io numeric ID) rather than a direct mailing_list_uid. + // Resolve the parent subgroup UID via the reverse index written by the subgroup handler. + groupID := mapconv.Int64Ptr(data, "group_id") + if groupID == nil { + slog.ErrorContext(ctx, "member has no group_id, cannot determine parent mailing list — ACKing", "uid", uid) + return false + } - // Parent dependency check: ensure the parent mailing list is already indexed. - subgroupKey := buildMappingKey(constants.KVMappingPrefixSubgroup, member.MailingListUID) - if !isMappingPresent(ctx, mappingsKV, subgroupKey) { + gidKey := buildMappingKey(constants.KVMappingPrefixSubgroupByGroupID, fmt.Sprintf("%d", *groupID)) + gidEntry, err := mappingsKV.Get(ctx, gidKey) + if err != nil || gidEntry == nil || string(gidEntry.Value()) == constants.KVTombstoneMarker { slog.WarnContext(ctx, "parent subgroup not yet processed, NAKing member for retry", - "uid", uid, "mailing_list_uid", member.MailingListUID) + "uid", uid, "group_id", *groupID) return true // NAK — retry with backoff } + mailingListUID := string(gidEntry.Value()) + + member := transformToGrpsIOMember(uid, mailingListUID, data) mKey := buildMappingKey(constants.KVMappingPrefixMember, uid) action := resolveAction(ctx, mappingsKV, mKey) @@ -78,15 +89,23 @@ func handleMemberDelete(ctx context.Context, uid string, publisher port.MessageP } // transformToGrpsIOMember maps v1 DynamoDB fields to the GrpsIOMember domain model. -func transformToGrpsIOMember(uid string, data map[string]any) *model.GrpsIOMember { +// mailingListUID is resolved from the reverse group_id index before calling this function. +// +// DynamoDB fields available: member_id, committee_id, created_at, created_by, +// delivery_mode, delivery_mode_list, email, full_name, group_id, groups_email, +// groups_full_name, job_title, last_modified_at, last_modified_by, +// last_system_modified_at, member_type, mod_status, organization, status, +// sync_status, user_id. +func transformToGrpsIOMember(uid, mailingListUID string, data map[string]any) *model.GrpsIOMember { + firstName, lastName := splitFullName(mapconv.StringVal(data, "full_name")) + member := &model.GrpsIOMember{ UID: uid, + MailingListUID: mailingListUID, MemberID: mapconv.Int64Ptr(data, "member_id"), GroupID: mapconv.Int64Ptr(data, "group_id"), - MailingListUID: mapconv.StringVal(data, "mailing_list_uid"), - Username: mapconv.StringVal(data, "username"), - FirstName: mapconv.StringVal(data, "first_name"), - LastName: mapconv.StringVal(data, "last_name"), + FirstName: firstName, + LastName: lastName, Email: mapconv.StringVal(data, "email"), Organization: mapconv.StringVal(data, "organization"), JobTitle: mapconv.StringVal(data, "job_title"), @@ -97,6 +116,11 @@ func transformToGrpsIOMember(uid string, data map[string]any) *model.GrpsIOMembe Source: "v1-sync", } + if ts := mapconv.StringVal(data, "created_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + member.CreatedAt = t + } + } if ts := mapconv.StringVal(data, "last_modified_at"); ts != "" { if t, err := time.Parse(time.RFC3339, ts); err == nil { member.UpdatedAt = t @@ -105,3 +129,13 @@ func transformToGrpsIOMember(uid string, data map[string]any) *model.GrpsIOMembe return member } + +// splitFullName splits "First Last" into (first, last). +// For single-token names (no space), the whole string is returned as first name. +func splitFullName(fullName string) (string, string) { + idx := strings.Index(fullName, " ") + if idx == -1 { + return fullName, "" + } + return fullName[:idx], fullName[idx+1:] +} diff --git a/cmd/mailing-list-api/eventing/datastream/service_handler.go b/cmd/mailing-list-api/eventing/datastream/service_handler.go index 967cd07..c17b4f0 100644 --- a/cmd/mailing-list-api/eventing/datastream/service_handler.go +++ b/cmd/mailing-list-api/eventing/datastream/service_handler.go @@ -84,20 +84,13 @@ func handleServiceDelete(ctx context.Context, uid string, publisher port.Message // Source is always "v1-sync" to distinguish these from API-created records. func transformToGrpsIOService(uid string, data map[string]any) *model.GrpsIOService { svc := &model.GrpsIOService{ - UID: uid, - Type: mapconv.StringVal(data, "type"), - Domain: mapconv.StringVal(data, "domain"), - GroupID: mapconv.Int64Ptr(data, "group_id"), - Status: mapconv.StringVal(data, "status"), - Source: "v1-sync", - GlobalOwners: mapconv.StringSliceVal(data, "global_owners"), - Prefix: mapconv.StringVal(data, "prefix"), - ParentServiceUID: mapconv.StringVal(data, "parent_service_uid"), - ProjectSlug: mapconv.StringVal(data, "project_slug"), - ProjectUID: mapconv.StringVal(data, "project_uid"), - URL: mapconv.StringVal(data, "url"), - GroupName: mapconv.StringVal(data, "group_name"), - Public: mapconv.BoolVal(data, "public"), + UID: uid, + Type: mapconv.StringVal(data, "group_service_type"), + Domain: mapconv.StringVal(data, "domain"), + GroupID: mapconv.Int64Ptr(data, "group_id"), + Prefix: mapconv.StringVal(data, "prefix"), + ProjectUID: mapconv.StringVal(data, "project_id"), + Source: "v1-sync", } if ts := mapconv.StringVal(data, "last_modified_at"); ts != "" { diff --git a/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go b/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go index 1df6edd..3f7ad78 100644 --- a/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go +++ b/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go @@ -5,6 +5,7 @@ package datastream import ( "context" + "fmt" "log/slog" "time" @@ -60,6 +61,13 @@ func handleSubgroupUpdate(ctx context.Context, uid string, data map[string]any, } putMapping(ctx, mappingsKV, mKey, uid) + + // Store reverse index: group_id → subgroup UID so member events can resolve MailingListUID. + if list.GroupID != nil { + gidKey := buildMappingKey(constants.KVMappingPrefixSubgroupByGroupID, fmt.Sprintf("%d", *list.GroupID)) + putMapping(ctx, mappingsKV, gidKey, uid) + } + return false } @@ -96,19 +104,23 @@ func handleSubgroupDelete(ctx context.Context, uid string, publisher port.Messag // transformToGrpsIOMailingList maps v1 DynamoDB fields to the GrpsIOMailingList domain model. func transformToGrpsIOMailingList(uid string, data map[string]any) *model.GrpsIOMailingList { list := &model.GrpsIOMailingList{ - UID: uid, - GroupID: mapconv.Int64Ptr(data, "group_id"), - GroupName: mapconv.StringVal(data, "group_name"), - Public: mapconv.BoolVal(data, "public"), - AudienceAccess: mapconv.StringVal(data, "audience_access"), - Type: mapconv.StringVal(data, "type"), - Description: mapconv.StringVal(data, "description"), - Title: mapconv.StringVal(data, "title"), - SubjectTag: mapconv.StringVal(data, "subject_tag"), - ServiceUID: mapconv.StringVal(data, "service_uid"), - ProjectUID: mapconv.StringVal(data, "project_uid"), - SubscriberCount: mapconv.IntVal(data, "subscriber_count"), - Source: "v1-sync", + UID: uid, + GroupID: mapconv.Int64Ptr(data, "group_id"), + GroupName: mapconv.StringVal(data, "group_name"), + Public: mapconv.StringVal(data, "visibility") == "Public", + Type: mapconv.StringVal(data, "type"), + Description: mapconv.StringVal(data, "description"), + SubjectTag: mapconv.StringVal(data, "subject_tag"), + ServiceUID: mapconv.StringVal(data, "parent_id"), + ProjectUID: mapconv.StringVal(data, "project_id"), + Source: "v1-sync", + } + + if committeeUID := mapconv.StringVal(data, "committee"); committeeUID != "" { + list.Committees = []model.Committee{{ + UID: committeeUID, + AllowedVotingStatuses: mapconv.StringSliceVal(data, "committee_filters"), + }} } if ts := mapconv.StringVal(data, "last_modified_at"); ts != "" { diff --git a/pkg/constants/storage.go b/pkg/constants/storage.go index da7831a..f232b27 100644 --- a/pkg/constants/storage.go +++ b/pkg/constants/storage.go @@ -68,6 +68,9 @@ const ( KVMappingPrefixSubgroup = "groupsio-subgroup" // KVMappingPrefixMember is the v1-mappings key prefix for GroupsIO members. KVMappingPrefixMember = "groupsio-member" + // KVMappingPrefixSubgroupByGroupID is the v1-mappings reverse index: Groups.io group_id → subgroup UID. + // Written by the subgroup handler so the member handler can resolve MailingListUID from group_id. + KVMappingPrefixSubgroupByGroupID = "groupsio-subgroup-gid" // Key prefixes for bucket detection // GroupsIOMailingListKeyPrefix is the common prefix for all mailing list related keys From 4b8df54697e735e73e884170d0c670aeb8c29266 Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Thu, 12 Mar 2026 11:36:00 -0300 Subject: [PATCH 04/11] feat(eventing): implement NATS JetStream KV-bucket event processor for GroupsIO entities - Add event handler for processing KV events related to services, subgroups, and members. - Implement mapping reader/writer for idempotency tracking in the v1-mappings KV bucket. - Create documentation for event processing architecture, event flow, and data transformation. - Introduce service, subgroup, and member handlers for transforming v1 payloads to v2 domain models. - Add constants for project and committee mappings to facilitate resolution of v1 SFIDs to v2 UIDs. - Implement error handling and logging for transient and permanent errors during event processing. Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- .env.example | 3 + README.md | 19 +- cmd/mailing-list-api/data_stream.go | 9 +- .../eventing/datastream/mapping.go | 76 ---- .../eventing/{datastream => }/handler.go | 36 +- cmd/mailing-list-api/service/providers.go | 12 + docs/event-processing.md | 372 ++++++++++++++++++ internal/domain/port/mapping_store.go | 50 +++ internal/infrastructure/nats/mapping_store.go | 76 ++++ .../service/datastream_member_handler.go | 46 +-- .../service/datastream_service_handler.go | 53 ++- .../service/datastream_subgroup_handler.go | 84 ++-- pkg/constants/storage.go | 8 + 13 files changed, 672 insertions(+), 172 deletions(-) delete mode 100644 cmd/mailing-list-api/eventing/datastream/mapping.go rename cmd/mailing-list-api/eventing/{datastream => }/handler.go (63%) create mode 100644 docs/event-processing.md create mode 100644 internal/domain/port/mapping_store.go create mode 100644 internal/infrastructure/nats/mapping_store.go rename cmd/mailing-list-api/eventing/datastream/member_handler.go => internal/service/datastream_member_handler.go (70%) rename cmd/mailing-list-api/eventing/datastream/service_handler.go => internal/service/datastream_service_handler.go (57%) rename cmd/mailing-list-api/eventing/datastream/subgroup_handler.go => internal/service/datastream_subgroup_handler.go (51%) diff --git a/.env.example b/.env.example index c93ad88..d49dc84 100644 --- a/.env.example +++ b/.env.example @@ -36,3 +36,6 @@ GROUPSIO_WEBHOOK_SECRET= GROUPSIO_TIMEOUT=30s GROUPSIO_MAX_RETRIES=3 GROUPSIO_RETRY_DELAY=1s + +# Eventing Configuration +EVENTING_ENABLED=true diff --git a/README.md b/README.md index 2042704..2f545ca 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,11 @@ lfx-v2-mailing-list-service/ │ ├── design/ # Goa API design files │ │ ├── mailing_list.go # Service and endpoint definitions │ │ └── type.go # Type definitions and data structures +│ ├── eventing/ # v1→v2 data stream event processing +│ │ ├── event_processor.go # JetStream consumer lifecycle +│ │ └── handler.go # Key-prefix router (delegates to internal/service) │ ├── service/ # GOA service implementations +│ ├── data_stream.go # Data stream startup wiring and env config │ ├── main.go # Application entry point │ └── http.go # HTTP server setup ├── charts/ # Helm chart for Kubernetes deployment @@ -95,6 +99,8 @@ lfx-v2-mailing-list-service/ │ ├── templates/ # Kubernetes resource templates │ ├── values.yaml # Production configuration │ └── values.local.yaml # Local development configuration +├── docs/ # Additional documentation +│ └── event-processing.md # v1→v2 data stream event processing ├── gen/ # Generated code (DO NOT EDIT) │ ├── http/ # HTTP transport layer │ │ ├── openapi.yaml # OpenAPI 2.0 specification @@ -104,12 +110,17 @@ lfx-v2-mailing-list-service/ │ ├── domain/ # Business domain layer │ │ ├── model/ # Domain models and conversions │ │ └── port/ # Repository and service interfaces +│ │ └── mapping_store.go # MappingReader / MappingWriter / MappingReaderWriter │ ├── service/ # Service layer implementation -│ │ └── grpsio_service_reader.go # GroupsIO service reader +│ │ ├── grpsio_*.go # GroupsIO CRUD orchestrators +│ │ ├── datastream_service_handler.go # v1-sync service transform + publish +│ │ ├── datastream_subgroup_handler.go # v1-sync mailing list transform + publish +│ │ └── datastream_member_handler.go # v1-sync member transform + publish │ ├── infrastructure/ # Infrastructure layer │ │ ├── auth/ # JWT authentication │ │ ├── groupsio/ # GroupsIO API client implementation │ │ ├── nats/ # NATS messaging and storage +│ │ │ ├── mapping_store.go # MappingReaderWriter backed by JetStream KV │ │ │ ├── messaging_publish.go # Message publishing │ │ │ ├── messaging_request.go # Request/reply messaging │ │ │ └── storage.go # KV store repositories @@ -133,6 +144,12 @@ lfx-v2-mailing-list-service/ └── go.mod # Go module definition ``` +## 📚 Additional Documentation + +| Document | Description | +|---|---| +| [docs/event-processing.md](docs/event-processing.md) | v1→v2 data stream: how DynamoDB change events are consumed, transformed, and published to the indexer and FGA-sync services | + ## 🛠️ Development ### Prerequisites diff --git a/cmd/mailing-list-api/data_stream.go b/cmd/mailing-list-api/data_stream.go index 4f84583..c4db021 100644 --- a/cmd/mailing-list-api/data_stream.go +++ b/cmd/mailing-list-api/data_stream.go @@ -13,7 +13,6 @@ import ( "time" "github.com/linuxfoundation/lfx-v2-mailing-list-service/cmd/mailing-list-api/eventing" - "github.com/linuxfoundation/lfx-v2-mailing-list-service/cmd/mailing-list-api/eventing/datastream" "github.com/linuxfoundation/lfx-v2-mailing-list-service/cmd/mailing-list-api/service" infraNATS "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/nats" "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" @@ -32,13 +31,7 @@ func handleDataStream(ctx context.Context, wg *sync.WaitGroup) error { natsClient := service.GetNATSClient(ctx) - mappingsKV, err := natsClient.KeyValue(ctx, constants.KVBucketNameV1Mappings) - if err != nil { - return fmt.Errorf("failed to access %s KV bucket: %w", constants.KVBucketNameV1Mappings, err) - } - - publisher := service.MessagePublisher(ctx) - handler := datastream.NewEventHandler(publisher, mappingsKV) + handler := eventing.NewEventHandler(service.MessagePublisher(ctx), service.MappingReaderWriter(ctx)) streamConsumer := infraNATS.NewDataStreamConsumer(handler) cfg := dataStreamConfig() diff --git a/cmd/mailing-list-api/eventing/datastream/mapping.go b/cmd/mailing-list-api/eventing/datastream/mapping.go deleted file mode 100644 index 53a0194..0000000 --- a/cmd/mailing-list-api/eventing/datastream/mapping.go +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright The Linux Foundation and each contributor to LFX. -// SPDX-License-Identifier: MIT - -package datastream - -import ( - "context" - "fmt" - "log/slog" - - "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" - "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" - "github.com/nats-io/nats.go/jetstream" -) - -// buildMappingKey returns the v1-mappings KV key for a given prefix and UID. -func buildMappingKey(prefix, uid string) string { - return fmt.Sprintf("%s.%s", prefix, uid) -} - -// resolveAction queries the v1-mappings KV bucket to decide whether this event -// represents a create or an update. -// -// A missing key or a tombstone ("!del") entry both yield ActionCreated — the -// former because the record has never been seen, the latter because the entity -// was previously deleted and is now being re-created. -func resolveAction(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) model.MessageAction { - entry, err := mappingsKV.Get(ctx, mappingKey) - if err != nil || entry == nil { - return model.ActionCreated - } - if string(entry.Value()) == constants.KVTombstoneMarker { - return model.ActionCreated - } - return model.ActionUpdated -} - -// isMappingPresent reports whether a mapping key exists and is not tombstoned. -// Used for parent dependency checks (service before subgroup, subgroup before member). -func isMappingPresent(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) bool { - entry, err := mappingsKV.Get(ctx, mappingKey) - if err != nil || entry == nil { - slog.WarnContext(ctx, "mapping key not found in v1-mappings", "mapping_key", mappingKey) - return false - } - return string(entry.Value()) != constants.KVTombstoneMarker -} - -// isTombstoned reports whether the v1-mappings entry for mappingKey has already -// been marked as deleted, so that duplicate delete events can be skipped. -func isTombstoned(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) bool { - entry, err := mappingsKV.Get(ctx, mappingKey) - if err != nil || entry == nil { - slog.WarnContext(ctx, "mapping key not found in v1-mappings - treating as not tombstoned", "mapping_key", mappingKey) - return false - } - return string(entry.Value()) == constants.KVTombstoneMarker -} - -// putMapping records that uid has been successfully processed so subsequent -// events for the same key are treated as updates rather than creates. -func putMapping(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey, uid string) { - _, errPut := mappingsKV.Put(ctx, mappingKey, []byte(uid)) - if errPut != nil { - slog.ErrorContext(ctx, "failed to put mapping key", "mapping_key", mappingKey, "error", errPut) - } -} - -// putTombstone writes the deletion marker into v1-mappings to prevent duplicate -// processing of the same delete on consumer redelivery. -func putTombstone(ctx context.Context, mappingsKV jetstream.KeyValue, mappingKey string) { - _, errPut := mappingsKV.Put(ctx, mappingKey, []byte(constants.KVTombstoneMarker)) - if errPut != nil { - slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mappingKey, "error", errPut) - } -} diff --git a/cmd/mailing-list-api/eventing/datastream/handler.go b/cmd/mailing-list-api/eventing/handler.go similarity index 63% rename from cmd/mailing-list-api/eventing/datastream/handler.go rename to cmd/mailing-list-api/eventing/handler.go index aab1934..242629b 100644 --- a/cmd/mailing-list-api/eventing/datastream/handler.go +++ b/cmd/mailing-list-api/eventing/handler.go @@ -1,9 +1,7 @@ // Copyright The Linux Foundation and each contributor to LFX. // SPDX-License-Identifier: MIT -// Package datastream implements port.DataEventHandler for GroupsIO v1 DynamoDB -// change events sourced from the lfx-v1-sync-helper pipeline. -package datastream +package eventing import ( "context" @@ -11,7 +9,7 @@ import ( "strings" "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" - "github.com/nats-io/nats.go/jetstream" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/service" ) const ( @@ -27,17 +25,17 @@ const ( // eventHandler implements port.DataEventHandler and routes KV events to the // appropriate per-entity handler based on the key prefix. type eventHandler struct { - publisher port.MessagePublisher - mappingsKV jetstream.KeyValue + publisher port.MessagePublisher + mappings port.MappingReaderWriter } // NewEventHandler constructs a DataEventHandler for GroupsIO entities. // publisher is used to emit indexer and access control messages. -// mappingsKV is the v1-mappings KV bucket used for idempotency tracking. -func NewEventHandler(publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) port.DataEventHandler { +// mappings is the v1-mappings abstraction used for idempotency tracking. +func NewEventHandler(publisher port.MessagePublisher, mappings port.MappingReaderWriter) port.DataEventHandler { return &eventHandler{ - publisher: publisher, - mappingsKV: mappingsKV, + publisher: publisher, + mappings: mappings, } } @@ -50,23 +48,23 @@ func (h *eventHandler) HandleChange(ctx context.Context, key string, data map[st case strings.HasPrefix(key, kvPrefixService): uid := key[len(kvPrefixService):] if isSoftDelete { - return handleServiceDelete(ctx, uid, h.publisher, h.mappingsKV) + return service.HandleDataStreamServiceDelete(ctx, uid, h.publisher, h.mappings) } - return handleServiceUpdate(ctx, uid, data, h.publisher, h.mappingsKV) + return service.HandleDataStreamServiceUpdate(ctx, uid, data, h.publisher, h.mappings) case strings.HasPrefix(key, kvPrefixSubgroup): uid := key[len(kvPrefixSubgroup):] if isSoftDelete { - return handleSubgroupDelete(ctx, uid, h.publisher, h.mappingsKV) + return service.HandleDataStreamSubgroupDelete(ctx, uid, h.publisher, h.mappings) } - return handleSubgroupUpdate(ctx, uid, data, h.publisher, h.mappingsKV) + return service.HandleDataStreamSubgroupUpdate(ctx, uid, data, h.publisher, h.mappings) case strings.HasPrefix(key, kvPrefixMember): uid := key[len(kvPrefixMember):] if isSoftDelete { - return handleMemberDelete(ctx, uid, h.publisher, h.mappingsKV) + return service.HandleDataStreamMemberDelete(ctx, uid, h.publisher, h.mappings) } - return handleMemberUpdate(ctx, uid, data, h.publisher, h.mappingsKV) + return service.HandleDataStreamMemberUpdate(ctx, uid, data, h.publisher, h.mappings) default: slog.WarnContext(ctx, "unrecognized KV key prefix in HandleChange, ACKing", "key", key) @@ -78,13 +76,13 @@ func (h *eventHandler) HandleChange(ctx context.Context, key string, data map[st func (h *eventHandler) HandleRemoval(ctx context.Context, key string) bool { switch { case strings.HasPrefix(key, kvPrefixService): - return handleServiceDelete(ctx, key[len(kvPrefixService):], h.publisher, h.mappingsKV) + return service.HandleDataStreamServiceDelete(ctx, key[len(kvPrefixService):], h.publisher, h.mappings) case strings.HasPrefix(key, kvPrefixSubgroup): - return handleSubgroupDelete(ctx, key[len(kvPrefixSubgroup):], h.publisher, h.mappingsKV) + return service.HandleDataStreamSubgroupDelete(ctx, key[len(kvPrefixSubgroup):], h.publisher, h.mappings) case strings.HasPrefix(key, kvPrefixMember): - return handleMemberDelete(ctx, key[len(kvPrefixMember):], h.publisher, h.mappingsKV) + return service.HandleDataStreamMemberDelete(ctx, key[len(kvPrefixMember):], h.publisher, h.mappings) default: slog.WarnContext(ctx, "unrecognized KV key prefix in HandleRemoval, ACKing", "key", key) diff --git a/cmd/mailing-list-api/service/providers.go b/cmd/mailing-list-api/service/providers.go index f3f79dc..0dd1407 100644 --- a/cmd/mailing-list-api/service/providers.go +++ b/cmd/mailing-list-api/service/providers.go @@ -18,6 +18,7 @@ import ( infrastructure "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/mock" "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/nats" "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/service" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" ) var ( @@ -498,3 +499,14 @@ func GetNATSClient(ctx context.Context) *nats.NATSClient { } return storageImpl.Client() } + +// MappingReaderWriter initializes the v1-mappings KV abstraction used by the +// data stream event handler for idempotency tracking. +func MappingReaderWriter(ctx context.Context) port.MappingReaderWriter { + client := GetNATSClient(ctx) + kv, err := client.KeyValue(ctx, constants.KVBucketNameV1Mappings) + if err != nil { + log.Fatalf("failed to access %s KV bucket: %v", constants.KVBucketNameV1Mappings, err) + } + return nats.NewMappingReaderWriter(kv) +} diff --git a/docs/event-processing.md b/docs/event-processing.md new file mode 100644 index 0000000..abca446 --- /dev/null +++ b/docs/event-processing.md @@ -0,0 +1,372 @@ +# Event Processing + +## Overview + +The mailing list service implements a NATS JetStream KV-bucket event processor that syncs GroupsIO entities (services, mailing lists, and members) from v1 DynamoDB into v2 — enabling real-time data synchronization without manual intervention. + +The pipeline is driven by the `lfx-v1-sync-helper` component, which writes DynamoDB change events into the `v1-objects` KV bucket. The mailing list service consumes those events, transforms them into v2 domain models, and publishes the results to the indexer and access-control (FGA-sync) services. + +--- + +## Architecture + +### Components + +```mermaid +graph TD + DDB[DynamoDB v1] + SH[lfx-v1-sync-helper] + KV1[NATS KV: v1-objects] + EP[EventProcessor
mailing-list-service] + KV2[NATS KV: v1-mappings
idempotency store] + IDX[Indexer Service
OpenSearch] + FGA[FGA-Sync Service
OpenFGA] + + DDB -->|CDC stream| SH + SH -->|PUT/DEL to KV| KV1 + KV1 -->|JetStream consumer| EP + EP <-->|read/write mappings| KV2 + EP -->|index messages| IDX + EP -->|access messages| FGA +``` + +### Key Prefix → Entity Mapping + +| KV Key Prefix | Entity Type | +|---|---| +| `itx-groupsio-v2-service.` | GroupsIO Service | +| `itx-groupsio-v2-subgroup.` | Mailing List (subgroup) | +| `itx-groupsio-v2-member.` | Member | + +--- + +## Event Flow + +### Create / Update Flow + +```mermaid +sequenceDiagram + participant KV as v1-objects KV + participant EP as EventProcessor + participant MP as v1-mappings KV + participant IDX as Indexer + participant FGA as FGA-Sync + + KV->>EP: PUT event (key + payload) + EP->>EP: Strip $KV prefix → bare key + EP->>EP: Decode JSON/msgpack payload + EP->>MP: resolveAction(key)
missing/tombstone → Created
present → Updated + EP->>EP: transformTo(data) + EP->>IDX: Publish IndexerMessage (Created|Updated) + EP->>FGA: Publish AccessMessage (update_access) + EP->>MP: putMapping(key, uid) +``` + +### Delete Flow + +Deletes arrive in two forms: + +- **Soft delete** — a regular PUT payload that contains the `_sdc_deleted_at` field (injected by `lfx-v1-sync-helper` on DynamoDB REMOVE events). +- **Hard delete / PURGE** — a KV message with the `Kv-Operation: DEL` or `Kv-Operation: PURGE` header. + +```mermaid +sequenceDiagram + participant KV as v1-objects KV + participant EP as EventProcessor + participant MP as v1-mappings KV + participant IDX as Indexer + participant FGA as FGA-Sync + + KV->>EP: DEL/PURGE event (or PUT with _sdc_deleted_at) + EP->>MP: isTombstoned(key)? + alt already tombstoned + EP->>EP: ACK (duplicate, skip) + else not tombstoned + EP->>IDX: Publish IndexerMessage (Deleted) + EP->>FGA: Publish AccessMessage (delete_all_access) + EP->>MP: putTombstone(key) + end +``` + +### Parent Dependency (Ordering Guarantee) + +To avoid orphaned documents in OpenSearch, child entities wait for their parent to be processed first: + +```mermaid +graph LR + S[Service
itx-groupsio-v2-service] --> SG[Subgroup / Mailing List
itx-groupsio-v2-subgroup] + SG --> M[Member
itx-groupsio-v2-member] +``` + +- A **subgroup** event is NAK'd with backoff if the parent service mapping is absent from `v1-mappings`. +- A **member** event is NAK'd with backoff if the parent subgroup mapping is absent from `v1-mappings`. + +### Reverse Index (group_id → UID) + +Members store a `group_id` (Groups.io numeric ID) rather than the v2 `mailing_list_uid`. When the subgroup handler successfully processes a mailing list, it writes a reverse index entry: + +``` +v1-mappings key: groupsio-subgroup-gid. +value: +``` + +The member handler reads this entry to resolve the parent `MailingListUID` before building the indexer message. + +--- + +## Data Transformation + +### Service (`GrpsIOService`) + +| v1 DynamoDB field | v2 model field | +|---|---| +| `group_service_type` | `Type` | +| `domain` | `Domain` | +| `group_id` | `GroupID` | +| `prefix` | `Prefix` | +| `project_id` | `ProjectUID` | +| `last_modified_at` | `UpdatedAt` | +| _(hardcoded)_ | `Source = "v1-sync"` | + +### Mailing List / Subgroup (`GrpsIOMailingList`) + +| v1 DynamoDB field | v2 model field | +|---|---| +| `group_id` | `GroupID` | +| `group_name` | `GroupName` | +| `visibility == "Public"` | `Public` | +| `type` | `Type` | +| `description` | `Description` | +| `subject_tag` | `SubjectTag` | +| `parent_id` | `ServiceUID` | +| `project_id` | `ProjectUID` | +| `committee` | `Committees[0].UID` | +| `committee_filters` | `Committees[0].AllowedVotingStatuses` | +| `last_modified_at` | `UpdatedAt` | +| _(hardcoded)_ | `Source = "v1-sync"` | + +### Member (`GrpsIOMember`) + +| v1 DynamoDB field | v2 model field | +|---|---| +| `member_id` | `MemberID` | +| `group_id` | `GroupID` | +| `full_name` (split on first space) | `FirstName`, `LastName` | +| `email` | `Email` | +| `organization` | `Organization` | +| `job_title` | `JobTitle` | +| `member_type` | `MemberType` | +| `delivery_mode` | `DeliveryMode` | +| `mod_status` | `ModStatus` | +| `status` | `Status` | +| `created_at` | `CreatedAt` | +| `last_modified_at` | `UpdatedAt` | +| _(resolved from reverse index)_ | `MailingListUID` | +| _(hardcoded)_ | `Source = "v1-sync"` | + +> Note: Members do **not** publish a separate FGA access message — access is inherited from the parent mailing list's access record. + +--- + +## NATS Subjects Published + +| Entity | Action | Subject | +|---|---|---| +| Service | Created / Updated | `lfx.index.groupsio_service` | +| Service | Created / Updated | `lfx.update_access.groupsio_service` | +| Service | Deleted | `lfx.index.groupsio_service` | +| Service | Deleted | `lfx.delete_all_access.groupsio_service` | +| Mailing List | Created / Updated | `lfx.index.groupsio_mailing_list` | +| Mailing List | Created / Updated | `lfx.update_access.groupsio_mailing_list` | +| Mailing List | Deleted | `lfx.index.groupsio_mailing_list` | +| Mailing List | Deleted | `lfx.delete_all_access.groupsio_mailing_list` | +| Member | Created / Updated | `lfx.index.groupsio_member` | +| Member | Deleted | `lfx.index.groupsio_member` | + +--- + +## Deduplication + +The `v1-mappings` KV bucket tracks processing state for each entity: + +| State | Key Pattern | Value | +|---|---|---| +| Synced (service) | `groupsio-service.` | `` | +| Synced (subgroup) | `groupsio-subgroup.` | `` | +| Synced (member) | `groupsio-member.` | `` | +| Reverse index | `groupsio-subgroup-gid.` | `` | +| Deleted (tombstone) | any of the above | `!del` | + +On consumer redelivery, tombstone markers prevent duplicate downstream operations. Missing keys and tombstoned entries are both treated as "never seen" for create-vs-update resolution. + +--- + +## Configuration + +### Environment Variables + +| Variable | Default | Description | +|---|---|---| +| `EVENTING_ENABLED` | _(unset)_ | Set to `true` to enable the data stream processor | +| `EVENTING_CONSUMER_NAME` | `mailing-list-service-kv-consumer` | Durable JetStream consumer name | +| `EVENTING_MAX_DELIVER` | `3` | Maximum delivery attempts before giving up | +| `EVENTING_ACK_WAIT_SECS` | `30` | Seconds the server waits for ACK before redelivering | +| `EVENTING_MAX_ACK_PENDING` | `1000` | Maximum in-flight unacknowledged messages | +| `NATS_URL` | `nats://lfx-platform-nats.lfx.svc.cluster.local:4222` | NATS server connection URL | + +### Consumer Configuration + +| Setting | Value | +|---|---| +| Delivery Policy | `DeliverLastPerSubjectPolicy` (resumes from last seen record per key after restart) | +| Ack Policy | `AckExplicitPolicy` (explicit ACK required) | +| Filter Subjects | `$KV.v1-objects.itx-groupsio-v2-service.>`, `$KV.v1-objects.itx-groupsio-v2-subgroup.>`, `$KV.v1-objects.itx-groupsio-v2-member.>` | +| Stream | `KV_v1-objects` | + +--- + +## Error Handling + +### Transient Errors (NAK — retry) + +The handler returns `true` (NAK) for: +- Parent mapping absent (subgroup waiting for service; member waiting for subgroup) +- Transient publish failures to indexer or FGA-sync (as determined by `pkgerrors.IsTransient`) + +The consumer redelivers after the `AckWait` backoff, up to `MaxDeliver` times. + +### Permanent Errors (ACK — skip) + +The handler returns `false` (ACK) for: +- Unrecognised KV key prefix +- Missing required fields (e.g., member with no `group_id`) +- Message metadata read failure (poison-pill guard) +- Payload decode failure + +These events are logged at `ERROR` level and discarded to prevent the consumer from stalling indefinitely. + +--- + +## Lifecycle + +```mermaid +stateDiagram-v2 + [*] --> Disabled: EVENTING_ENABLED != "true" + [*] --> Starting: EVENTING_ENABLED = "true" + Starting --> Running: Consumer created / resumed + Running --> Stopping: ctx cancelled (SIGTERM / shutdown) + Stopping --> [*]: consumer.Stop() called +``` + +1. **Startup**: `handleDataStream` is called from `main.go` after the NATS client is ready. If `EVENTING_ENABLED` is not `true`, the function is a no-op. +2. **Running**: The processor consumes messages in the background goroutine until context cancellation. +3. **Shutdown**: A second goroutine waits for `ctx.Done()` and calls `processor.Stop()` with a graceful-shutdown timeout. + +--- + +## Operations + +### Enable Event Processing + +```bash +export EVENTING_ENABLED=true +make run +``` + +### Disable Event Processing (e.g., local dev) + +```bash +# Simply omit the variable or set it to anything other than "true" +unset EVENTING_ENABLED +make run +``` + +### Monitoring + +Watch for these log messages: + +| Log message | Meaning | +|---|---| +| `data stream processor started successfully` | Consumer running | +| `data stream processor context cancelled` | Normal shutdown | +| `parent service not yet processed, NAKing subgroup for retry` | Ordering backpressure | +| `parent subgroup not yet processed, NAKing member for retry` | Ordering backpressure | +| `service/subgroup/member already deleted, ACKing duplicate` | Idempotent delete | +| `data stream KV consumer error` | NATS consumer-level error | + +### Troubleshooting + +| Symptom | Action | +|---|---| +| No events processed | Verify `EVENTING_ENABLED=true`, check NATS connectivity, confirm `v1-objects` bucket exists | +| Repeated NAK / ordering failures | Ensure `lfx-v1-sync-helper` is populating the KV bucket in dependency order | +| Duplicate events replayed | Inspect `v1-mappings` bucket for missing tombstones | +| Consumer not progressing | Check downstream indexer / FGA-sync availability; review `EVENTING_MAX_DELIVER` | + +--- + +## Development + +### Code Structure + +``` +cmd/mailing-list-api/ +├── data_stream.go # Startup wiring, env config +└── eventing/ + ├── event_processor.go # JetStream consumer lifecycle + └── handler.go # Key-prefix router (HandleChange / HandleRemoval) + +internal/ +├── domain/port/ +│ └── mapping_store.go # MappingReader / MappingWriter / MappingReaderWriter +├── infrastructure/nats/ +│ └── mapping_store.go # JetStream KV implementation (hides tombstone details) +└── service/ + ├── datastream_service_handler.go # Service transform + publish + ├── datastream_subgroup_handler.go # Mailing list transform + publish + reverse index + └── datastream_member_handler.go # Member transform + publish +``` + +The `MappingReaderWriter` port abstracts all `v1-mappings` KV operations (create-vs-update resolution, parent-dependency checks, tombstone writes) behind domain-meaningful methods. The JetStream KV details — including the `!del` tombstone marker — are encapsulated entirely in `internal/infrastructure/nats/mapping_store.go`. + +### Adding a New Entity Type + +1. Add KV key prefix constant in [eventing/handler.go](../cmd/mailing-list-api/eventing/handler.go) +2. Register the new prefix in the `switch` inside `HandleChange` and `HandleRemoval`, delegating to the new handler functions +3. Create `internal/service/datastream_xxx_handler.go` with `HandleDataStreamXxxUpdate` / `HandleDataStreamXxxDelete` +4. Add `transformV1ToXxx` conversion function in the same file +5. Add mapping prefix constants to [pkg/constants/storage.go](../pkg/constants/storage.go) +6. Add published subject constants to [pkg/constants/subjects.go](../pkg/constants/subjects.go) +7. Write unit tests + +### Testing Locally + +```bash +# Disable event processing for pure API development +unset EVENTING_ENABLED +make run + +# Enable with a local NATS server +export EVENTING_ENABLED=true +export NATS_URL=nats://localhost:4222 +make run + +# Inject a test event manually +nats kv put v1-objects itx-groupsio-v2-service.test-uid '{"group_service_type":"primary","project_id":"proj-1","domain":"groups.io"}' + +# Verify mapping was written +nats kv get v1-mappings groupsio-service.test-uid + +# Run unit tests +go test ./cmd/mailing-list-api/eventing/... ./internal/service/... -v +``` + +--- + +## Related Services + +| Service | Role | +|---|---| +| **lfx-v1-sync-helper** | Bridges DynamoDB CDC into the `v1-objects` NATS KV bucket | +| **Indexer** | Consumes indexer messages and upserts/deletes records in OpenSearch | +| **FGA-Sync** | Consumes access messages and manages OpenFGA relationship tuples | diff --git a/internal/domain/port/mapping_store.go b/internal/domain/port/mapping_store.go new file mode 100644 index 0000000..edd94f5 --- /dev/null +++ b/internal/domain/port/mapping_store.go @@ -0,0 +1,50 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package port + +import ( + "context" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" +) + +// MappingReader abstracts read operations on the v1-mappings KV bucket. +// Implementations hide storage-level details such as tombstone markers and +// key-not-found semantics behind domain-meaningful operations. +type MappingReader interface { + // ResolveAction returns ActionCreated when the key is absent or tombstoned + // (entity never seen, or previously deleted and being re-created), and + // ActionUpdated when a live mapping already exists. + ResolveAction(ctx context.Context, key string) model.MessageAction + + // IsMappingPresent returns true when the key exists and is not tombstoned. + // Used for parent-dependency checks (service before subgroup, subgroup before member). + IsMappingPresent(ctx context.Context, key string) bool + + // IsTombstoned returns true when the key holds the deletion marker, + // so duplicate delete events can be skipped. + IsTombstoned(ctx context.Context, key string) bool + + // GetMappingValue returns the stored value and true when the key exists and + // is not tombstoned. Used when the caller needs the actual value (e.g. the + // reverse group_id → subgroup UID index in the member handler). + GetMappingValue(ctx context.Context, key string) (string, bool) +} + +// MappingWriter abstracts write operations on the v1-mappings KV bucket. +type MappingWriter interface { + // PutMapping records that an entity has been successfully processed so that + // subsequent events for the same key are treated as updates rather than creates. + PutMapping(ctx context.Context, key, value string) error + + // PutTombstone writes the deletion marker to prevent duplicate delete + // processing on consumer redelivery. + PutTombstone(ctx context.Context, key string) error +} + +// MappingReaderWriter combines read and write access to the v1-mappings KV bucket. +type MappingReaderWriter interface { + MappingReader + MappingWriter +} diff --git a/internal/infrastructure/nats/mapping_store.go b/internal/infrastructure/nats/mapping_store.go new file mode 100644 index 0000000..dc0968a --- /dev/null +++ b/internal/infrastructure/nats/mapping_store.go @@ -0,0 +1,76 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package nats + +import ( + "context" + "log/slog" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + "github.com/nats-io/nats.go/jetstream" +) + +type natsMappingReaderWriter struct { + kv jetstream.KeyValue +} + +// NewMappingReaderWriter wraps a JetStream KeyValue bucket as a port.MappingReaderWriter. +// All tombstone marker and key-not-found semantics are encapsulated here so the +// service layer remains free of storage-level concerns. +func NewMappingReaderWriter(kv jetstream.KeyValue) port.MappingReaderWriter { + return &natsMappingReaderWriter{kv: kv} +} + +func (m *natsMappingReaderWriter) ResolveAction(ctx context.Context, key string) model.MessageAction { + entry, err := m.kv.Get(ctx, key) + if err != nil || entry == nil { + return model.ActionCreated + } + if string(entry.Value()) == constants.KVTombstoneMarker { + return model.ActionCreated + } + return model.ActionUpdated +} + +func (m *natsMappingReaderWriter) IsMappingPresent(ctx context.Context, key string) bool { + entry, err := m.kv.Get(ctx, key) + if err != nil || entry == nil { + slog.WarnContext(ctx, "mapping key not found in v1-mappings", "mapping_key", key) + return false + } + return string(entry.Value()) != constants.KVTombstoneMarker +} + +func (m *natsMappingReaderWriter) IsTombstoned(ctx context.Context, key string) bool { + entry, err := m.kv.Get(ctx, key) + if err != nil || entry == nil { + slog.WarnContext(ctx, "mapping key not found in v1-mappings - treating as not tombstoned", "mapping_key", key) + return false + } + return string(entry.Value()) == constants.KVTombstoneMarker +} + +func (m *natsMappingReaderWriter) GetMappingValue(ctx context.Context, key string) (string, bool) { + entry, err := m.kv.Get(ctx, key) + if err != nil || entry == nil { + return "", false + } + val := string(entry.Value()) + if val == constants.KVTombstoneMarker { + return "", false + } + return val, true +} + +func (m *natsMappingReaderWriter) PutMapping(ctx context.Context, key, value string) error { + _, err := m.kv.Put(ctx, key, []byte(value)) + return err +} + +func (m *natsMappingReaderWriter) PutTombstone(ctx context.Context, key string) error { + _, err := m.kv.Put(ctx, key, []byte(constants.KVTombstoneMarker)) + return err +} diff --git a/cmd/mailing-list-api/eventing/datastream/member_handler.go b/internal/service/datastream_member_handler.go similarity index 70% rename from cmd/mailing-list-api/eventing/datastream/member_handler.go rename to internal/service/datastream_member_handler.go index f5cfb3e..f0f26be 100644 --- a/cmd/mailing-list-api/eventing/datastream/member_handler.go +++ b/internal/service/datastream_member_handler.go @@ -1,7 +1,7 @@ // Copyright The Linux Foundation and each contributor to LFX. // SPDX-License-Identifier: MIT -package datastream +package service import ( "context" @@ -15,16 +15,15 @@ import ( "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" pkgerrors "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/errors" "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/mapconv" - "github.com/nats-io/nats.go/jetstream" ) -// handleMemberUpdate transforms the v1 payload into a GrpsIOMember and publishes an +// HandleDataStreamMemberUpdate transforms the v1 payload into a GrpsIOMember and publishes an // indexer message. Returns true to NAK when the parent subgroup mapping is absent // (ordering guarantee) or on transient errors. // // No FGA access message is published — member access is inherited from the parent // mailing list's access record. -func handleMemberUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { +func HandleDataStreamMemberUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappings port.MappingReaderWriter) bool { // Members carry group_id (Groups.io numeric ID) rather than a direct mailing_list_uid. // Resolve the parent subgroup UID via the reverse index written by the subgroup handler. groupID := mapconv.Int64Ptr(data, "group_id") @@ -33,19 +32,18 @@ func handleMemberUpdate(ctx context.Context, uid string, data map[string]any, pu return false } - gidKey := buildMappingKey(constants.KVMappingPrefixSubgroupByGroupID, fmt.Sprintf("%d", *groupID)) - gidEntry, err := mappingsKV.Get(ctx, gidKey) - if err != nil || gidEntry == nil || string(gidEntry.Value()) == constants.KVTombstoneMarker { + gidKey := fmt.Sprintf("%s.%d", constants.KVMappingPrefixSubgroupByGroupID, *groupID) + mailingListUID, ok := mappings.GetMappingValue(ctx, gidKey) + if !ok { slog.WarnContext(ctx, "parent subgroup not yet processed, NAKing member for retry", "uid", uid, "group_id", *groupID) return true // NAK — retry with backoff } - mailingListUID := string(gidEntry.Value()) - member := transformToGrpsIOMember(uid, mailingListUID, data) + member := transformV1ToGrpsIOMember(uid, mailingListUID, data) - mKey := buildMappingKey(constants.KVMappingPrefixMember, uid) - action := resolveAction(ctx, mappingsKV, mKey) + mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixMember, uid) + action := mappings.ResolveAction(ctx, mKey) msg := &model.IndexerMessage{Action: action, Tags: member.Tags()} built, err := msg.Build(ctx, member) @@ -59,15 +57,17 @@ func handleMemberUpdate(ctx context.Context, uid string, data map[string]any, pu return pkgerrors.IsTransient(err) } - putMapping(ctx, mappingsKV, mKey, uid) + if err := mappings.PutMapping(ctx, mKey, uid); err != nil { + slog.ErrorContext(ctx, "failed to put mapping key", "mapping_key", mKey, "error", err) + } return false } -// handleMemberDelete publishes a delete indexer message and tombstones the mapping. -func handleMemberDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { - mKey := buildMappingKey(constants.KVMappingPrefixMember, uid) +// HandleDataStreamMemberDelete publishes a delete indexer message and tombstones the mapping. +func HandleDataStreamMemberDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappings port.MappingReaderWriter) bool { + mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixMember, uid) - if isTombstoned(ctx, mappingsKV, mKey) { + if mappings.IsTombstoned(ctx, mKey) { slog.InfoContext(ctx, "member already deleted, ACKing duplicate", "uid", uid) return false } @@ -84,19 +84,15 @@ func handleMemberDelete(ctx context.Context, uid string, publisher port.MessageP return pkgerrors.IsTransient(err) } - putTombstone(ctx, mappingsKV, mKey) + if err := mappings.PutTombstone(ctx, mKey); err != nil { + slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mKey, "error", err) + } return false } -// transformToGrpsIOMember maps v1 DynamoDB fields to the GrpsIOMember domain model. +// transformV1ToGrpsIOMember maps v1 DynamoDB fields to the GrpsIOMember domain model. // mailingListUID is resolved from the reverse group_id index before calling this function. -// -// DynamoDB fields available: member_id, committee_id, created_at, created_by, -// delivery_mode, delivery_mode_list, email, full_name, group_id, groups_email, -// groups_full_name, job_title, last_modified_at, last_modified_by, -// last_system_modified_at, member_type, mod_status, organization, status, -// sync_status, user_id. -func transformToGrpsIOMember(uid, mailingListUID string, data map[string]any) *model.GrpsIOMember { +func transformV1ToGrpsIOMember(uid, mailingListUID string, data map[string]any) *model.GrpsIOMember { firstName, lastName := splitFullName(mapconv.StringVal(data, "full_name")) member := &model.GrpsIOMember{ diff --git a/cmd/mailing-list-api/eventing/datastream/service_handler.go b/internal/service/datastream_service_handler.go similarity index 57% rename from cmd/mailing-list-api/eventing/datastream/service_handler.go rename to internal/service/datastream_service_handler.go index c17b4f0..942cce3 100644 --- a/cmd/mailing-list-api/eventing/datastream/service_handler.go +++ b/internal/service/datastream_service_handler.go @@ -1,10 +1,11 @@ // Copyright The Linux Foundation and each contributor to LFX. // SPDX-License-Identifier: MIT -package datastream +package service import ( "context" + "fmt" "log/slog" "time" @@ -13,15 +14,25 @@ import ( "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" pkgerrors "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/errors" "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/mapconv" - "github.com/nats-io/nats.go/jetstream" ) -// handleServiceUpdate transforms the v1 payload into a GrpsIOService and publishes +// HandleDataStreamServiceUpdate transforms the v1 payload into a GrpsIOService and publishes // indexer + access control messages. Returns true to NAK on transient errors. -func handleServiceUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { - svc := transformToGrpsIOService(uid, data) - mKey := buildMappingKey(constants.KVMappingPrefixService, uid) - action := resolveAction(ctx, mappingsKV, mKey) +func HandleDataStreamServiceUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappings port.MappingReaderWriter) bool { + // Resolve v1 project SFID → v2 project UID via the shared project.sfid.{sfid} mapping + // written by lfx-v1-sync-helper. NAK if the project hasn't been processed yet. + projectSFID := mapconv.StringVal(data, "project_id") + projectUID, ok := mappings.GetMappingValue(ctx, fmt.Sprintf("%s.%s", constants.KVMappingPrefixProjectBySFID, projectSFID)) + if !ok { + slog.WarnContext(ctx, "project mapping not yet available, NAKing service for retry", + "uid", uid, "project_sfid", projectSFID) + return true // NAK — retry with backoff + } + data["project_id"] = projectUID + + svc := transformV1ToGrpsIOService(uid, data) + mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixService, uid) + action := mappings.ResolveAction(ctx, mKey) msg := &model.IndexerMessage{Action: action, Tags: svc.Tags()} built, err := msg.Build(ctx, svc) @@ -37,24 +48,28 @@ func handleServiceUpdate(ctx context.Context, uid string, data map[string]any, p accessMsg := &model.AccessMessage{ UID: uid, - ObjectType: "groupsio_service", + ObjectType: constants.ObjectTypeGroupsIOService, Public: svc.Public, - References: map[string][]string{"project": {svc.ProjectUID}}, + References: map[string][]string{ + constants.RelationProject: {svc.ProjectUID}, + }, } if err := publisher.Access(ctx, constants.UpdateAccessGroupsIOServiceSubject, accessMsg); err != nil { slog.WarnContext(ctx, "failed to publish service access message", "uid", uid, "error", err) } - putMapping(ctx, mappingsKV, mKey, uid) + if err := mappings.PutMapping(ctx, mKey, uid); err != nil { + slog.ErrorContext(ctx, "failed to put mapping key", "mapping_key", mKey, "error", err) + } return false } -// handleServiceDelete publishes a delete indexer message and tombstones the mapping. +// HandleDataStreamServiceDelete publishes a delete indexer message and tombstones the mapping. // Returns true to NAK on transient errors. -func handleServiceDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { - mKey := buildMappingKey(constants.KVMappingPrefixService, uid) +func HandleDataStreamServiceDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappings port.MappingReaderWriter) bool { + mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixService, uid) - if isTombstoned(ctx, mappingsKV, mKey) { + if mappings.IsTombstoned(ctx, mKey) { slog.InfoContext(ctx, "service already deleted, ACKing duplicate", "uid", uid) return false } @@ -71,18 +86,20 @@ func handleServiceDelete(ctx context.Context, uid string, publisher port.Message return pkgerrors.IsTransient(err) } - accessMsg := &model.AccessMessage{UID: uid, ObjectType: "groupsio_service"} + accessMsg := &model.AccessMessage{UID: uid, ObjectType: constants.ObjectTypeGroupsIOService} if err := publisher.Access(ctx, constants.DeleteAllAccessGroupsIOServiceSubject, accessMsg); err != nil { slog.WarnContext(ctx, "failed to publish service delete access message", "uid", uid, "error", err) } - putTombstone(ctx, mappingsKV, mKey) + if err := mappings.PutTombstone(ctx, mKey); err != nil { + slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mKey, "error", err) + } return false } -// transformToGrpsIOService maps v1 DynamoDB fields to the GrpsIOService domain model. +// transformV1ToGrpsIOService maps v1 DynamoDB fields to the GrpsIOService domain model. // Source is always "v1-sync" to distinguish these from API-created records. -func transformToGrpsIOService(uid string, data map[string]any) *model.GrpsIOService { +func transformV1ToGrpsIOService(uid string, data map[string]any) *model.GrpsIOService { svc := &model.GrpsIOService{ UID: uid, Type: mapconv.StringVal(data, "group_service_type"), diff --git a/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go b/internal/service/datastream_subgroup_handler.go similarity index 51% rename from cmd/mailing-list-api/eventing/datastream/subgroup_handler.go rename to internal/service/datastream_subgroup_handler.go index 3f7ad78..fb841b3 100644 --- a/cmd/mailing-list-api/eventing/datastream/subgroup_handler.go +++ b/internal/service/datastream_subgroup_handler.go @@ -1,7 +1,7 @@ // Copyright The Linux Foundation and each contributor to LFX. // SPDX-License-Identifier: MIT -package datastream +package service import ( "context" @@ -14,26 +14,48 @@ import ( "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" pkgerrors "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/errors" "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/mapconv" - "github.com/nats-io/nats.go/jetstream" ) -// handleSubgroupUpdate transforms the v1 payload into a GrpsIOMailingList and publishes +// HandleDataStreamSubgroupUpdate transforms the v1 payload into a GrpsIOMailingList and publishes // indexer + access control messages. Returns true to NAK when the parent service mapping // is absent (ordering guarantee) or on transient errors. -func handleSubgroupUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { - list := transformToGrpsIOMailingList(uid, data) +func HandleDataStreamSubgroupUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappings port.MappingReaderWriter) bool { + // Resolve v1 project SFID → v2 project UID via the shared project.sfid.{sfid} mapping + // written by lfx-v1-sync-helper. NAK if the project hasn't been processed yet. + projectSFID := mapconv.StringVal(data, "project_id") + projectUID, ok := mappings.GetMappingValue(ctx, fmt.Sprintf("%s.%s", constants.KVMappingPrefixProjectBySFID, projectSFID)) + if !ok { + slog.WarnContext(ctx, "project mapping not yet available, NAKing subgroup for retry", + "uid", uid, "project_sfid", projectSFID) + return true // NAK — retry with backoff + } + data["project_id"] = projectUID + + // Resolve optional v1 committee SFID → v2 committee UID. NAK if the committee + // has been specified but hasn't been synced yet (ordering guarantee). + if committeeSFID := mapconv.StringVal(data, "committee"); committeeSFID != "" { + committeeUID, ok := mappings.GetMappingValue(ctx, fmt.Sprintf("%s.%s", constants.KVMappingPrefixCommitteeBySFID, committeeSFID)) + if !ok { + slog.WarnContext(ctx, "committee mapping not yet available, NAKing subgroup for retry", + "uid", uid, "committee_sfid", committeeSFID) + return true // NAK — retry with backoff + } + data["committee"] = committeeUID + } + + list := transformV1ToGrpsIOMailingList(uid, data) // Parent dependency check: the indexer must have the parent service record before // the child mailing list to avoid orphaned documents in OpenSearch. - serviceKey := buildMappingKey(constants.KVMappingPrefixService, list.ServiceUID) - if !isMappingPresent(ctx, mappingsKV, serviceKey) { + serviceKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixService, list.ServiceUID) + if !mappings.IsMappingPresent(ctx, serviceKey) { slog.WarnContext(ctx, "parent service not yet processed, NAKing subgroup for retry", "uid", uid, "service_uid", list.ServiceUID) return true // NAK — retry with backoff } - mKey := buildMappingKey(constants.KVMappingPrefixSubgroup, uid) - action := resolveAction(ctx, mappingsKV, mKey) + mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixSubgroup, uid) + action := mappings.ResolveAction(ctx, mKey) msg := &model.IndexerMessage{Action: action, Tags: list.Tags()} built, err := msg.Build(ctx, list) @@ -47,35 +69,45 @@ func handleSubgroupUpdate(ctx context.Context, uid string, data map[string]any, return pkgerrors.IsTransient(err) } + references := map[string][]string{ + // Project access is inherited through the service — only service reference needed. + constants.RelationGroupsIOService: {list.ServiceUID}, + } + for _, committee := range list.Committees { + if committee.UID != "" { + references[constants.RelationCommittee] = append(references[constants.RelationCommittee], committee.UID) + } + } accessMsg := &model.AccessMessage{ UID: uid, - ObjectType: "groupsio_mailing_list", + ObjectType: constants.ObjectTypeGroupsIOMailingList, Public: list.Public, - References: map[string][]string{ - "project": {list.ProjectUID}, - "groupsio_service": {list.ServiceUID}, - }, + References: references, } if err := publisher.Access(ctx, constants.UpdateAccessGroupsIOMailingListSubject, accessMsg); err != nil { slog.WarnContext(ctx, "failed to publish subgroup access message", "uid", uid, "error", err) } - putMapping(ctx, mappingsKV, mKey, uid) + if err := mappings.PutMapping(ctx, mKey, uid); err != nil { + slog.ErrorContext(ctx, "failed to put mapping key", "mapping_key", mKey, "error", err) + } // Store reverse index: group_id → subgroup UID so member events can resolve MailingListUID. if list.GroupID != nil { - gidKey := buildMappingKey(constants.KVMappingPrefixSubgroupByGroupID, fmt.Sprintf("%d", *list.GroupID)) - putMapping(ctx, mappingsKV, gidKey, uid) + gidKey := fmt.Sprintf("%s.%d", constants.KVMappingPrefixSubgroupByGroupID, *list.GroupID) + if err := mappings.PutMapping(ctx, gidKey, uid); err != nil { + slog.ErrorContext(ctx, "failed to put mapping key", "mapping_key", gidKey, "error", err) + } } return false } -// handleSubgroupDelete publishes a delete indexer message and tombstones the mapping. -func handleSubgroupDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappingsKV jetstream.KeyValue) bool { - mKey := buildMappingKey(constants.KVMappingPrefixSubgroup, uid) +// HandleDataStreamSubgroupDelete publishes a delete indexer message and tombstones the mapping. +func HandleDataStreamSubgroupDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappings port.MappingReaderWriter) bool { + mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixSubgroup, uid) - if isTombstoned(ctx, mappingsKV, mKey) { + if mappings.IsTombstoned(ctx, mKey) { slog.InfoContext(ctx, "subgroup already deleted, ACKing duplicate", "uid", uid) return false } @@ -92,17 +124,19 @@ func handleSubgroupDelete(ctx context.Context, uid string, publisher port.Messag return pkgerrors.IsTransient(err) } - accessMsg := &model.AccessMessage{UID: uid, ObjectType: "groupsio_mailing_list"} + accessMsg := &model.AccessMessage{UID: uid, ObjectType: constants.ObjectTypeGroupsIOMailingList} if err := publisher.Access(ctx, constants.DeleteAllAccessGroupsIOMailingListSubject, accessMsg); err != nil { slog.WarnContext(ctx, "failed to publish subgroup delete access message", "uid", uid, "error", err) } - putTombstone(ctx, mappingsKV, mKey) + if err := mappings.PutTombstone(ctx, mKey); err != nil { + slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mKey, "error", err) + } return false } -// transformToGrpsIOMailingList maps v1 DynamoDB fields to the GrpsIOMailingList domain model. -func transformToGrpsIOMailingList(uid string, data map[string]any) *model.GrpsIOMailingList { +// transformV1ToGrpsIOMailingList maps v1 DynamoDB fields to the GrpsIOMailingList domain model. +func transformV1ToGrpsIOMailingList(uid string, data map[string]any) *model.GrpsIOMailingList { list := &model.GrpsIOMailingList{ UID: uid, GroupID: mapconv.Int64Ptr(data, "group_id"), diff --git a/pkg/constants/storage.go b/pkg/constants/storage.go index f232b27..8355aad 100644 --- a/pkg/constants/storage.go +++ b/pkg/constants/storage.go @@ -72,6 +72,14 @@ const ( // Written by the subgroup handler so the member handler can resolve MailingListUID from group_id. KVMappingPrefixSubgroupByGroupID = "groupsio-subgroup-gid" + // KVMappingPrefixProjectBySFID is the v1-mappings forward index written by lfx-v1-sync-helper: + // project.sfid.{sfid} → v2 project UID. Used to resolve the v1 project_id (SFID) to a v2 UID. + KVMappingPrefixProjectBySFID = "project.sfid" + + // KVMappingPrefixCommitteeBySFID is the v1-mappings forward index written by lfx-v1-sync-helper: + // committee.sfid.{sfid} → v2 committee UID. Used to resolve the v1 committee SFID to a v2 UID. + KVMappingPrefixCommitteeBySFID = "committee.sfid" + // Key prefixes for bucket detection // GroupsIOMailingListKeyPrefix is the common prefix for all mailing list related keys GroupsIOMailingListKeyPrefix = "lookup/groupsio-mailing-list/" From 2b96419b2d203774513d58e4be1294d7e593da4c Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Thu, 12 Mar 2026 12:52:00 -0300 Subject: [PATCH 05/11] feat(eventing): enhance data stream processing and add unit tests for service and subgroup updates Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- cmd/mailing-list-api/data_stream.go | 2 +- go.mod | 2 + go.sum | 4 + internal/infrastructure/mock/mapping_store.go | 62 ++++++++ .../infrastructure/mock/message_publisher.go | 24 +++ .../nats/data_stream_consumer.go | 13 +- .../service/datastream_service_handler.go | 4 + .../datastream_service_handler_test.go | 93 +++++++++++ .../service/datastream_subgroup_handler.go | 4 + .../datastream_subgroup_handler_test.go | 148 ++++++++++++++++++ pkg/mapconv/field_extract.go | 18 ++- pkg/mapconv/field_extract_test.go | 4 + 12 files changed, 370 insertions(+), 8 deletions(-) create mode 100644 internal/infrastructure/mock/mapping_store.go create mode 100644 internal/service/datastream_service_handler_test.go create mode 100644 internal/service/datastream_subgroup_handler_test.go diff --git a/cmd/mailing-list-api/data_stream.go b/cmd/mailing-list-api/data_stream.go index c4db021..4bf875b 100644 --- a/cmd/mailing-list-api/data_stream.go +++ b/cmd/mailing-list-api/data_stream.go @@ -21,7 +21,7 @@ import ( // handleDataStream starts the durable JetStream consumer that processes DynamoDB KV // change events for GroupsIO entities (service, subgroup, member). // -// Enabled only when MAILING_LIST_EVENTING_ENABLED=true. If disabled, the function +// Enabled only when EVENTING_ENABLED=true. If disabled, the function // is a no-op and returns nil. func handleDataStream(ctx context.Context, wg *sync.WaitGroup) error { if !dataStreamEnabled() { diff --git a/go.mod b/go.mod index f105cf2..e947cd5 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/nats-io/nats.go v1.31.0 github.com/remychantenay/slog-otel v1.3.4 github.com/stretchr/testify v1.11.1 + github.com/vmihailenco/msgpack/v5 v5.4.1 go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 go.opentelemetry.io/contrib/propagators/jaeger v1.40.0 go.opentelemetry.io/otel v1.40.0 @@ -51,6 +52,7 @@ require ( github.com/nats-io/nuid v1.0.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/stretchr/objx v0.5.2 // indirect + github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.40.0 // indirect go.opentelemetry.io/otel/metric v1.40.0 // indirect diff --git a/go.sum b/go.sum index c3a365f..9044aec 100644 --- a/go.sum +++ b/go.sum @@ -62,6 +62,10 @@ github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/vmihailenco/msgpack/v5 v5.4.1 h1:cQriyiUvjTwOHg8QZaPihLWeRAAVoCpE00IUPn0Bjt8= +github.com/vmihailenco/msgpack/v5 v5.4.1/go.mod h1:GaZTsDaehaPpQVyxrf5mtQlH+pc21PIudVV/E3rRQok= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.65.0 h1:7iP2uCb7sGddAr30RRS6xjKy7AZ2JtTOPA3oolgVSw8= diff --git a/internal/infrastructure/mock/mapping_store.go b/internal/infrastructure/mock/mapping_store.go new file mode 100644 index 0000000..285dc60 --- /dev/null +++ b/internal/infrastructure/mock/mapping_store.go @@ -0,0 +1,62 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package mock + +import ( + "context" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" +) + +// FakeMappingStore is an in-memory MappingReaderWriter for unit tests. +type FakeMappingStore struct { + values map[string]string + tombstones map[string]bool +} + +var _ port.MappingReaderWriter = (*FakeMappingStore)(nil) + +// NewFakeMappingStore returns an empty FakeMappingStore. +func NewFakeMappingStore() *FakeMappingStore { + return &FakeMappingStore{values: make(map[string]string), tombstones: make(map[string]bool)} +} + +// Set pre-populates a key/value pair (helper for test setup). +func (f *FakeMappingStore) Set(key, value string) { f.values[key] = value } + +func (f *FakeMappingStore) ResolveAction(_ context.Context, key string) model.MessageAction { + if _, ok := f.values[key]; ok { + return model.ActionUpdated + } + return model.ActionCreated +} + +func (f *FakeMappingStore) IsMappingPresent(_ context.Context, key string) bool { + _, ok := f.values[key] + return ok && !f.tombstones[key] +} + +func (f *FakeMappingStore) IsTombstoned(_ context.Context, key string) bool { + return f.tombstones[key] +} + +func (f *FakeMappingStore) GetMappingValue(_ context.Context, key string) (string, bool) { + if f.tombstones[key] { + return "", false + } + v, ok := f.values[key] + return v, ok +} + +func (f *FakeMappingStore) PutMapping(_ context.Context, key, value string) error { + f.values[key] = value + return nil +} + +func (f *FakeMappingStore) PutTombstone(_ context.Context, key string) error { + f.tombstones[key] = true + delete(f.values, key) + return nil +} diff --git a/internal/infrastructure/mock/message_publisher.go b/internal/infrastructure/mock/message_publisher.go index c225d4b..4db82b1 100644 --- a/internal/infrastructure/mock/message_publisher.go +++ b/internal/infrastructure/mock/message_publisher.go @@ -10,6 +10,30 @@ import ( "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" ) +// SpyMessagePublisher records every call to Indexer and Access for assertion in tests. +type SpyMessagePublisher struct { + IndexerCalls []PublishedMsg + AccessCalls []PublishedMsg +} + +// PublishedMsg holds the subject and message from a single publisher call. +type PublishedMsg struct { + Subject string + Message any +} + +var _ port.MessagePublisher = (*SpyMessagePublisher)(nil) + +func (s *SpyMessagePublisher) Indexer(_ context.Context, subject string, message any) error { + s.IndexerCalls = append(s.IndexerCalls, PublishedMsg{subject, message}) + return nil +} +func (s *SpyMessagePublisher) Access(_ context.Context, subject string, message any) error { + s.AccessCalls = append(s.AccessCalls, PublishedMsg{subject, message}) + return nil +} +func (s *SpyMessagePublisher) Internal(_ context.Context, _ string, _ any) error { return nil } + // mockMessagePublisher is a mock implementation of the MessagePublisher interface type mockMessagePublisher struct{} diff --git a/internal/infrastructure/nats/data_stream_consumer.go b/internal/infrastructure/nats/data_stream_consumer.go index ff63eb9..dbf5bd1 100644 --- a/internal/infrastructure/nats/data_stream_consumer.go +++ b/internal/infrastructure/nats/data_stream_consumer.go @@ -9,6 +9,8 @@ import ( "log/slog" "time" + msgpack "github.com/vmihailenco/msgpack/v5" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/port" ) @@ -35,10 +37,13 @@ func (c *dataStreamConsumer) Process(ctx context.Context, msg model.StreamMessag var data map[string]any if err := json.Unmarshal(msg.Data, &data); err != nil { - slog.ErrorContext(ctx, "failed to unmarshal stream message payload, ACKing to avoid poison pill", - "key", msg.Key, "error", err) - c.ack(ctx, msg) - return + if msgErr := msgpack.Unmarshal(msg.Data, &data); msgErr != nil { + slog.ErrorContext(ctx, "failed to unmarshal stream message payload as JSON or msgpack, ACKing to avoid poison pill", + "key", msg.Key, "json_error", err, "msgpack_error", msgErr) + c.ack(ctx, msg) + return + } + slog.DebugContext(ctx, "decoded stream message payload as msgpack", "key", msg.Key) } if nak := c.handler.HandleChange(ctx, msg.Key, data); nak { diff --git a/internal/service/datastream_service_handler.go b/internal/service/datastream_service_handler.go index 942cce3..c3adcd3 100644 --- a/internal/service/datastream_service_handler.go +++ b/internal/service/datastream_service_handler.go @@ -22,6 +22,10 @@ func HandleDataStreamServiceUpdate(ctx context.Context, uid string, data map[str // Resolve v1 project SFID → v2 project UID via the shared project.sfid.{sfid} mapping // written by lfx-v1-sync-helper. NAK if the project hasn't been processed yet. projectSFID := mapconv.StringVal(data, "project_id") + if projectSFID == "" { + slog.ErrorContext(ctx, "missing project_id in service event, discarding", "uid", uid) + return false // ACK — malformed data, retrying won't help + } projectUID, ok := mappings.GetMappingValue(ctx, fmt.Sprintf("%s.%s", constants.KVMappingPrefixProjectBySFID, projectSFID)) if !ok { slog.WarnContext(ctx, "project mapping not yet available, NAKing service for retry", diff --git a/internal/service/datastream_service_handler_test.go b/internal/service/datastream_service_handler_test.go new file mode 100644 index 0000000..22f6d88 --- /dev/null +++ b/internal/service/datastream_service_handler_test.go @@ -0,0 +1,93 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package service + +import ( + "context" + "fmt" + "testing" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/mock" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + "github.com/stretchr/testify/assert" +) + +func TestHandleDataStreamServiceUpdate_MissingProjectID_ACK(t *testing.T) { + nak := HandleDataStreamServiceUpdate(context.Background(), "svc-1", + map[string]any{}, + &mock.SpyMessagePublisher{}, mock.NewFakeMappingStore()) + assert.False(t, nak, "missing project_id should ACK (not retry)") +} + +func TestHandleDataStreamServiceUpdate_ProjectMappingAbsent_NAK(t *testing.T) { + nak := HandleDataStreamServiceUpdate(context.Background(), "svc-1", + map[string]any{"project_id": "sfid-proj"}, + &mock.SpyMessagePublisher{}, mock.NewFakeMappingStore()) + assert.True(t, nak, "unknown project mapping should NAK for retry") +} + +func TestHandleDataStreamServiceUpdate_HappyPath_ACKAndPublishes(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.sfid-proj", constants.KVMappingPrefixProjectBySFID), "proj-uid") + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamServiceUpdate(context.Background(), "svc-1", + map[string]any{ + "project_id": "sfid-proj", + "group_service_type": "mailing-list", + "domain": "example.com", + }, + pub, m) + + assert.False(t, nak) + assert.Len(t, pub.IndexerCalls, 1) + assert.Equal(t, constants.IndexGroupsIOServiceSubject, pub.IndexerCalls[0].Subject) + assert.Len(t, pub.AccessCalls, 1) + assert.Equal(t, constants.UpdateAccessGroupsIOServiceSubject, pub.AccessCalls[0].Subject) + + _, present := m.GetMappingValue(context.Background(), + fmt.Sprintf("%s.svc-1", constants.KVMappingPrefixService)) + assert.True(t, present, "mapping should be written after successful processing") +} + +func TestHandleDataStreamServiceUpdate_CreateVsUpdate_Action(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.sfid-proj", constants.KVMappingPrefixProjectBySFID), "proj-uid") + + data := func() map[string]any { return map[string]any{"project_id": "sfid-proj"} } + ctx := context.Background() + mKey := fmt.Sprintf("%s.svc-1", constants.KVMappingPrefixService) + + assert.Equal(t, model.ActionCreated, m.ResolveAction(ctx, mKey)) + HandleDataStreamServiceUpdate(ctx, "svc-1", data(), &mock.SpyMessagePublisher{}, m) + assert.Equal(t, model.ActionUpdated, m.ResolveAction(ctx, mKey)) +} + +func TestHandleDataStreamServiceDelete_DuplicateDelete_ACK(t *testing.T) { + m := mock.NewFakeMappingStore() + ctx := context.Background() + _ = m.PutTombstone(ctx, fmt.Sprintf("%s.svc-1", constants.KVMappingPrefixService)) + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamServiceDelete(ctx, "svc-1", pub, m) + + assert.False(t, nak) + assert.Empty(t, pub.IndexerCalls, "duplicate delete should not publish") +} + +func TestHandleDataStreamServiceDelete_HappyPath_ACKAndTombstones(t *testing.T) { + m := mock.NewFakeMappingStore() + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamServiceDelete(context.Background(), "svc-1", pub, m) + + assert.False(t, nak) + assert.Len(t, pub.IndexerCalls, 1) + assert.Equal(t, constants.IndexGroupsIOServiceSubject, pub.IndexerCalls[0].Subject) + assert.Len(t, pub.AccessCalls, 1) + assert.Equal(t, constants.DeleteAllAccessGroupsIOServiceSubject, pub.AccessCalls[0].Subject) + + assert.True(t, m.IsTombstoned(context.Background(), + fmt.Sprintf("%s.svc-1", constants.KVMappingPrefixService))) +} diff --git a/internal/service/datastream_subgroup_handler.go b/internal/service/datastream_subgroup_handler.go index fb841b3..e3e0fe5 100644 --- a/internal/service/datastream_subgroup_handler.go +++ b/internal/service/datastream_subgroup_handler.go @@ -23,6 +23,10 @@ func HandleDataStreamSubgroupUpdate(ctx context.Context, uid string, data map[st // Resolve v1 project SFID → v2 project UID via the shared project.sfid.{sfid} mapping // written by lfx-v1-sync-helper. NAK if the project hasn't been processed yet. projectSFID := mapconv.StringVal(data, "project_id") + if projectSFID == "" { + slog.ErrorContext(ctx, "missing project_id in subgroup event, discarding", "uid", uid) + return false // ACK — malformed data, retrying won't help + } projectUID, ok := mappings.GetMappingValue(ctx, fmt.Sprintf("%s.%s", constants.KVMappingPrefixProjectBySFID, projectSFID)) if !ok { slog.WarnContext(ctx, "project mapping not yet available, NAKing subgroup for retry", diff --git a/internal/service/datastream_subgroup_handler_test.go b/internal/service/datastream_subgroup_handler_test.go new file mode 100644 index 0000000..40c4847 --- /dev/null +++ b/internal/service/datastream_subgroup_handler_test.go @@ -0,0 +1,148 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package service + +import ( + "context" + "fmt" + "testing" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/mock" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + "github.com/stretchr/testify/assert" +) + +func TestHandleDataStreamSubgroupUpdate_MissingProjectID_ACK(t *testing.T) { + nak := HandleDataStreamSubgroupUpdate(context.Background(), "sg-1", + map[string]any{}, + &mock.SpyMessagePublisher{}, mock.NewFakeMappingStore()) + assert.False(t, nak, "missing project_id should ACK") +} + +func TestHandleDataStreamSubgroupUpdate_ProjectMappingAbsent_NAK(t *testing.T) { + nak := HandleDataStreamSubgroupUpdate(context.Background(), "sg-1", + map[string]any{"project_id": "sfid-proj"}, + &mock.SpyMessagePublisher{}, mock.NewFakeMappingStore()) + assert.True(t, nak, "unknown project mapping should NAK") +} + +func TestHandleDataStreamSubgroupUpdate_CommitteeMappingAbsent_NAK(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.sfid-proj", constants.KVMappingPrefixProjectBySFID), "proj-uid") + m.Set(fmt.Sprintf("%s.svc-1", constants.KVMappingPrefixService), "svc-1") + + nak := HandleDataStreamSubgroupUpdate(context.Background(), "sg-1", + map[string]any{ + "project_id": "sfid-proj", + "parent_id": "svc-1", + "committee": "sfid-committee", // mapping absent + }, + &mock.SpyMessagePublisher{}, m) + assert.True(t, nak, "unknown committee mapping should NAK") +} + +func TestHandleDataStreamSubgroupUpdate_ParentServiceAbsent_NAK(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.sfid-proj", constants.KVMappingPrefixProjectBySFID), "proj-uid") + // service mapping deliberately absent + + nak := HandleDataStreamSubgroupUpdate(context.Background(), "sg-1", + map[string]any{ + "project_id": "sfid-proj", + "parent_id": "svc-1", + }, + &mock.SpyMessagePublisher{}, m) + assert.True(t, nak, "absent parent service should NAK") +} + +func TestHandleDataStreamSubgroupUpdate_HappyPath_ACKAndPublishesAndWritesMappings(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.sfid-proj", constants.KVMappingPrefixProjectBySFID), "proj-uid") + m.Set(fmt.Sprintf("%s.svc-1", constants.KVMappingPrefixService), "svc-1") + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamSubgroupUpdate(context.Background(), "sg-1", + map[string]any{ + "project_id": "sfid-proj", + "parent_id": "svc-1", + "group_id": float64(42), + "group_name": "dev", + }, + pub, m) + + assert.False(t, nak) + assert.Len(t, pub.IndexerCalls, 1) + assert.Equal(t, constants.IndexGroupsIOMailingListSubject, pub.IndexerCalls[0].Subject) + assert.Len(t, pub.AccessCalls, 1) + assert.Equal(t, constants.UpdateAccessGroupsIOMailingListSubject, pub.AccessCalls[0].Subject) + + _, present := m.GetMappingValue(context.Background(), + fmt.Sprintf("%s.sg-1", constants.KVMappingPrefixSubgroup)) + assert.True(t, present, "forward mapping should be written") + + rev, ok := m.GetMappingValue(context.Background(), + fmt.Sprintf("%s.42", constants.KVMappingPrefixSubgroupByGroupID)) + assert.True(t, ok, "reverse group_id index should be written") + assert.Equal(t, "sg-1", rev) +} + +func TestHandleDataStreamSubgroupUpdate_WithCommittee_ResolvesAndPublishes(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.sfid-proj", constants.KVMappingPrefixProjectBySFID), "proj-uid") + m.Set(fmt.Sprintf("%s.sfid-committee", constants.KVMappingPrefixCommitteeBySFID), "committee-uid") + m.Set(fmt.Sprintf("%s.svc-1", constants.KVMappingPrefixService), "svc-1") + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamSubgroupUpdate(context.Background(), "sg-1", + map[string]any{ + "project_id": "sfid-proj", + "parent_id": "svc-1", + "committee": "sfid-committee", + }, + pub, m) + + assert.False(t, nak) + assert.Len(t, pub.IndexerCalls, 1) +} + +func TestHandleDataStreamSubgroupUpdate_NoGroupID_NoReverseIndex(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.sfid-proj", constants.KVMappingPrefixProjectBySFID), "proj-uid") + m.Set(fmt.Sprintf("%s.svc-1", constants.KVMappingPrefixService), "svc-1") + + HandleDataStreamSubgroupUpdate(context.Background(), "sg-1", + map[string]any{"project_id": "sfid-proj", "parent_id": "svc-1"}, + &mock.SpyMessagePublisher{}, m) + + _, ok := m.GetMappingValue(context.Background(), + fmt.Sprintf("%s.0", constants.KVMappingPrefixSubgroupByGroupID)) + assert.False(t, ok, "should not write reverse index when group_id is absent") +} + +func TestHandleDataStreamSubgroupDelete_DuplicateDelete_ACK(t *testing.T) { + m := mock.NewFakeMappingStore() + ctx := context.Background() + _ = m.PutTombstone(ctx, fmt.Sprintf("%s.sg-1", constants.KVMappingPrefixSubgroup)) + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamSubgroupDelete(ctx, "sg-1", pub, m) + + assert.False(t, nak) + assert.Empty(t, pub.IndexerCalls, "duplicate delete should not publish") +} + +func TestHandleDataStreamSubgroupDelete_HappyPath_ACKAndTombstones(t *testing.T) { + m := mock.NewFakeMappingStore() + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamSubgroupDelete(context.Background(), "sg-1", pub, m) + + assert.False(t, nak) + assert.Len(t, pub.IndexerCalls, 1) + assert.Equal(t, constants.IndexGroupsIOMailingListSubject, pub.IndexerCalls[0].Subject) + assert.Len(t, pub.AccessCalls, 1) + assert.Equal(t, constants.DeleteAllAccessGroupsIOMailingListSubject, pub.AccessCalls[0].Subject) + + assert.True(t, m.IsTombstoned(context.Background(), + fmt.Sprintf("%s.sg-1", constants.KVMappingPrefixSubgroup))) +} diff --git a/pkg/mapconv/field_extract.go b/pkg/mapconv/field_extract.go index ffedb58..4f158c9 100644 --- a/pkg/mapconv/field_extract.go +++ b/pkg/mapconv/field_extract.go @@ -11,6 +11,8 @@ package mapconv import ( "fmt" + "math" + "strconv" "strings" ) @@ -43,14 +45,19 @@ func Int64Ptr(data map[string]any, key string) *int64 { var n int64 switch t := v.(type) { case float64: + if t != math.Trunc(t) { + return nil + } n = int64(t) case string: if t == "" { return nil } - if _, err := fmt.Sscanf(t, "%d", &n); err != nil { + parsed, err := strconv.ParseInt(t, 10, 64) + if err != nil { return nil } + n = parsed default: return nil } @@ -67,10 +74,15 @@ func IntVal(data map[string]any, key string) int { } switch t := v.(type) { case float64: + if t != math.Trunc(t) { + return 0 + } return int(t) case string: - var n int - fmt.Sscanf(t, "%d", &n) //nolint:errcheck + n, err := strconv.Atoi(t) + if err != nil { + return 0 + } return n default: return 0 diff --git a/pkg/mapconv/field_extract_test.go b/pkg/mapconv/field_extract_test.go index 417a306..c519ae1 100644 --- a/pkg/mapconv/field_extract_test.go +++ b/pkg/mapconv/field_extract_test.go @@ -45,6 +45,8 @@ func TestInt64Ptr(t *testing.T) { {"nil value", map[string]any{"k": nil}, "k", nil}, {"missing key", map[string]any{}, "k", nil}, {"unparseable string", map[string]any{"k": "abc"}, "k", nil}, + {"partial string 123abc", map[string]any{"k": "123abc"}, "k", nil}, + {"non-integer float64 truncated", map[string]any{"k": float64(3.9)}, "k", nil}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -64,6 +66,8 @@ func TestIntVal(t *testing.T) { {"string value", map[string]any{"k": "250"}, "k", 250}, {"nil value", map[string]any{"k": nil}, "k", 0}, {"missing key", map[string]any{}, "k", 0}, + {"partial string 123abc", map[string]any{"k": "123abc"}, "k", 0}, + {"non-integer float64 truncated", map[string]any{"k": float64(3.9)}, "k", 0}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { From 5368c99ebec94bd9b88ae3ed206d3103fa589231 Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Fri, 13 Mar 2026 10:00:38 -0300 Subject: [PATCH 06/11] feat(eventing): add tombstone handling for member and subgroup updates and deletes Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-932 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- .../lfx-v2-mailing-list-service/values.yaml | 3 + internal/service/datastream_member_handler.go | 19 ++- .../service/datastream_member_handler_test.go | 127 ++++++++++++++++++ .../service/datastream_subgroup_handler.go | 18 ++- 4 files changed, 163 insertions(+), 4 deletions(-) create mode 100644 internal/service/datastream_member_handler_test.go diff --git a/charts/lfx-v2-mailing-list-service/values.yaml b/charts/lfx-v2-mailing-list-service/values.yaml index d76e221..7bab1d4 100644 --- a/charts/lfx-v2-mailing-list-service/values.yaml +++ b/charts/lfx-v2-mailing-list-service/values.yaml @@ -319,6 +319,9 @@ app: GROUPSIO_WEBHOOK_SECRET: value: null + EVENTING_ENABLED: + value: "true" + # External Secrets Operator externalSecretsOperator: # Enable/disable External Secrets Operator integration diff --git a/internal/service/datastream_member_handler.go b/internal/service/datastream_member_handler.go index f0f26be..f52b326 100644 --- a/internal/service/datastream_member_handler.go +++ b/internal/service/datastream_member_handler.go @@ -40,11 +40,17 @@ func HandleDataStreamMemberUpdate(ctx context.Context, uid string, data map[stri return true // NAK — retry with backoff } - member := transformV1ToGrpsIOMember(uid, mailingListUID, data) - mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixMember, uid) + + if mappings.IsTombstoned(ctx, mKey) { + slog.InfoContext(ctx, "member mapping is tombstoned, skipping update", "uid", uid) + return false + } + action := mappings.ResolveAction(ctx, mKey) + member := transformV1ToGrpsIOMember(uid, mailingListUID, data) + msg := &model.IndexerMessage{Action: action, Tags: member.Tags()} built, err := msg.Build(ctx, member) if err != nil { @@ -72,6 +78,15 @@ func HandleDataStreamMemberDelete(ctx context.Context, uid string, publisher por return false } + // If there is no mapping entry, this record was never indexed — nothing to delete. + if !mappings.IsMappingPresent(ctx, mKey) { + slog.InfoContext(ctx, "member was never indexed, skipping OpenSearch delete", "uid", uid) + if err := mappings.PutTombstone(ctx, mKey); err != nil { + slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mKey, "error", err) + } + return false + } + msg := &model.IndexerMessage{Action: model.ActionDeleted} built, err := msg.Build(ctx, uid) if err != nil { diff --git a/internal/service/datastream_member_handler_test.go b/internal/service/datastream_member_handler_test.go new file mode 100644 index 0000000..043176f --- /dev/null +++ b/internal/service/datastream_member_handler_test.go @@ -0,0 +1,127 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package service + +import ( + "context" + "fmt" + "testing" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/domain/model" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/internal/infrastructure/mock" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" + "github.com/stretchr/testify/assert" +) + +// --- HandleDataStreamMemberUpdate --- + +func TestHandleDataStreamMemberUpdate_MissingGroupID_ACK(t *testing.T) { + nak := HandleDataStreamMemberUpdate(context.Background(), "mem-1", + map[string]any{}, + &mock.SpyMessagePublisher{}, mock.NewFakeMappingStore()) + assert.False(t, nak, "missing group_id should ACK (malformed data, no retry)") +} + +func TestHandleDataStreamMemberUpdate_ParentSubgroupAbsent_NAK(t *testing.T) { + // group_id present but no subgroup mapping written yet + nak := HandleDataStreamMemberUpdate(context.Background(), "mem-1", + map[string]any{"group_id": float64(42)}, + &mock.SpyMessagePublisher{}, mock.NewFakeMappingStore()) + assert.True(t, nak, "absent subgroup mapping should NAK for retry") +} + +func TestHandleDataStreamMemberUpdate_Tombstoned_ACK(t *testing.T) { + m := mock.NewFakeMappingStore() + ctx := context.Background() + m.Set(fmt.Sprintf("%s.42", constants.KVMappingPrefixSubgroupByGroupID), "sg-1") + _ = m.PutTombstone(ctx, fmt.Sprintf("%s.mem-1", constants.KVMappingPrefixMember)) + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamMemberUpdate(ctx, "mem-1", + map[string]any{"group_id": float64(42)}, + pub, m) + + assert.False(t, nak) + assert.Empty(t, pub.IndexerCalls, "tombstoned member should not publish") +} + +func TestHandleDataStreamMemberUpdate_HappyPath_ACKAndPublishesAndWritesMapping(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.42", constants.KVMappingPrefixSubgroupByGroupID), "sg-1") + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamMemberUpdate(context.Background(), "mem-1", + map[string]any{ + "group_id": float64(42), + "member_id": float64(99), + "email": "alice@example.com", + "full_name": "Alice Smith", + }, + pub, m) + + assert.False(t, nak) + assert.Len(t, pub.IndexerCalls, 1) + assert.Equal(t, constants.IndexGroupsIOMemberSubject, pub.IndexerCalls[0].Subject) + assert.Empty(t, pub.AccessCalls, "member access is inherited — no access message expected") + + _, present := m.GetMappingValue(context.Background(), + fmt.Sprintf("%s.mem-1", constants.KVMappingPrefixMember)) + assert.True(t, present, "forward mapping should be written after successful processing") +} + +func TestHandleDataStreamMemberUpdate_CreateVsUpdate_Action(t *testing.T) { + m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.42", constants.KVMappingPrefixSubgroupByGroupID), "sg-1") + + data := func() map[string]any { return map[string]any{"group_id": float64(42)} } + ctx := context.Background() + mKey := fmt.Sprintf("%s.mem-1", constants.KVMappingPrefixMember) + + assert.Equal(t, model.ActionCreated, m.ResolveAction(ctx, mKey)) + HandleDataStreamMemberUpdate(ctx, "mem-1", data(), &mock.SpyMessagePublisher{}, m) + assert.Equal(t, model.ActionUpdated, m.ResolveAction(ctx, mKey)) +} + +// --- HandleDataStreamMemberDelete --- + +func TestHandleDataStreamMemberDelete_DuplicateDelete_ACK(t *testing.T) { + m := mock.NewFakeMappingStore() + ctx := context.Background() + _ = m.PutTombstone(ctx, fmt.Sprintf("%s.mem-1", constants.KVMappingPrefixMember)) + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamMemberDelete(ctx, "mem-1", pub, m) + + assert.False(t, nak) + assert.Empty(t, pub.IndexerCalls, "duplicate delete should not publish") +} + +func TestHandleDataStreamMemberDelete_NeverIndexed_TombstonesWithoutPublishing(t *testing.T) { + m := mock.NewFakeMappingStore() + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamMemberDelete(context.Background(), "mem-1", pub, m) + + assert.False(t, nak) + assert.Empty(t, pub.IndexerCalls, "never-indexed member should not publish indexer message") + assert.True(t, m.IsTombstoned(context.Background(), + fmt.Sprintf("%s.mem-1", constants.KVMappingPrefixMember)), + "should still tombstone to prevent future re-processing") +} + +func TestHandleDataStreamMemberDelete_HappyPath_ACKAndTombstones(t *testing.T) { + m := mock.NewFakeMappingStore() + ctx := context.Background() + mKey := fmt.Sprintf("%s.mem-1", constants.KVMappingPrefixMember) + _ = m.PutMapping(ctx, mKey, "mem-1") + + pub := &mock.SpyMessagePublisher{} + nak := HandleDataStreamMemberDelete(ctx, "mem-1", pub, m) + + assert.False(t, nak) + assert.Len(t, pub.IndexerCalls, 1) + assert.Equal(t, constants.IndexGroupsIOMemberSubject, pub.IndexerCalls[0].Subject) + assert.Empty(t, pub.AccessCalls, "member delete should not publish access message") + + assert.True(t, m.IsTombstoned(ctx, mKey)) +} diff --git a/internal/service/datastream_subgroup_handler.go b/internal/service/datastream_subgroup_handler.go index e3e0fe5..cadbff2 100644 --- a/internal/service/datastream_subgroup_handler.go +++ b/internal/service/datastream_subgroup_handler.go @@ -59,6 +59,12 @@ func HandleDataStreamSubgroupUpdate(ctx context.Context, uid string, data map[st } mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixSubgroup, uid) + + if mappings.IsTombstoned(ctx, mKey) { + slog.InfoContext(ctx, "subgroup mapping is tombstoned, skipping update", "uid", uid) + return false + } + action := mappings.ResolveAction(ctx, mKey) msg := &model.IndexerMessage{Action: action, Tags: list.Tags()} @@ -116,6 +122,15 @@ func HandleDataStreamSubgroupDelete(ctx context.Context, uid string, publisher p return false } + // If there is no mapping entry, this record was never indexed — nothing to delete. + if !mappings.IsMappingPresent(ctx, mKey) { + slog.InfoContext(ctx, "subgroup was never indexed, skipping OpenSearch delete", "uid", uid) + if err := mappings.PutTombstone(ctx, mKey); err != nil { + slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mKey, "error", err) + } + return false + } + msg := &model.IndexerMessage{Action: model.ActionDeleted} built, err := msg.Build(ctx, uid) if err != nil { @@ -128,8 +143,7 @@ func HandleDataStreamSubgroupDelete(ctx context.Context, uid string, publisher p return pkgerrors.IsTransient(err) } - accessMsg := &model.AccessMessage{UID: uid, ObjectType: constants.ObjectTypeGroupsIOMailingList} - if err := publisher.Access(ctx, constants.DeleteAllAccessGroupsIOMailingListSubject, accessMsg); err != nil { + if err := publisher.Access(ctx, constants.DeleteAllAccessGroupsIOMailingListSubject, uid); err != nil { slog.WarnContext(ctx, "failed to publish subgroup delete access message", "uid", uid, "error", err) } From b182d8efaddd519f23210240120fd36f7c849128 Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Fri, 13 Mar 2026 10:07:53 -0300 Subject: [PATCH 07/11] feat(eventing): add subgroup mapping setup for subgroup delete happy path test Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- internal/service/datastream_subgroup_handler_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/service/datastream_subgroup_handler_test.go b/internal/service/datastream_subgroup_handler_test.go index 40c4847..72b966d 100644 --- a/internal/service/datastream_subgroup_handler_test.go +++ b/internal/service/datastream_subgroup_handler_test.go @@ -134,6 +134,7 @@ func TestHandleDataStreamSubgroupDelete_DuplicateDelete_ACK(t *testing.T) { func TestHandleDataStreamSubgroupDelete_HappyPath_ACKAndTombstones(t *testing.T) { m := mock.NewFakeMappingStore() + m.Set(fmt.Sprintf("%s.sg-1", constants.KVMappingPrefixSubgroup), "sg-1") pub := &mock.SpyMessagePublisher{} nak := HandleDataStreamSubgroupDelete(context.Background(), "sg-1", pub, m) From 6033090260598f5c637253cec60bd7b03c178442 Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Fri, 13 Mar 2026 13:12:33 -0300 Subject: [PATCH 08/11] feat(eventing): add error handling for missing parent_id in subgroup update Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- internal/service/datastream_subgroup_handler.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/internal/service/datastream_subgroup_handler.go b/internal/service/datastream_subgroup_handler.go index cadbff2..b1cda48 100644 --- a/internal/service/datastream_subgroup_handler.go +++ b/internal/service/datastream_subgroup_handler.go @@ -49,6 +49,11 @@ func HandleDataStreamSubgroupUpdate(ctx context.Context, uid string, data map[st list := transformV1ToGrpsIOMailingList(uid, data) + if list.ServiceUID == "" { + slog.ErrorContext(ctx, "missing parent_id in subgroup event, discarding", "uid", uid) + return false // ACK — malformed data, retrying won't help + } + // Parent dependency check: the indexer must have the parent service record before // the child mailing list to avoid orphaned documents in OpenSearch. serviceKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixService, list.ServiceUID) From 8d81c829c9d88f2debb635e532a526096939230f Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Fri, 13 Mar 2026 14:31:06 -0300 Subject: [PATCH 09/11] feat(mapconv): remove unused IntVal and BoolVal functions and their tests Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- .../service/datastream_service_handler.go | 3 +- pkg/mapconv/field_extract.go | 44 ------------------- pkg/mapconv/field_extract_test.go | 43 ------------------ 3 files changed, 1 insertion(+), 89 deletions(-) diff --git a/internal/service/datastream_service_handler.go b/internal/service/datastream_service_handler.go index c3adcd3..ac0ec42 100644 --- a/internal/service/datastream_service_handler.go +++ b/internal/service/datastream_service_handler.go @@ -90,8 +90,7 @@ func HandleDataStreamServiceDelete(ctx context.Context, uid string, publisher po return pkgerrors.IsTransient(err) } - accessMsg := &model.AccessMessage{UID: uid, ObjectType: constants.ObjectTypeGroupsIOService} - if err := publisher.Access(ctx, constants.DeleteAllAccessGroupsIOServiceSubject, accessMsg); err != nil { + if err := publisher.Access(ctx, constants.DeleteAllAccessGroupsIOServiceSubject, uid); err != nil { slog.WarnContext(ctx, "failed to publish service delete access message", "uid", uid, "error", err) } diff --git a/pkg/mapconv/field_extract.go b/pkg/mapconv/field_extract.go index 4f158c9..37df6ce 100644 --- a/pkg/mapconv/field_extract.go +++ b/pkg/mapconv/field_extract.go @@ -13,7 +13,6 @@ import ( "fmt" "math" "strconv" - "strings" ) // StringVal extracts a string value from data[key]. @@ -64,49 +63,6 @@ func Int64Ptr(data map[string]any, key string) *int64 { return &n } -// IntVal extracts an int from data[key]. -// Accepts float64 or string representations. -// Returns 0 if the key is absent, nil, or unparseable. -func IntVal(data map[string]any, key string) int { - v, ok := data[key] - if !ok || v == nil { - return 0 - } - switch t := v.(type) { - case float64: - if t != math.Trunc(t) { - return 0 - } - return int(t) - case string: - n, err := strconv.Atoi(t) - if err != nil { - return 0 - } - return n - default: - return 0 - } -} - -// BoolVal extracts a bool from data[key]. -// Accepts JSON boolean or the strings "true" / "false" (case-insensitive). -// Returns false if the key is absent or the value cannot be interpreted. -func BoolVal(data map[string]any, key string) bool { - v, ok := data[key] - if !ok || v == nil { - return false - } - switch t := v.(type) { - case bool: - return t - case string: - return strings.EqualFold(t, "true") - default: - return false - } -} - // StringSliceVal extracts a []string from data[key]. // Accepts a JSON array of strings or a bare string (returned as a one-element slice). // Returns nil if the key is absent or the value is an empty string. diff --git a/pkg/mapconv/field_extract_test.go b/pkg/mapconv/field_extract_test.go index c519ae1..df993cb 100644 --- a/pkg/mapconv/field_extract_test.go +++ b/pkg/mapconv/field_extract_test.go @@ -55,49 +55,6 @@ func TestInt64Ptr(t *testing.T) { } } -func TestIntVal(t *testing.T) { - tests := []struct { - name string - data map[string]any - key string - expected int - }{ - {"float64 value", map[string]any{"k": float64(7)}, "k", 7}, - {"string value", map[string]any{"k": "250"}, "k", 250}, - {"nil value", map[string]any{"k": nil}, "k", 0}, - {"missing key", map[string]any{}, "k", 0}, - {"partial string 123abc", map[string]any{"k": "123abc"}, "k", 0}, - {"non-integer float64 truncated", map[string]any{"k": float64(3.9)}, "k", 0}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, tt.expected, mapconv.IntVal(tt.data, tt.key)) - }) - } -} - -func TestBoolVal(t *testing.T) { - tests := []struct { - name string - data map[string]any - key string - expected bool - }{ - {"bool true", map[string]any{"k": true}, "k", true}, - {"bool false", map[string]any{"k": false}, "k", false}, - {"string true", map[string]any{"k": "true"}, "k", true}, - {"string TRUE uppercase", map[string]any{"k": "TRUE"}, "k", true}, - {"string false", map[string]any{"k": "false"}, "k", false}, - {"nil value", map[string]any{"k": nil}, "k", false}, - {"missing key", map[string]any{}, "k", false}, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - assert.Equal(t, tt.expected, mapconv.BoolVal(tt.data, tt.key)) - }) - } -} - func TestStringSliceVal(t *testing.T) { tests := []struct { name string From b9849c857b0cef183ab06b9e1681c844ebf85ada Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Mon, 16 Mar 2026 14:50:02 -0300 Subject: [PATCH 10/11] feat(eventing): enhance data models and handlers with additional fields and settings for improved event processing Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Assisted by [Claude Code](https://claude.ai/code) Signed-off-by: Mauricio Zanetti Salomao --- docs/event-processing.md | 42 +++++++--- internal/domain/model/grpsio_mailing_list.go | 8 +- internal/domain/model/grpsio_member.go | 26 ++++-- internal/domain/model/grpsio_service.go | 1 + internal/service/datastream_member_handler.go | 42 ++++++---- .../service/datastream_service_handler.go | 70 +++++++++++++--- .../service/datastream_subgroup_handler.go | 81 +++++++++++++++++++ 7 files changed, 227 insertions(+), 43 deletions(-) diff --git a/docs/event-processing.md b/docs/event-processing.md index abca446..15ff019 100644 --- a/docs/event-processing.md +++ b/docs/event-processing.md @@ -125,7 +125,12 @@ The member handler reads this entry to resolve the parent `MailingListUID` befor | `group_id` | `GroupID` | | `prefix` | `Prefix` | | `project_id` | `ProjectUID` | +| `proj_id` | `ProjectSlug` | +| `writers` | `GrpsIOServiceSettings.Writers` | +| `auditors` | `GrpsIOServiceSettings.Auditors` | +| `created_at` | `CreatedAt` | | `last_modified_at` | `UpdatedAt` | +| `last_system_modified_at` | `SystemUpdatedAt` | | _(hardcoded)_ | `Source = "v1-sync"` | ### Mailing List / Subgroup (`GrpsIOMailingList`) @@ -137,12 +142,18 @@ The member handler reads this entry to resolve the parent `MailingListUID` befor | `visibility == "Public"` | `Public` | | `type` | `Type` | | `description` | `Description` | +| `title` | `Title` | | `subject_tag` | `SubjectTag` | +| `url` | `URL` | +| `flags` | `Flags` | +| `subscriber_count` | `SubscriberCount` | | `parent_id` | `ServiceUID` | | `project_id` | `ProjectUID` | | `committee` | `Committees[0].UID` | | `committee_filters` | `Committees[0].AllowedVotingStatuses` | +| `created_at` | `CreatedAt` | | `last_modified_at` | `UpdatedAt` | +| `last_system_modified_at` | `SystemUpdatedAt` | | _(hardcoded)_ | `Source = "v1-sync"` | ### Member (`GrpsIOMember`) @@ -151,16 +162,26 @@ The member handler reads this entry to resolve the parent `MailingListUID` befor |---|---| | `member_id` | `MemberID` | | `group_id` | `GroupID` | +| `user_id` | `UserID` | | `full_name` (split on first space) | `FirstName`, `LastName` | | `email` | `Email` | | `organization` | `Organization` | | `job_title` | `JobTitle` | +| `groups_email` | `GroupsEmail` | +| `groups_full_name` | `GroupsFullName` | +| `committee_email` | `CommitteeEmail` | +| `committee_full_name` | `CommitteeFullName` | +| `committee_id` | `CommitteeID` | +| `role` | `Role` | +| `voting_status` | `VotingStatus` | | `member_type` | `MemberType` | | `delivery_mode` | `DeliveryMode` | +| `delivery_mode_list` | `DeliveryModeList` | | `mod_status` | `ModStatus` | | `status` | `Status` | | `created_at` | `CreatedAt` | | `last_modified_at` | `UpdatedAt` | +| `last_system_modified_at` | `SystemUpdatedAt` | | _(resolved from reverse index)_ | `MailingListUID` | | _(hardcoded)_ | `Source = "v1-sync"` | @@ -170,18 +191,17 @@ The member handler reads this entry to resolve the parent `MailingListUID` befor ## NATS Subjects Published -| Entity | Action | Subject | +| Entity | Subject | Notes | |---|---|---| -| Service | Created / Updated | `lfx.index.groupsio_service` | -| Service | Created / Updated | `lfx.update_access.groupsio_service` | -| Service | Deleted | `lfx.index.groupsio_service` | -| Service | Deleted | `lfx.delete_all_access.groupsio_service` | -| Mailing List | Created / Updated | `lfx.index.groupsio_mailing_list` | -| Mailing List | Created / Updated | `lfx.update_access.groupsio_mailing_list` | -| Mailing List | Deleted | `lfx.index.groupsio_mailing_list` | -| Mailing List | Deleted | `lfx.delete_all_access.groupsio_mailing_list` | -| Member | Created / Updated | `lfx.index.groupsio_member` | -| Member | Deleted | `lfx.index.groupsio_member` | +| Service | `lfx.index.groupsio_service` | All actions (Created / Updated / Deleted) | +| Service | `lfx.index.groupsio_service_settings` | Created / Updated only (when writers/auditors present) | +| Service | `lfx.update_access.groupsio_service` | Created / Updated only | +| Service | `lfx.delete_all_access.groupsio_service` | Deleted only | +| Mailing List | `lfx.index.groupsio_mailing_list` | All actions (Created / Updated / Deleted) | +| Mailing List | `lfx.index.groupsio_mailing_list_settings` | Created / Updated only (when writers/auditors present) | +| Mailing List | `lfx.update_access.groupsio_mailing_list` | Created / Updated only | +| Mailing List | `lfx.delete_all_access.groupsio_mailing_list` | Deleted only | +| Member | `lfx.index.groupsio_member` | All actions (Created / Updated / Deleted) | --- diff --git a/internal/domain/model/grpsio_mailing_list.go b/internal/domain/model/grpsio_mailing_list.go index 0c7a441..097c731 100644 --- a/internal/domain/model/grpsio_mailing_list.go +++ b/internal/domain/model/grpsio_mailing_list.go @@ -39,8 +39,12 @@ type GrpsIOMailingList struct { ProjectName string `json:"project_name"` // Inherited from parent service ProjectSlug string `json:"project_slug"` // Inherited from parent service - CreatedAt time.Time `json:"created_at"` - UpdatedAt time.Time `json:"updated_at"` + URL string `json:"url,omitempty"` // The groups.io URL for the subgroup + Flags []string `json:"flags,omitempty"` // Warning messages about unusual settings + + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + SystemUpdatedAt time.Time `json:"system_updated_at,omitempty"` // Last modified by system (scripts/webhooks) } // Removed visibility constants - now using Public bool field diff --git a/internal/domain/model/grpsio_member.go b/internal/domain/model/grpsio_member.go index 3b5ee53..23e2636 100644 --- a/internal/domain/model/grpsio_member.go +++ b/internal/domain/model/grpsio_member.go @@ -28,17 +28,30 @@ type GrpsIOMember struct { Source string `json:"source"` // "api", "webhook", or "mock" - tracks origin for business logic // Member Information - Username string `json:"username"` // Username + UserID string `json:"user_id,omitempty"` // User-service ID of the member + Username string `json:"username"` // Username FirstName string `json:"first_name"` LastName string `json:"last_name"` Email string `json:"email"` // Required, RFC 5322 Organization string `json:"organization"` // Optional JobTitle string `json:"job_title"` // Optional + // Normalised search fields (lowercase, for filtering) + GroupsEmail string `json:"groups_email,omitempty"` // Lowercase email from Groups.io + GroupsFullName string `json:"groups_full_name,omitempty"` // Lowercase full name from Groups.io + CommitteeEmail string `json:"committee_email,omitempty"` // Lowercase email from Committee Service + CommitteeFullName string `json:"committee_full_name,omitempty"` // Lowercase full name from Committee Service + + // Committee association + CommitteeID string `json:"committee_id,omitempty"` // Committee ID if member belongs to a committee + Role string `json:"role,omitempty"` // Role of the member + VotingStatus string `json:"voting_status,omitempty"` // Voting status of the member + // Member Configuration - MemberType string `json:"member_type"` // "committee" or "direct" - DeliveryMode string `json:"delivery_mode"` // Email delivery preference - ModStatus string `json:"mod_status"` // "none", "moderator", "owner" + MemberType string `json:"member_type"` // "committee" or "direct" + DeliveryMode string `json:"delivery_mode"` // Email delivery preference + DeliveryModeList string `json:"delivery_mode_list,omitempty"` // Delivery mode list from Groups.io + ModStatus string `json:"mod_status"` // "none", "moderator", "owner" // Status Status string `json:"status"` // Groups.io status: normal, pending, etc. @@ -47,8 +60,9 @@ type GrpsIOMember struct { LastReviewedBy *string `json:"last_reviewed_by"` // Nullable user ID // Timestamps - CreatedAt time.Time `json:"created_at"` - UpdatedAt time.Time `json:"updated_at"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + SystemUpdatedAt time.Time `json:"system_updated_at,omitempty"` // Last modified by system (scripts/webhooks) } // BuildIndexKey generates a SHA-256 hash for use as a NATS KV key. diff --git a/internal/domain/model/grpsio_service.go b/internal/domain/model/grpsio_service.go index 385a91c..4e51d85 100644 --- a/internal/domain/model/grpsio_service.go +++ b/internal/domain/model/grpsio_service.go @@ -93,6 +93,7 @@ type GrpsIOService struct { Public bool `json:"public"` CreatedAt time.Time `json:"created_at"` UpdatedAt time.Time `json:"updated_at"` + SystemUpdatedAt time.Time `json:"system_updated_at,omitempty"` // Last modified by system (scripts/webhooks) } // BuildIndexKey generates a SHA-256 hash for use as a NATS KV key diff --git a/internal/service/datastream_member_handler.go b/internal/service/datastream_member_handler.go index f52b326..5d146fe 100644 --- a/internal/service/datastream_member_handler.go +++ b/internal/service/datastream_member_handler.go @@ -111,20 +111,29 @@ func transformV1ToGrpsIOMember(uid, mailingListUID string, data map[string]any) firstName, lastName := splitFullName(mapconv.StringVal(data, "full_name")) member := &model.GrpsIOMember{ - UID: uid, - MailingListUID: mailingListUID, - MemberID: mapconv.Int64Ptr(data, "member_id"), - GroupID: mapconv.Int64Ptr(data, "group_id"), - FirstName: firstName, - LastName: lastName, - Email: mapconv.StringVal(data, "email"), - Organization: mapconv.StringVal(data, "organization"), - JobTitle: mapconv.StringVal(data, "job_title"), - MemberType: mapconv.StringVal(data, "member_type"), - DeliveryMode: mapconv.StringVal(data, "delivery_mode"), - ModStatus: mapconv.StringVal(data, "mod_status"), - Status: mapconv.StringVal(data, "status"), - Source: "v1-sync", + UID: uid, + MailingListUID: mailingListUID, + MemberID: mapconv.Int64Ptr(data, "member_id"), + GroupID: mapconv.Int64Ptr(data, "group_id"), + UserID: mapconv.StringVal(data, "user_id"), + FirstName: firstName, + LastName: lastName, + Email: mapconv.StringVal(data, "email"), + Organization: mapconv.StringVal(data, "organization"), + JobTitle: mapconv.StringVal(data, "job_title"), + GroupsEmail: mapconv.StringVal(data, "groups_email"), + GroupsFullName: mapconv.StringVal(data, "groups_full_name"), + CommitteeEmail: mapconv.StringVal(data, "committee_email"), + CommitteeFullName: mapconv.StringVal(data, "committee_full_name"), + CommitteeID: mapconv.StringVal(data, "committee_id"), + Role: mapconv.StringVal(data, "role"), + VotingStatus: mapconv.StringVal(data, "voting_status"), + MemberType: mapconv.StringVal(data, "member_type"), + DeliveryMode: mapconv.StringVal(data, "delivery_mode"), + DeliveryModeList: mapconv.StringVal(data, "delivery_mode_list"), + ModStatus: mapconv.StringVal(data, "mod_status"), + Status: mapconv.StringVal(data, "status"), + Source: "v1-sync", } if ts := mapconv.StringVal(data, "created_at"); ts != "" { @@ -137,6 +146,11 @@ func transformV1ToGrpsIOMember(uid, mailingListUID string, data map[string]any) member.UpdatedAt = t } } + if ts := mapconv.StringVal(data, "last_system_modified_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + member.SystemUpdatedAt = t + } + } return member } diff --git a/internal/service/datastream_service_handler.go b/internal/service/datastream_service_handler.go index ac0ec42..51d02af 100644 --- a/internal/service/datastream_service_handler.go +++ b/internal/service/datastream_service_handler.go @@ -50,13 +50,37 @@ func HandleDataStreamServiceUpdate(ctx context.Context, uid string, data map[str return pkgerrors.IsTransient(err) } + // Publish settings indexer message when writers or auditors are present. + settings := buildServiceSettings(uid, data) + if settings != nil { + settingsMsg := &model.IndexerMessage{Action: action, Tags: settings.Tags()} + builtSettings, errSettings := settingsMsg.Build(ctx, settings) + if errSettings != nil { + slog.ErrorContext(ctx, "failed to build service settings indexer message", "uid", uid, "error", errSettings) + } + if errSettings == nil { + if errPublish := publisher.Indexer(ctx, constants.IndexGroupsIOServiceSettingsSubject, builtSettings); errPublish != nil { + slog.ErrorContext(ctx, "failed to publish service settings indexer message", "uid", uid, "error", errPublish) + } + } + } + + references := map[string][]string{ + constants.RelationProject: {svc.ProjectUID}, + } + if settings != nil { + if writers := userInfoUsernames(settings.Writers); len(writers) > 0 { + references[constants.RelationWriter] = writers + } + if auditors := userInfoUsernames(settings.Auditors); len(auditors) > 0 { + references[constants.RelationAuditor] = auditors + } + } accessMsg := &model.AccessMessage{ UID: uid, ObjectType: constants.ObjectTypeGroupsIOService, Public: svc.Public, - References: map[string][]string{ - constants.RelationProject: {svc.ProjectUID}, - }, + References: references, } if err := publisher.Access(ctx, constants.UpdateAccessGroupsIOServiceSubject, accessMsg); err != nil { slog.WarnContext(ctx, "failed to publish service access message", "uid", uid, "error", err) @@ -100,24 +124,50 @@ func HandleDataStreamServiceDelete(ctx context.Context, uid string, publisher po return false } +// buildServiceSettings constructs a GrpsIOServiceSettings from v1 writers/auditors. +// Returns nil when both slices are empty (no settings message needed). +func buildServiceSettings(uid string, data map[string]any) *model.GrpsIOServiceSettings { + writers := toUserInfoSlice(mapconv.StringSliceVal(data, "writers")) + auditors := toUserInfoSlice(mapconv.StringSliceVal(data, "auditors")) + if len(writers) == 0 && len(auditors) == 0 { + return nil + } + return &model.GrpsIOServiceSettings{ + UID: uid, + Writers: writers, + Auditors: auditors, + } +} + // transformV1ToGrpsIOService maps v1 DynamoDB fields to the GrpsIOService domain model. // Source is always "v1-sync" to distinguish these from API-created records. func transformV1ToGrpsIOService(uid string, data map[string]any) *model.GrpsIOService { svc := &model.GrpsIOService{ - UID: uid, - Type: mapconv.StringVal(data, "group_service_type"), - Domain: mapconv.StringVal(data, "domain"), - GroupID: mapconv.Int64Ptr(data, "group_id"), - Prefix: mapconv.StringVal(data, "prefix"), - ProjectUID: mapconv.StringVal(data, "project_id"), - Source: "v1-sync", + UID: uid, + Type: mapconv.StringVal(data, "group_service_type"), + Domain: mapconv.StringVal(data, "domain"), + GroupID: mapconv.Int64Ptr(data, "group_id"), + Prefix: mapconv.StringVal(data, "prefix"), + ProjectUID: mapconv.StringVal(data, "project_id"), + ProjectSlug: mapconv.StringVal(data, "proj_id"), + Source: "v1-sync", } + if ts := mapconv.StringVal(data, "created_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + svc.CreatedAt = t + } + } if ts := mapconv.StringVal(data, "last_modified_at"); ts != "" { if t, err := time.Parse(time.RFC3339, ts); err == nil { svc.UpdatedAt = t } } + if ts := mapconv.StringVal(data, "last_system_modified_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + svc.SystemUpdatedAt = t + } + } return svc } diff --git a/internal/service/datastream_subgroup_handler.go b/internal/service/datastream_subgroup_handler.go index b1cda48..7efd426 100644 --- a/internal/service/datastream_subgroup_handler.go +++ b/internal/service/datastream_subgroup_handler.go @@ -84,6 +84,21 @@ func HandleDataStreamSubgroupUpdate(ctx context.Context, uid string, data map[st return pkgerrors.IsTransient(err) } + // Publish settings indexer message when writers or auditors are present. + settings := buildMailingListSettings(uid, data) + if settings != nil { + settingsMsg := &model.IndexerMessage{Action: action, Tags: settings.Tags()} + builtSettings, errSettings := settingsMsg.Build(ctx, settings) + if errSettings != nil { + slog.ErrorContext(ctx, "failed to build subgroup settings indexer message", "uid", uid, "error", errSettings) + } + if errSettings == nil { + if errPublish := publisher.Indexer(ctx, constants.IndexGroupsIOMailingListSettingsSubject, builtSettings); errPublish != nil { + slog.ErrorContext(ctx, "failed to publish subgroup settings indexer message", "uid", uid, "error", errPublish) + } + } + } + references := map[string][]string{ // Project access is inherited through the service — only service reference needed. constants.RelationGroupsIOService: {list.ServiceUID}, @@ -93,6 +108,14 @@ func HandleDataStreamSubgroupUpdate(ctx context.Context, uid string, data map[st references[constants.RelationCommittee] = append(references[constants.RelationCommittee], committee.UID) } } + if settings != nil { + if writers := userInfoUsernames(settings.Writers); len(writers) > 0 { + references[constants.RelationWriter] = writers + } + if auditors := userInfoUsernames(settings.Auditors); len(auditors) > 0 { + references[constants.RelationAuditor] = auditors + } + } accessMsg := &model.AccessMessage{ UID: uid, ObjectType: constants.ObjectTypeGroupsIOMailingList, @@ -158,6 +181,45 @@ func HandleDataStreamSubgroupDelete(ctx context.Context, uid string, publisher p return false } +// buildMailingListSettings constructs a GrpsIOMailingListSettings from v1 writers/auditors. +// Returns nil when both slices are empty (no settings message needed). +func buildMailingListSettings(uid string, data map[string]any) *model.GrpsIOMailingListSettings { + writers := toUserInfoSlice(mapconv.StringSliceVal(data, "writers")) + auditors := toUserInfoSlice(mapconv.StringSliceVal(data, "auditors")) + if len(writers) == 0 && len(auditors) == 0 { + return nil + } + return &model.GrpsIOMailingListSettings{ + UID: uid, + Writers: writers, + Auditors: auditors, + } +} + +// toUserInfoSlice converts a slice of username strings to UserInfo values. +func toUserInfoSlice(usernames []string) []model.UserInfo { + if len(usernames) == 0 { + return nil + } + out := make([]model.UserInfo, len(usernames)) + for i, u := range usernames { + username := u + out[i] = model.UserInfo{Username: &username} + } + return out +} + +// userInfoUsernames extracts the non-empty Username pointers from a []UserInfo slice. +func userInfoUsernames(users []model.UserInfo) []string { + out := make([]string, 0, len(users)) + for _, u := range users { + if u.Username != nil && *u.Username != "" { + out = append(out, *u.Username) + } + } + return out +} + // transformV1ToGrpsIOMailingList maps v1 DynamoDB fields to the GrpsIOMailingList domain model. func transformV1ToGrpsIOMailingList(uid string, data map[string]any) *model.GrpsIOMailingList { list := &model.GrpsIOMailingList{ @@ -167,12 +229,19 @@ func transformV1ToGrpsIOMailingList(uid string, data map[string]any) *model.Grps Public: mapconv.StringVal(data, "visibility") == "Public", Type: mapconv.StringVal(data, "type"), Description: mapconv.StringVal(data, "description"), + Title: mapconv.StringVal(data, "title"), SubjectTag: mapconv.StringVal(data, "subject_tag"), + URL: mapconv.StringVal(data, "url"), + Flags: mapconv.StringSliceVal(data, "flags"), ServiceUID: mapconv.StringVal(data, "parent_id"), ProjectUID: mapconv.StringVal(data, "project_id"), Source: "v1-sync", } + if n := mapconv.Int64Ptr(data, "subscriber_count"); n != nil { + list.SubscriberCount = int(*n) + } + if committeeUID := mapconv.StringVal(data, "committee"); committeeUID != "" { list.Committees = []model.Committee{{ UID: committeeUID, @@ -180,11 +249,23 @@ func transformV1ToGrpsIOMailingList(uid string, data map[string]any) *model.Grps }} } + if ts := mapconv.StringVal(data, "created_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + list.CreatedAt = t + } + } + if ts := mapconv.StringVal(data, "last_modified_at"); ts != "" { if t, err := time.Parse(time.RFC3339, ts); err == nil { list.UpdatedAt = t } } + if ts := mapconv.StringVal(data, "last_system_modified_at"); ts != "" { + if t, err := time.Parse(time.RFC3339, ts); err == nil { + list.SystemUpdatedAt = t + } + } + return list } From 02bdc3ae1bdc05706cabbe82b21e50916863f11e Mon Sep 17 00:00:00 2001 From: Mauricio Zanetti Salomao Date: Tue, 17 Mar 2026 10:32:49 -0300 Subject: [PATCH 11/11] feat(principal): implement username conversion to Auth0 "sub" format with tests Jira Ticket: https://linuxfoundation.atlassian.net/browse/LFXV2-1223 Generated with [Claude Code](https://claude.com/claude-code) Signed-off-by: Mauricio Zanetti Salomao --- go.mod | 1 + go.sum | 2 + internal/service/datastream_member_handler.go | 58 ++++++++++++-- pkg/principal/principal.go | 46 +++++++++++ pkg/principal/principal_test.go | 79 +++++++++++++++++++ 5 files changed, 178 insertions(+), 8 deletions(-) create mode 100644 pkg/principal/principal.go create mode 100644 pkg/principal/principal_test.go diff --git a/go.mod b/go.mod index e947cd5..98c0bfc 100644 --- a/go.mod +++ b/go.mod @@ -34,6 +34,7 @@ require ( ) require ( + github.com/akamensky/base58 v0.0.0-20210829145138-ce8bf8802e8f // indirect github.com/aws/smithy-go v1.22.5 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect diff --git a/go.sum b/go.sum index 9044aec..33221ce 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/akamensky/base58 v0.0.0-20210829145138-ce8bf8802e8f h1:z8MkSJCUyTmW5YQlxsMLBlwA7GmjxC7L4ooicxqnhz8= +github.com/akamensky/base58 v0.0.0-20210829145138-ce8bf8802e8f/go.mod h1:UdUwYgAXBiL+kLfcqxoQJYkHA/vl937/PbFhZM34aZs= github.com/auth0/go-jwt-middleware/v2 v2.3.0 h1:4QREj6cS3d8dS05bEm443jhnqQF97FX9sMBeWqnNRzE= github.com/auth0/go-jwt-middleware/v2 v2.3.0/go.mod h1:dL4ObBs1/dj4/W4cYxd8rqAdDGXYyd5rqbpMIxcbVrU= github.com/aws/smithy-go v1.22.5 h1:P9ATCXPMb2mPjYBgueqJNCA5S9UfktsW0tTxi+a7eqw= diff --git a/internal/service/datastream_member_handler.go b/internal/service/datastream_member_handler.go index 5d146fe..5867a7b 100644 --- a/internal/service/datastream_member_handler.go +++ b/internal/service/datastream_member_handler.go @@ -15,14 +15,13 @@ import ( "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/constants" pkgerrors "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/errors" "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/mapconv" + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/principal" ) // HandleDataStreamMemberUpdate transforms the v1 payload into a GrpsIOMember and publishes an -// indexer message. Returns true to NAK when the parent subgroup mapping is absent -// (ordering guarantee) or on transient errors. -// -// No FGA access message is published — member access is inherited from the parent -// mailing list's access record. +// indexer message and, when the member has a username, an FGA put_member access message. +// Returns true to NAK when the parent subgroup mapping is absent (ordering guarantee) +// or on transient errors. func HandleDataStreamMemberUpdate(ctx context.Context, uid string, data map[string]any, publisher port.MessagePublisher, mappings port.MappingReaderWriter) bool { // Members carry group_id (Groups.io numeric ID) rather than a direct mailing_list_uid. // Resolve the parent subgroup UID via the reverse index written by the subgroup handler. @@ -63,13 +62,26 @@ func HandleDataStreamMemberUpdate(ctx context.Context, uid string, data map[stri return pkgerrors.IsTransient(err) } - if err := mappings.PutMapping(ctx, mKey, uid); err != nil { + if member.Username != "" { + accessMsg := &groupsioMailingListMemberStub{ + UID: uid, + Username: principal.FromUsername(member.Username), + MailingListUID: mailingListUID, + } + if err := publisher.Access(ctx, constants.PutMemberGroupsIOMailingListSubject, accessMsg); err != nil { + slog.WarnContext(ctx, "failed to publish member FGA put message", "uid", uid, "error", err) + } + } + + mappingValue := buildMemberMappingValue(uid, member.Username, mailingListUID) + if err := mappings.PutMapping(ctx, mKey, mappingValue); err != nil { slog.ErrorContext(ctx, "failed to put mapping key", "mapping_key", mKey, "error", err) } return false } -// HandleDataStreamMemberDelete publishes a delete indexer message and tombstones the mapping. +// HandleDataStreamMemberDelete publishes a delete indexer message, an FGA remove_member message +// (when the stored mapping contains a username), and tombstones the mapping. func HandleDataStreamMemberDelete(ctx context.Context, uid string, publisher port.MessagePublisher, mappings port.MappingReaderWriter) bool { mKey := fmt.Sprintf("%s.%s", constants.KVMappingPrefixMember, uid) @@ -79,7 +91,8 @@ func HandleDataStreamMemberDelete(ctx context.Context, uid string, publisher por } // If there is no mapping entry, this record was never indexed — nothing to delete. - if !mappings.IsMappingPresent(ctx, mKey) { + storedValue, ok := mappings.GetMappingValue(ctx, mKey) + if !ok { slog.InfoContext(ctx, "member was never indexed, skipping OpenSearch delete", "uid", uid) if err := mappings.PutTombstone(ctx, mKey); err != nil { slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mKey, "error", err) @@ -99,6 +112,18 @@ func HandleDataStreamMemberDelete(ctx context.Context, uid string, publisher por return pkgerrors.IsTransient(err) } + _, username, mailingListUID := parseMemberMappingValue(storedValue) + if username != "" { + accessMsg := &groupsioMailingListMemberStub{ + UID: uid, + Username: principal.FromUsername(username), + MailingListUID: mailingListUID, + } + if err := publisher.Access(ctx, constants.RemoveMemberGroupsIOMailingListSubject, accessMsg); err != nil { + slog.WarnContext(ctx, "failed to publish member FGA remove message", "uid", uid, "error", err) + } + } + if err := mappings.PutTombstone(ctx, mKey); err != nil { slog.ErrorContext(ctx, "failed to put tombstone", "mapping_key", mKey, "error", err) } @@ -116,6 +141,7 @@ func transformV1ToGrpsIOMember(uid, mailingListUID string, data map[string]any) MemberID: mapconv.Int64Ptr(data, "member_id"), GroupID: mapconv.Int64Ptr(data, "group_id"), UserID: mapconv.StringVal(data, "user_id"), + Username: mapconv.StringVal(data, "username"), FirstName: firstName, LastName: lastName, Email: mapconv.StringVal(data, "email"), @@ -155,6 +181,22 @@ func transformV1ToGrpsIOMember(uid, mailingListUID string, data map[string]any) return member } +// buildMemberMappingValue encodes uid, username, and mailingListUID into a single string +// so they can be recovered on delete without an extra lookup. +func buildMemberMappingValue(uid, username, mailingListUID string) string { + return fmt.Sprintf("%s|%s|%s", uid, username, mailingListUID) +} + +// parseMemberMappingValue decodes a value written by buildMemberMappingValue. +// Falls back gracefully for old-format entries that only stored uid. +func parseMemberMappingValue(value string) (uid, username, mailingListUID string) { + parts := strings.SplitN(value, "|", 3) + if len(parts) == 3 { + return parts[0], parts[1], parts[2] + } + return value, "", "" +} + // splitFullName splits "First Last" into (first, last). // For single-token names (no space), the whole string is returned as first name. func splitFullName(fullName string) (string, string) { diff --git a/pkg/principal/principal.go b/pkg/principal/principal.go new file mode 100644 index 0000000..765dc48 --- /dev/null +++ b/pkg/principal/principal.go @@ -0,0 +1,46 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +// Package principal converts v1 usernames to the Auth0 "sub" format expected by v2 +// services: "auth0|{userID}". +// +// The mapping logic is shared with lfx-v1-sync-helper and must remain in sync with it. +package principal + +import ( + "crypto/sha512" + "regexp" + + "github.com/akamensky/base58" +) + +var ( + // safeNameRE matches usernames safe to use directly as Auth0 user IDs. + safeNameRE = regexp.MustCompile(`^[A-Za-z0-9][A-Za-z0-9._-]{0,58}[A-Za-z0-9]$`) + // hexUserRE matches usernames that could collide with Auth0 native-DB hexadecimal IDs. + hexUserRE = regexp.MustCompile(`^[0-9a-f]{24,60}$`) +) + +// FromUsername converts a raw v1 username to the Auth0 "sub" format: "auth0|{userID}". +// +// Safe usernames (matching safeNameRE but not hexUserRE) are used verbatim as the userID. +// All others are SHA-512 hashed and base58-encoded (~80 chars) to handle legacy usernames +// that are too long, contain non-standard characters, or risk colliding with future Auth0 +// native-DB hexadecimal IDs. +// +// Returns an empty string when username is empty. +func FromUsername(username string) string { + if username == "" { + return "" + } + + var userID string + if safeNameRE.MatchString(username) && !hexUserRE.MatchString(username) { + userID = username + } else { + hash := sha512.Sum512([]byte(username)) + userID = base58.Encode(hash[:]) + } + + return "auth0|" + userID +} diff --git a/pkg/principal/principal_test.go b/pkg/principal/principal_test.go new file mode 100644 index 0000000..aeb18d5 --- /dev/null +++ b/pkg/principal/principal_test.go @@ -0,0 +1,79 @@ +// Copyright The Linux Foundation and each contributor to LFX. +// SPDX-License-Identifier: MIT + +package principal_test + +import ( + "testing" + + "github.com/linuxfoundation/lfx-v2-mailing-list-service/pkg/principal" + "github.com/stretchr/testify/assert" +) + +func TestFromUsername(t *testing.T) { + tests := []struct { + name string + username string + expected string + }{ + { + name: "empty returns empty", + username: "", + expected: "", + }, + { + // Safe username: matches safeNameRE, does not match hexUserRE — used verbatim. + name: "simple safe username", + username: "john.doe", + expected: "auth0|john.doe", + }, + { + name: "safe username with hyphens and digits", + username: "user-123", + expected: "auth0|user-123", + }, + { + // 60 chars = 1 + 58 + 1, the maximum safeNameRE allows. + name: "exactly 60 safe chars used verbatim", + username: "abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789", + expected: "auth0|abcdefghij0123456789abcdefghij0123456789abcdefghij0123456789", + }, + { + // 61 chars — exceeds safeNameRE max, must be SHA-512 + base58 encoded. + name: "username longer than 60 chars is hashed", + username: "abcdefghij0123456789abcdefghij0123456789abcdefghij01234567890", + expected: "auth0|5fLnnbn4KGc4pxKzgK9JE4GGGpKoWdUqSnsQtutw2XBBTr8qBbv6vv71m1TsGe3mbNvr6a6ncktckEBVD2yhUKD3", + }, + { + // Pure lowercase hex 24+ chars matches hexUserRE — must be hashed to avoid + // colliding with Auth0 native-DB IDs. + name: "24-char hex username is hashed", + username: "0123456789abcdef01234567", + expected: "auth0|3TjHYyavZDgNgHjy8pnsNZAD7Ek7bVyv9NRnF5384aAmUdvqh2NADaPWr1k1QyX2sbs8Yoh2m5wV7BdMwmpkstf2", + }, + { + // Space is not in [A-Za-z0-9._-] so safeNameRE won't match — must be hashed. + name: "username with space is hashed", + username: "John Doe", + expected: "auth0|dsNnfygV3vpJeB65SWP4JwT4Jcud9eUHjuqetwR7XYZdCokZ7vcs1VUqkov1ktH5qppUQsmHgy5Z3j6ZhwekXY2", + }, + { + // @ is not allowed by safeNameRE — must be hashed. + name: "email-style username is hashed", + username: "user@example.com", + expected: "auth0|3CCfg5ewbyYkXLuXR6oq4aXDZCrV4dqpMSY9XdoWUdENu9MPm9RcZUrCVzMe1W1vzXfkaMVN8awpp82tMSF8AswD", + }, + { + // Hashing must be deterministic: calling twice yields the same result. + name: "deterministic output for safe username", + username: "stable-user", + expected: "auth0|stable-user", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.expected, principal.FromUsername(tt.username)) + }) + } +}