diff --git a/.chloggen/ocb-component-schema-alternative.yaml b/.chloggen/ocb-component-schema-alternative.yaml new file mode 100644 index 00000000000..698ecdb51d9 --- /dev/null +++ b/.chloggen/ocb-component-schema-alternative.yaml @@ -0,0 +1,30 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. receiver/otlp) +component: cmd/mdatagen + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add config JSON schema generation + +# One or more tracking issues or pull requests related to the change +issues: [9769] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: | + The component config JSON schema can be optionally enabled by: + ``` + schema: + enabled: true + ``` + +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. 
+# Default: '[user]' +change_logs: [api] diff --git a/cmd/mdatagen/go.mod b/cmd/mdatagen/go.mod index 8392dff8f74..19dcfebac8b 100644 --- a/cmd/mdatagen/go.mod +++ b/cmd/mdatagen/go.mod @@ -4,10 +4,13 @@ go 1.24.0 require ( github.com/google/go-cmp v0.7.0 + github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 github.com/spf13/cobra v1.10.2 github.com/stretchr/testify v1.11.1 go.opentelemetry.io/collector/component v1.49.0 go.opentelemetry.io/collector/component/componenttest v0.143.0 + go.opentelemetry.io/collector/config/configopaque v1.49.0 + go.opentelemetry.io/collector/config/configoptional v1.49.0 go.opentelemetry.io/collector/confmap v1.49.0 go.opentelemetry.io/collector/confmap/provider/fileprovider v1.49.0 go.opentelemetry.io/collector/connector v0.143.0 @@ -32,6 +35,7 @@ require ( go.uber.org/goleak v1.3.0 go.uber.org/zap v1.27.1 golang.org/x/text v0.32.0 + golang.org/x/tools v0.40.0 gopkg.in/yaml.v3 v3.0.1 ) @@ -57,6 +61,7 @@ require ( github.com/spf13/pflag v1.0.10 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/collector/component/componentstatus v0.143.0 // indirect + go.opentelemetry.io/collector/confmap/xconfmap v0.143.0 // indirect go.opentelemetry.io/collector/connector/xconnector v0.143.0 // indirect go.opentelemetry.io/collector/consumer/consumererror v0.143.0 // indirect go.opentelemetry.io/collector/consumer/xconsumer v0.143.0 // indirect @@ -72,6 +77,8 @@ require ( go.opentelemetry.io/otel/sdk v1.39.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/mod v0.31.0 // indirect + golang.org/x/sync v0.19.0 // indirect golang.org/x/sys v0.39.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251222181119-0a764e51fe1b // indirect google.golang.org/grpc v1.78.0 // indirect diff --git a/cmd/mdatagen/go.sum b/cmd/mdatagen/go.sum index 002338fbd97..850c7ff9d27 100644 --- a/cmd/mdatagen/go.sum +++ b/cmd/mdatagen/go.sum @@ -4,6 +4,8 @@ 
github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6N github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dlclark/regexp2 v1.11.0 h1:G/nrcoOa7ZXlpoa/91N3X7mM3r8eIlMBBJZvsz/mxKI= +github.com/dlclark/regexp2 v1.11.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -51,6 +53,8 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2 h1:KRzFb2m7YtdldCEkzs6KqmJw4nqEVZGK7IN2kJkjTuQ= +github.com/santhosh-tekuri/jsonschema/v6 v6.0.2/go.mod h1:JXeL+ps8p7/KNMjDQk3TCwPpBy0wYklyWTfbkIzdIFU= github.com/spf13/cobra v1.10.2 h1:DMTTonx5m65Ic0GOoRY2c16WCbHxOOw6xxezuLaBpcU= github.com/spf13/cobra v1.10.2/go.mod h1:7C1pvHqHw5A4vrJfjNwvOdzYu0Gml16OCs2GRiTUUS4= github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= @@ -86,12 +90,18 @@ go.uber.org/zap v1.27.1 h1:08RqriUEv8+ArZRYSTXy1LeBScaMpVSTBhCeaZYfMYc= go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/mod v0.31.0 h1:HaW9xtz0+kOcWKwli0ZXy79Ix+UW/vOfmWI5QVd2tgI= 
+golang.org/x/mod v0.31.0/go.mod h1:43JraMp9cGx1Rx3AqioxrbrhNsLl2l/iNAvuBkrezpg= golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= +golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4= +golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +golang.org/x/tools v0.40.0 h1:yLkxfA+Qnul4cs9QA3KnlFu0lVmd8JJfoq+E41uSutA= +golang.org/x/tools v0.40.0/go.mod h1:Ik/tzLRlbscWpqqMRjyWYDisX8bG13FrdXp3o4Sr9lc= google.golang.org/genproto/googleapis/rpc v0.0.0-20251222181119-0a764e51fe1b h1:Mv8VFug0MP9e5vUxfBcE3vUkV6CImK3cMNMIDFjmzxU= google.golang.org/genproto/googleapis/rpc v0.0.0-20251222181119-0a764e51fe1b/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ= google.golang.org/grpc v1.78.0 h1:K1XZG/yGDJnzMdd/uZHAkVqJE+xIDOcmdSFZkBUicNc= diff --git a/cmd/mdatagen/internal/command.go b/cmd/mdatagen/internal/command.go index 7e6837e208d..c01b7bf3e93 100644 --- a/cmd/mdatagen/internal/command.go +++ b/cmd/mdatagen/internal/command.go @@ -22,6 +22,8 @@ import ( "golang.org/x/text/cases" "golang.org/x/text/language" "gopkg.in/yaml.v3" + + "go.opentelemetry.io/collector/cmd/mdatagen/internal/schemagen" ) const ( @@ -216,6 +218,11 @@ func run(ymlPath string) error { } } + // Generate JSON schema if enabled + if err := generateSchema(md, ymlDir); err != nil { + return fmt.Errorf("failed to generate schema: %w", err) + } + return nil } @@ -492,3 +499,60 @@ func validateYAMLKeyOrder(raw []byte) error { } return nil } + +// generateSchema generates a JSON schema for the component's config if enabled. 
+func generateSchema(md Metadata, ymlDir string) error { + // Skip if schema generation is not enabled + if md.Schema == nil || !md.Schema.Enabled { + return nil + } + + // Skip non-component types + if md.Status == nil || slices.Contains(nonComponents, md.Status.Class) { + return nil + } + + // Create schemas directory + outputDir := filepath.Join(ymlDir, "internal", md.GeneratedPackageName, "schemas") + if err := os.MkdirAll(outputDir, 0o700); err != nil { + return fmt.Errorf("failed to create schemas directory: %w", err) + } + + // Parse config type specification + pkgPath, typeName := parseConfigType(md.Schema.ConfigType) + + // Create analyzer and generator + analyzer := schemagen.NewPackageAnalyzer(ymlDir) + generator := schemagen.NewSchemaGenerator(outputDir, analyzer) + + // Generate schema + return generator.GenerateSchema(md.Status.Class, md.Type, typeName, pkgPath) +} + +// parseConfigType parses a config type specification into package path and type name. +// Examples: +// - "Config" -> ("", "Config") +// - "go.opentelemetry.io/collector/pkg.Config" -> ("go.opentelemetry.io/collector/pkg", "Config") +func parseConfigType(configType string) (pkgPath, typeName string) { + if configType == "" { + return "", "" + } + + // Find the last dot that separates package path from type name + lastDot := strings.LastIndex(configType, ".") + if lastDot == -1 { + // No dot means it's just a type name in the local package + return "", configType + } + + // Check if this looks like a package path (contains "/" before the last dot) + potentialPkg := configType[:lastDot] + if strings.Contains(potentialPkg, "/") { + // It's a fully qualified type: pkg/path.TypeName + return potentialPkg, configType[lastDot+1:] + } + + // No slash means it's just a type name (e.g., "Config" or possibly "pkg.Config" for local) + // Treat as local type name + return "", configType +} diff --git a/cmd/mdatagen/internal/command_test.go b/cmd/mdatagen/internal/command_test.go index 
2a70c4ef693..899c3cd2b96 100644 --- a/cmd/mdatagen/internal/command_test.go +++ b/cmd/mdatagen/internal/command_test.go @@ -820,3 +820,40 @@ func Tracer(settings component.TelemetrySettings) trace.Tracer { }) } } + +func TestGenerateSchema_Skipped(t *testing.T) { + tests := []struct { + name string + md Metadata + }{ + { + name: "schema is nil", + md: Metadata{ + Type: "test", + Status: &Status{Class: "exporter"}, + }, + }, + { + name: "explicitly disabled", + md: Metadata{ + Type: "test", + Status: &Status{Class: "exporter"}, + Schema: &SchemaConfig{Enabled: false}, + }, + }, + { + name: "non-component type", + md: Metadata{ + Type: "test", + Status: &Status{Class: "cmd"}, + Schema: &SchemaConfig{Enabled: true}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := generateSchema(tt.md, t.TempDir()) + require.NoError(t, err) + }) + } +} diff --git a/cmd/mdatagen/internal/loader_test.go b/cmd/mdatagen/internal/loader_test.go index b84b59c8e55..f0637c21288 100644 --- a/cmd/mdatagen/internal/loader_test.go +++ b/cmd/mdatagen/internal/loader_test.go @@ -52,6 +52,9 @@ func TestLoadMetadata(t *testing.T) { SemConvVersion: "1.38.0", PackageName: "go.opentelemetry.io/collector/cmd/mdatagen/internal/samplereceiver", ReaggregationEnabled: true, + Schema: &SchemaConfig{ + Enabled: true, + }, Status: &Status{ DisableCodeCov: true, Class: "receiver", diff --git a/cmd/mdatagen/internal/metadata.go b/cmd/mdatagen/internal/metadata.go index f177de89f21..35becfea937 100644 --- a/cmd/mdatagen/internal/metadata.go +++ b/cmd/mdatagen/internal/metadata.go @@ -54,6 +54,8 @@ type Metadata struct { Tests Tests `mapstructure:"tests"` // PackageName is the name of the package where the component is defined. PackageName string `mapstructure:"package_name"` + // Schema holds configuration for JSON schema generation. 
+ Schema *SchemaConfig `mapstructure:"schema"` } func (md Metadata) GetCodeCovComponentID() string { diff --git a/cmd/mdatagen/internal/samplereceiver/config.go b/cmd/mdatagen/internal/samplereceiver/config.go new file mode 100644 index 00000000000..16fe867f406 --- /dev/null +++ b/cmd/mdatagen/internal/samplereceiver/config.go @@ -0,0 +1,108 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package samplereceiver // import "go.opentelemetry.io/collector/cmd/mdatagen/internal/samplereceiver" + +import ( + "time" + + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config/configopaque" + "go.opentelemetry.io/collector/config/configoptional" +) + +var _ component.Factory = (*AnotherStruct)(nil) + +type AnotherStruct struct{} + +func (a AnotherStruct) Type() component.Type { + // TODO implement me + panic("implement me") +} + +func (a AnotherStruct) CreateDefaultConfig() component.Config { + // TODO implement me + panic("implement me") +} + +var _ component.Config = (*MyConfig)(nil) + +type CustomString string + +// NetworkConfig holds network configuration that should be squashed into parent. +type NetworkConfig struct { + // Host is the network host. + Host string `mapstructure:"host"` + + // Port is the network port. + Port int `mapstructure:"port"` +} + +// MyConfig defines configuration for the sample exporter used to test schema generation. +type MyConfig struct { + // Network is squashed into the parent config. + Network NetworkConfig `mapstructure:",squash"` + + // ID is the component identifier. + ID component.ID `mapstructure:"id"` + + // Endpoint is the target URL to send data to. + Endpoint string `mapstructure:"endpoint"` + + // CustomString is a custom string. + CustomString CustomString `mapstructure:"custom_string"` + + // Timeout is the maximum time to wait for a response. 
+ Timeout time.Duration `mapstructure:"timeout"` + + // StartTime is the time when the receiver should start collecting data. + StartTime time.Time `mapstructure:"start_time"` + + // Enabled controls whether the exporter is active. + Enabled bool `mapstructure:"enabled"` + + // BatchSize is the number of items to send in each batch. + BatchSize int `mapstructure:"batch_size"` + + // Headers are additional headers to include in requests. + Headers map[string]string `mapstructure:"headers"` + + // Retry contains retry configuration. + Retry RetryConfig `mapstructure:"retry"` + + // Tags are optional tags to attach. + Tags []string `mapstructure:"tags"` + + // APIKey is a secret API key (opaque string). + APIKey configopaque.String `mapstructure:"api_key"` + + // OptionalRetry is an optional retry configuration. + OptionalRetry configoptional.Optional[RetryConfig] `mapstructure:"optional_retry"` + + // Secrets is a list of secret key-value pairs. + Secrets configopaque.MapList `mapstructure:"secrets"` + + // Endpoints is a list of endpoint configurations. + Endpoints []EndpointConfig `mapstructure:"endpoints"` + + // InternalState is an internal field that should be excluded from the schema. + InternalState string `mapstructure:"-"` +} + +// EndpointConfig holds configuration for a single endpoint. +type EndpointConfig struct { + // URL is the endpoint URL. + URL string `mapstructure:"url"` + + // Priority is the endpoint priority. + Priority int `mapstructure:"priority"` +} + +// RetryConfig holds retry settings. +type RetryConfig struct { + // MaxRetries is the maximum number of retries. + MaxRetries int `mapstructure:"max_retries"` + + // InitialInterval is the initial retry interval. 
+ InitialInterval time.Duration `mapstructure:"initial_interval"` +} diff --git a/cmd/mdatagen/internal/samplereceiver/factory.go b/cmd/mdatagen/internal/samplereceiver/factory.go index df625767eff..17cfc86cb59 100644 --- a/cmd/mdatagen/internal/samplereceiver/factory.go +++ b/cmd/mdatagen/internal/samplereceiver/factory.go @@ -18,7 +18,7 @@ import ( func NewFactory() receiver.Factory { return receiver.NewFactory( metadata.Type, - func() component.Config { return &struct{}{} }, + func() component.Config { return &MyConfig{} }, receiver.WithTraces(createTraces, metadata.TracesStability), receiver.WithMetrics(createMetrics, metadata.MetricsStability), receiver.WithLogs(createLogs, metadata.LogsStability)) diff --git a/cmd/mdatagen/internal/samplereceiver/internal/metadata/schemas/config_schema.yaml b/cmd/mdatagen/internal/samplereceiver/internal/metadata/schemas/config_schema.yaml new file mode 100644 index 00000000000..2eb1ed83ead --- /dev/null +++ b/cmd/mdatagen/internal/samplereceiver/internal/metadata/schemas/config_schema.yaml @@ -0,0 +1,87 @@ +$schema: https://json-schema.org/draft/2020-12/schema +title: sample receiver configuration +type: object +properties: + api_key: + description: APIKey is a secret API key (opaque string). + type: string + batch_size: + description: BatchSize is the number of items to send in each batch. + type: integer + custom_string: + description: CustomString is a custom string. + type: string + enabled: + description: Enabled controls whether the exporter is active. + type: boolean + endpoint: + description: Endpoint is the target URL to send data to. + type: string + endpoints: + description: Endpoints is a list of endpoint configurations. + type: array + items: + type: object + properties: + priority: + description: Priority is the endpoint priority. + type: integer + url: + description: URL is the endpoint URL. 
+ type: string + additionalProperties: false + headers: + description: Headers are additional headers to include in requests. + type: object + additionalProperties: + type: string + host: + description: Host is the network host. + type: string + id: + description: ID is the component identifier. + type: string + optional_retry: + description: OptionalRetry is an optional retry configuration. + type: object + properties: + initial_interval: + description: InitialInterval is the initial retry interval. + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + max_retries: + description: MaxRetries is the maximum number of retries. + type: integer + additionalProperties: false + port: + description: Port is the network port. + type: integer + retry: + description: Retry contains retry configuration. + type: object + properties: + initial_interval: + description: InitialInterval is the initial retry interval. + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + max_retries: + description: MaxRetries is the maximum number of retries. + type: integer + additionalProperties: false + secrets: + description: Secrets is a list of secret key-value pairs. + type: object + start_time: + description: StartTime is the time when the receiver should start collecting data. + type: string + format: date-time + tags: + description: Tags are optional tags to attach. + type: array + items: + type: string + timeout: + description: Timeout is the maximum time to wait for a response. 
+ type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ +additionalProperties: false diff --git a/cmd/mdatagen/internal/samplereceiver/metadata.yaml b/cmd/mdatagen/internal/samplereceiver/metadata.yaml index 77c502160e2..4a5a69bcf94 100644 --- a/cmd/mdatagen/internal/samplereceiver/metadata.yaml +++ b/cmd/mdatagen/internal/samplereceiver/metadata.yaml @@ -9,6 +9,9 @@ github_project: open-telemetry/opentelemetry-collector sem_conv_version: 1.38.0 +schema: + enabled: true + status: disable_codecov_badge: true class: receiver diff --git a/cmd/mdatagen/internal/schema.go b/cmd/mdatagen/internal/schema.go new file mode 100644 index 00000000000..bd25e34ea7f --- /dev/null +++ b/cmd/mdatagen/internal/schema.go @@ -0,0 +1,16 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package internal // import "go.opentelemetry.io/collector/cmd/mdatagen/internal" + +// SchemaConfig holds configuration for YAML schema generation. +type SchemaConfig struct { + // Enabled controls whether schema generation is enabled (default: false). + Enabled bool `mapstructure:"enabled"` + + // ConfigType specifies the config type to generate schema for. + // Can be a simple type name (e.g., "Config") for local package, + // or a fully qualified type (e.g., "go.opentelemetry.io/collector/pkg.Config") + // for external packages. If empty, auto-detection is used. 
+ ConfigType string `mapstructure:"config_type"` +} diff --git a/cmd/mdatagen/internal/schemagen/analyzer.go b/cmd/mdatagen/internal/schemagen/analyzer.go new file mode 100644 index 00000000000..d61630dfb05 --- /dev/null +++ b/cmd/mdatagen/internal/schemagen/analyzer.go @@ -0,0 +1,438 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package schemagen // import "go.opentelemetry.io/collector/cmd/mdatagen/internal/schemagen" + +import ( + "fmt" + "go/ast" + "go/types" + "reflect" + "strings" + + "golang.org/x/tools/go/packages" +) + +// PackageAnalyzer analyzes Go packages to extract struct information. +type PackageAnalyzer struct { + dir string + // pkgCache caches loaded packages to avoid reloading them + pkgCache map[string]*packages.Package +} + +// NewPackageAnalyzer creates a new PackageAnalyzer for the given directory. +func NewPackageAnalyzer(dir string) *PackageAnalyzer { + return &PackageAnalyzer{ + dir: dir, + pkgCache: make(map[string]*packages.Package), + } +} + +// analyzeConfig loads the package and finds the Config struct. +// configTypeName is the name of the config type (e.g., "Config"). If empty, auto-detection is used. +// configPkgPath is the package path where the config is defined. If empty, the local package is used. 
+func (a *PackageAnalyzer) analyzeConfig(configTypeName, configPkgPath string) (*StructInfo, error) { + cfg := &packages.Config{ + Mode: packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | + packages.NeedSyntax | packages.NeedImports | packages.NeedDeps, + Dir: a.dir, + } + + pkgs, err := packages.Load(cfg, ".") + if err != nil { + return nil, fmt.Errorf("failed to load package: %w", err) + } + + if len(pkgs) == 0 { + return nil, fmt.Errorf("no packages found in %s", a.dir) + } + + if len(pkgs[0].Errors) > 0 { + var errMsgs []string + for _, e := range pkgs[0].Errors { + errMsgs = append(errMsgs, e.Error()) + } + return nil, fmt.Errorf("package errors: %s", strings.Join(errMsgs, "; ")) + } + + pkg := pkgs[0] + + // Auto-detect config type if not specified + if configTypeName == "" { + configTypeName = a.detectConfigFromVarDecl(pkg) + if configTypeName == "" { + configTypeName = "Config" // fallback to default + } + } + + // Determine which package to analyze + analyzePkg := pkg + if configPkgPath != "" { + // Config is in an external package + analyzePkg, err = a.loadPackage(configPkgPath) + if err != nil { + return nil, fmt.Errorf("failed to load config package %s: %w", configPkgPath, err) + } + } + + // Find the Config type + obj := analyzePkg.Types.Scope().Lookup(configTypeName) + if obj == nil { + return nil, fmt.Errorf("type %s not found in package %s", configTypeName, analyzePkg.PkgPath) + } + + named, ok := obj.Type().(*types.Named) + if !ok { + return nil, fmt.Errorf("%s is not a named type", configTypeName) + } + + structType, ok := named.Underlying().(*types.Struct) + if !ok { + return nil, fmt.Errorf("%s is not a struct type", configTypeName) + } + + // Extract fields + fields := a.extractFields(analyzePkg, structType) + + return &StructInfo{ + Name: configTypeName, + Package: analyzePkg.PkgPath, + Fields: fields, + }, nil +} + +// extractFields extracts field information from a struct type. 
+func (a *PackageAnalyzer) extractFields(pkg *packages.Package, st *types.Struct) []FieldInfo { + var fields []FieldInfo + + for i := 0; i < st.NumFields(); i++ { + field := st.Field(i) + tag := st.Tag(i) + + fieldInfo := a.extractFieldInfo(pkg, field, tag) + if fieldInfo != nil { + fields = append(fields, *fieldInfo) + } + } + + return fields +} + +// extractFieldInfo extracts information about a single field. +func (a *PackageAnalyzer) extractFieldInfo(pkg *packages.Package, field *types.Var, tag string) *FieldInfo { + // Skip unexported fields + if !field.Exported() { + return nil + } + + // Parse struct tags + jsonName, squash := parseTag(tag) + if jsonName == "-" { + return nil // Skip fields with json:"-" or mapstructure:"-" + } + if jsonName == "" { + jsonName = strings.ToLower(field.Name()) + } + + // Check if embedded - either Go-level embedding or mapstructure squash + embedded := field.Embedded() || squash + + // Get field doc comment + fieldDoc := a.findFieldDoc(pkg, field) + + typeStr := resolveTypeAlias(field.Type()) + + info := &FieldInfo{ + Name: field.Name(), + JSONName: jsonName, + Type: typeStr, + Description: fieldDoc, + Embedded: embedded, + } + + // For embedded structs or struct fields, extract nested fields + if st := getUnderlyingStruct(field.Type()); st != nil { + info.Fields = a.extractFields(pkg, st) + } + + return info +} + +// findFieldDoc finds the documentation comment for a struct field. +// It searches in the field's originating package, loading external packages if needed. 
+func (a *PackageAnalyzer) findFieldDoc(pkg *packages.Package, field *types.Var) string { + pos := field.Pos() + if !pos.IsValid() { + return "" + } + + // Determine which package the field comes from + fieldPkg := field.Pkg() + if fieldPkg == nil { + return "" + } + + // Get the package to search in - either the local package or load the external one + searchPkg := pkg + if fieldPkg.Path() != pkg.PkgPath { + // Field comes from an external package, need to load it + var err error + searchPkg, err = a.loadPackage(fieldPkg.Path()) + if err != nil || searchPkg == nil { + return "" + } + } + + return a.findFieldDocInPackage(searchPkg, field) +} + +// loadPackage loads a package by its import path, using the cache if available. +func (a *PackageAnalyzer) loadPackage(pkgPath string) (*packages.Package, error) { + // Check cache first + if cached, ok := a.pkgCache[pkgPath]; ok { + return cached, nil + } + + cfg := &packages.Config{ + Mode: packages.NeedName | packages.NeedTypes | packages.NeedTypesInfo | + packages.NeedSyntax | packages.NeedImports | packages.NeedDeps, + Dir: a.dir, + } + + pkgs, err := packages.Load(cfg, pkgPath) + if err != nil { + return nil, err + } + + if len(pkgs) == 0 { + return nil, nil + } + + // Cache the loaded package + a.pkgCache[pkgPath] = pkgs[0] + return pkgs[0], nil +} + +// findFieldDocInPackage searches for a field's documentation in a specific package's AST. 
+func (a *PackageAnalyzer) findFieldDocInPackage(pkg *packages.Package, field *types.Var) string { + if pkg == nil || pkg.Syntax == nil { + return "" + } + + for _, file := range pkg.Syntax { + for _, decl := range file.Decls { + genDecl, ok := decl.(*ast.GenDecl) + if !ok { + continue + } + for _, spec := range genDecl.Specs { + typeSpec, ok := spec.(*ast.TypeSpec) + if !ok { + continue + } + st, ok := typeSpec.Type.(*ast.StructType) + if !ok { + continue + } + for _, f := range st.Fields.List { + for _, name := range f.Names { + if name.Name == field.Name() { + if f.Doc != nil { + return strings.TrimSpace(f.Doc.Text()) + } + if f.Comment != nil { + return strings.TrimSpace(f.Comment.Text()) + } + return "" + } + } + } + } + } + } + return "" +} + +// parseTag parses struct tags to find the mapstructure name and squash option. +func parseTag(tag string) (name string, squash bool) { + st := reflect.StructTag(tag) + + if ms := st.Get("mapstructure"); ms != "" { + parts := strings.Split(ms, ",") + name = parts[0] + for _, p := range parts[1:] { + if p == "squash" { + squash = true + } + } + return name, squash + } + + return "", false +} + +// detectConfigFromVarDecl looks for the component.Config interface assignment pattern: +// var _ component.Config = (*TypeName)(nil) +func (a *PackageAnalyzer) detectConfigFromVarDecl(pkg *packages.Package) string { + for _, file := range pkg.Syntax { + for _, decl := range file.Decls { + genDecl, ok := decl.(*ast.GenDecl) + if !ok { + continue + } + + for _, spec := range genDecl.Specs { + valueSpec, ok := spec.(*ast.ValueSpec) + if !ok { + continue + } + + // Check if this is a blank identifier assignment: var _ Type = ... 
+ if len(valueSpec.Names) != 1 || valueSpec.Names[0].Name != "_" { + continue + } + + // Check if the type is component.Config + if !isComponentConfigType(valueSpec.Type) { + continue + } + + // Extract the type from the value: (*TypeName)(nil) + if len(valueSpec.Values) != 1 { + continue + } + + typeName := extractPointerTypeName(valueSpec.Values[0]) + if typeName != "" { + return typeName + } + } + } + } + return "" +} + +// isComponentConfigType checks if the type expression is component.Config. +func isComponentConfigType(expr ast.Expr) bool { + sel, ok := expr.(*ast.SelectorExpr) + if !ok { + return false + } + + ident, ok := sel.X.(*ast.Ident) + if !ok { + return false + } + + return ident.Name == "component" && sel.Sel.Name == "Config" +} + +// extractPointerTypeName extracts TypeName from (*TypeName)(nil). +func extractPointerTypeName(expr ast.Expr) string { + // Pattern: (*TypeName)(nil) is a CallExpr + call, ok := expr.(*ast.CallExpr) + if !ok { + return "" + } + + // The function part should be (*TypeName) + paren, ok := call.Fun.(*ast.ParenExpr) + if !ok { + return "" + } + + // Inside parens should be *TypeName + star, ok := paren.X.(*ast.StarExpr) + if !ok { + return "" + } + + // Extract the type name + ident, ok := star.X.(*ast.Ident) + if !ok { + return "" + } + + return ident.Name +} + +// getUnderlyingStruct returns the underlying struct type if t is a struct or pointer to struct. 
+func getUnderlyingStruct(t types.Type) *types.Struct { + // Handle pointer types + if ptr, ok := t.(*types.Pointer); ok { + t = ptr.Elem() + } + + // Handle slice types - extract element type + if slice, ok := t.(*types.Slice); ok { + return getUnderlyingStruct(slice.Elem()) + } + + // Handle named types (including generics like configoptional.Optional[T]) + if named, ok := t.(*types.Named); ok { + // Check if this is an Optional[T] generic and unwrap T + if isOptionalType(t) { + typeArgs := named.TypeArgs() + // Recursively get the struct from the inner type + return getUnderlyingStruct(typeArgs.At(0)) + } + t = named.Underlying() + } + + if st, ok := t.(*types.Struct); ok { + return st + } + return nil +} + +// isOptionalType checks if a type is a generic Optional type (e.g., configoptional.Optional[T]). +// Uses type structure rather than string matching for robustness. +func isOptionalType(t types.Type) bool { + named, ok := t.(*types.Named) + if !ok { + return false + } + + // Check if it's a generic type with type arguments + if named.TypeArgs() == nil || named.TypeArgs().Len() == 0 { + return false + } + + // Check the type name (without package path) + return named.Obj().Name() == "Optional" +} + +// resolveTypeAlias resolves type aliases to their underlying basic types. +// For example, "type CustomString string" returns "string". +// Preserves well-known types like time.Duration that need special handling. +// For complex types (structs, etc.), returns the original type string. 
+func resolveTypeAlias(t types.Type) string { + // If it's a named type, check if it's an alias for a basic type + if named, ok := t.(*types.Named); ok { + typeName := named.Obj().Name() + pkgPath := "" + if named.Obj().Pkg() != nil { + pkgPath = named.Obj().Pkg().Path() + } + + // Preserve well-known types that need special handling + switch { + case pkgPath == "time" && (typeName == "Duration" || typeName == "Time"): + return t.String() + case typeName == "ID" || typeName == "Type": + // component.ID and component.Type + return t.String() + case strings.Contains(t.String(), "configopaque") || strings.Contains(t.String(), "configoptional"): + // Preserve config types for special handling + return t.String() + } + + underlying := named.Underlying() + // If the underlying type is a basic type, use that + if basic, ok := underlying.(*types.Basic); ok { + return basic.String() + } + } + // For all other cases, return the original type string + return t.String() +} diff --git a/cmd/mdatagen/internal/schemagen/generator.go b/cmd/mdatagen/internal/schemagen/generator.go new file mode 100644 index 00000000000..64ca87965c4 --- /dev/null +++ b/cmd/mdatagen/internal/schemagen/generator.go @@ -0,0 +1,265 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package schemagen // import "go.opentelemetry.io/collector/cmd/mdatagen/internal/schemagen" + +import ( + "fmt" + "maps" + "os" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" +) + +const ( + // jsonSchemaVersion is the JSON Schema draft version used. + jsonSchemaVersion = "https://json-schema.org/draft/2020-12/schema" + + // durationPattern is a regex pattern matching Go's time.ParseDuration format. + // Matches: "0", "10s", "1.5h", "1h30m", "-5m", etc. 
+ // Valid units: ns, us (or µs), ms, s, m, h + // Uses actual Unicode characters for µ (micro sign U+00B5 and Greek mu U+03BC) + durationPattern = `^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$` +) + +// SchemaGenerator generates JSON schemas from Go struct information. +type SchemaGenerator struct { + outputDir string + analyzer *PackageAnalyzer +} + +// NewSchemaGenerator creates a new SchemaGenerator. +func NewSchemaGenerator(outputDir string, analyzer *PackageAnalyzer) *SchemaGenerator { + return &SchemaGenerator{ + outputDir: outputDir, + analyzer: analyzer, + } +} + +// GenerateSchema generates a YAML schema for the component's config. +// configTypeName is the name of the config type (e.g., "Config"). +// configPkgPath is the package path where the config is defined (empty for local package). +func (g *SchemaGenerator) GenerateSchema(componentKind, componentName, configTypeName, configPkgPath string) error { + structInfo, err := g.analyzer.analyzeConfig(configTypeName, configPkgPath) + if err != nil { + return fmt.Errorf("failed to analyze config: %w", err) + } + + schema := g.structToSchema(structInfo, componentKind, componentName) + + // Ensure output directory exists + if err = os.MkdirAll(g.outputDir, 0o700); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + // Write schema to file + outputPath := filepath.Join(g.outputDir, "config_schema.yaml") + data, err := yaml.Marshal(schema) + if err != nil { + return fmt.Errorf("failed to marshal schema: %w", err) + } + + if err := os.WriteFile(outputPath, data, 0o600); err != nil { + return fmt.Errorf("failed to write schema: %w", err) + } + + return nil +} + +// structToSchema converts a StructInfo to a JSON Schema. 
+func (g *SchemaGenerator) structToSchema(info *StructInfo, componentKind, componentName string) *Schema { + schema := &Schema{ + Schema: jsonSchemaVersion, + Title: fmt.Sprintf("%s %s configuration", componentName, componentKind), + Description: info.Description, + Type: "object", + Properties: make(map[string]*Schema), + AdditionalProperties: false, + } + + for _, field := range info.Fields { + propSchema := g.fieldToSchema(&field) + if propSchema != nil { + if field.Embedded { + // For embedded structs, merge properties into parent + maps.Copy(schema.Properties, propSchema.Properties) + } else { + schema.Properties[field.JSONName] = propSchema + } + } + } + + return schema +} + +// fieldToSchema converts a FieldInfo to a JSON Schema property. +func (g *SchemaGenerator) fieldToSchema(field *FieldInfo) *Schema { + schema := &Schema{ + Description: field.Description, + } + + // Handle embedded structs + if field.Embedded && len(field.Fields) > 0 { + schema.Type = "object" + schema.Properties = make(map[string]*Schema) + schema.AdditionalProperties = false + for _, f := range field.Fields { + propSchema := g.fieldToSchema(&f) + if propSchema != nil { + if f.Embedded { + // For embedded/squashed fields, merge their properties into parent + maps.Copy(schema.Properties, propSchema.Properties) + } else { + schema.Properties[f.JSONName] = propSchema + } + } + } + return schema + } + + // Convert Go type to JSON Schema type + g.setSchemaType(schema, field.Type) + + // Handle nested struct fields + if len(field.Fields) > 0 { + if schema.Type == "object" { + schema.Properties = make(map[string]*Schema) + schema.AdditionalProperties = false + for _, f := range field.Fields { + propSchema := g.fieldToSchema(&f) + if propSchema != nil { + if f.Embedded { + // For embedded/squashed fields, merge their properties into parent + maps.Copy(schema.Properties, propSchema.Properties) + } else { + schema.Properties[f.JSONName] = propSchema + } + } + } + } else if schema.Type == "array" 
&& schema.Items != nil && schema.Items.Type == "object" { + // For arrays of structs, populate items properties + schema.Items.Properties = make(map[string]*Schema) + schema.Items.AdditionalProperties = false + for _, f := range field.Fields { + propSchema := g.fieldToSchema(&f) + if propSchema != nil { + if f.Embedded { + // For embedded/squashed fields, merge their properties into parent + maps.Copy(schema.Items.Properties, propSchema.Properties) + } else { + schema.Items.Properties[f.JSONName] = propSchema + } + } + } + } + } + + return schema +} + +// setSchemaType sets the JSON Schema type based on Go type. +func (g *SchemaGenerator) setSchemaType(schema *Schema, goType string) { + // Remove package prefix for easier matching + typeName := goType + if idx := strings.LastIndex(typeName, "."); idx != -1 { + typeName = typeName[idx+1:] + } + + // Handle pointer types + if after, found := strings.CutPrefix(goType, "*"); found { + g.setSchemaType(schema, after) + return + } + + // Handle opaque string types (e.g., configopaque.String) - treat as string + if strings.HasSuffix(typeName, "String") && strings.Contains(goType, "opaque") { + schema.Type = "string" + return + } + + // Handle Optional[T] generic types - unwrap the inner type + if strings.Contains(goType, ".Optional[") { + innerType := extractOptionalInnerType(goType) + if innerType != "" { + g.setSchemaType(schema, innerType) + } + return + } + + // Handle slice types + if after, found := strings.CutPrefix(goType, "[]"); found { + schema.Type = "array" + itemSchema := &Schema{} + g.setSchemaType(itemSchema, after) + schema.Items = itemSchema + return + } + + // Handle map types + if strings.HasPrefix(goType, "map[") { + schema.Type = "object" + // Extract value type from map[string]ValueType + if _, valueType, found := strings.Cut(goType, "]"); found && valueType != "" { + addProps := &Schema{} + g.setSchemaType(addProps, valueType) + schema.AdditionalProperties = addProps + } + return + } + + // Handle 
common types + switch typeName { + case "string": + schema.Type = "string" + case "bool": + schema.Type = "boolean" + case "int", "int8", "int16", "int32", "int64", + "uint", "uint8", "uint16", "uint32", "uint64": + schema.Type = "integer" + case "float32", "float64": + schema.Type = "number" + case "Duration": + // time.Duration uses Go's duration format (e.g., "1h30m10s"), not ISO 8601 + schema.Type = "string" + schema.Pattern = durationPattern + case "Time": + schema.Type = "string" + schema.Format = "date-time" + case "ID", "Type": + // component.ID and component.Type are represented as strings in config + schema.Type = "string" + case "interface{}", "any": + // No specific type constraint + schema.Type = "" + default: + // Default to object for complex types + schema.Type = "object" + } +} + +// extractOptionalInnerType extracts the inner type from configoptional.Optional[T]. +func extractOptionalInnerType(goType string) string { + // Find the start of Optional[ + start := strings.Index(goType, "Optional[") + if start == -1 { + return "" + } + start += len("Optional[") + + // Find matching closing bracket + depth := 1 + for i := start; i < len(goType); i++ { + switch goType[i] { + case '[': + depth++ + case ']': + depth-- + if depth == 0 { + return goType[start:i] + } + } + } + return "" +} diff --git a/cmd/mdatagen/internal/schemagen/generator_test.go b/cmd/mdatagen/internal/schemagen/generator_test.go new file mode 100644 index 00000000000..a9eaf2ff8c6 --- /dev/null +++ b/cmd/mdatagen/internal/schemagen/generator_test.go @@ -0,0 +1,248 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package schemagen + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" + + "github.com/santhosh-tekuri/jsonschema/v6" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v3" +) + +func TestSchemaGenerator_GenerateSchema(t *testing.T) { + testDir := filepath.Join("..", "samplereceiver") 
+ outputDir := t.TempDir() + + analyzer := NewPackageAnalyzer(testDir) + generator := NewSchemaGenerator(outputDir, analyzer) + + // Pass empty strings to test auto-detection of config type + err := generator.GenerateSchema("receiver", "sample", "", "") + require.NoError(t, err) + + // Check that schema file was created + schemaPath := filepath.Join(outputDir, "config_schema.yaml") + _, err = os.Stat(schemaPath) + require.NoError(t, err, "schema file was not created") + + // Read the generated schema + generatedData, err := os.ReadFile(schemaPath) //#nosec G304 -- test file path + require.NoError(t, err) + + // Read the expected schema from testdata + expectedPath := filepath.Join("testdata", "config_schema.yaml") + expectedData, err := os.ReadFile(expectedPath) //#nosec G304 -- test file path + require.NoError(t, err) + + // Compare YAML content by parsing both and comparing as JSON + var generatedSchema, expectedSchema any + require.NoError(t, yaml.Unmarshal(generatedData, &generatedSchema)) + require.NoError(t, yaml.Unmarshal(expectedData, &expectedSchema)) + + generatedJSON, err := json.Marshal(generatedSchema) + require.NoError(t, err) + expectedJSON, err := json.Marshal(expectedSchema) + require.NoError(t, err) + + assert.JSONEq(t, string(expectedJSON), string(generatedJSON)) +} + +// TestDetectConfigFromFactory checks that analyzeConfig, called with an empty type name and package path, resolves the samplereceiver config struct and extracts its fields. +func TestDetectConfigFromFactory(t *testing.T) { + // Test detection from createDefaultConfig() function + // The samplereceiver uses var _ component.Config = (*Config)(nil) pattern, + // but we can test that the detection chain works correctly + + testDir := filepath.Join("..", "samplereceiver") + analyzer := NewPackageAnalyzer(testDir) + + // Analyze the package without naming a config type or package path + structInfo, err := analyzer.analyzeConfig("", "") + require.NoError(t, err) + + // Verify the config was detected (samplereceiver uses MyConfig) + assert.Equal(t, "MyConfig", structInfo.Name) + assert.Contains(t, structInfo.Package, "samplereceiver") + + // Verify fields were extracted + assert.NotEmpty(t,
structInfo.Fields) + + // Check for known fields + fieldNames := make(map[string]bool) + for _, f := range structInfo.Fields { + fieldNames[f.JSONName] = true + } + assert.True(t, fieldNames["endpoint"], "expected 'endpoint' field") + assert.True(t, fieldNames["timeout"], "expected 'timeout' field") +} + +// TestParseTag checks the field name and squash flag that parseTag returns for a set of struct tags. +func TestParseTag(t *testing.T) { + tests := []struct { + name string + tag string + expectedName string + expectedSquash bool + }{ + { + name: "simple name", + tag: `mapstructure:"endpoint"`, + expectedName: "endpoint", + expectedSquash: false, + }, + { + name: "skip field with dash", + tag: `mapstructure:"-"`, + expectedName: "-", + expectedSquash: false, + }, + { + name: "squash tag", + tag: `mapstructure:",squash"`, + expectedName: "", + expectedSquash: true, + }, + { + name: "name with squash", + tag: `mapstructure:"config,squash"`, + expectedName: "config", + expectedSquash: true, + }, + { + name: "empty mapstructure", + tag: `json:"foo"`, + expectedName: "", + expectedSquash: false, + }, + { + name: "omitempty option", + tag: `mapstructure:"field,omitempty"`, + expectedName: "field", + expectedSquash: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + name, squash := parseTag(tc.tag) + assert.Equal(t, tc.expectedName, name, "unexpected name") + assert.Equal(t, tc.expectedSquash, squash, "unexpected squash") + }) + } +} + +// TestSetSchemaType checks the schema type that setSchemaType assigns for Go type strings, plus the format or pattern when the case specifies one. +func TestSetSchemaType(t *testing.T) { + g := &SchemaGenerator{} + + tests := []struct { + goType string + expectedType string + format string + pattern string + }{ + {"string", "string", "", ""}, + {"bool", "boolean", "", ""}, + {"int", "integer", "", ""}, + {"int64", "integer", "", ""}, + {"float64", "number", "", ""}, + {"time.Duration", "string", "", `^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$`}, + {"[]string", "array", "", ""}, + {"map[string]string", "object", "", ""}, + {"go.opentelemetry.io/collector/config/configopaque.String", "string", "", ""}, +
{"go.opentelemetry.io/collector/config/configoptional.Optional[string]", "string", "", ""}, + {"go.opentelemetry.io/collector/config/configoptional.Optional[int]", "integer", "", ""}, + } + + for _, tc := range tests { + t.Run(tc.goType, func(t *testing.T) { + schema := &Schema{} + g.setSchemaType(schema, tc.goType) + if schema.Type != tc.expectedType { + t.Errorf("expected type %q, got %q", tc.expectedType, schema.Type) + } + if tc.format != "" && schema.Format != tc.format { + t.Errorf("expected format %q, got %q", tc.format, schema.Format) + } + if tc.pattern != "" && schema.Pattern != tc.pattern { + t.Errorf("expected pattern %q, got %q", tc.pattern, schema.Pattern) + } + }) + } +} + +func TestSchemaValidation(t *testing.T) { + // Compiles testdata/config_schema.yaml as a JSON Schema, then validates each sample config against it. + // Load the YAML schema + schemaPath := filepath.Join("testdata", "config_schema.yaml") + schemaData, err := os.ReadFile(schemaPath) //#nosec G304 -- test file path + require.NoError(t, err, "failed to read schema file") + + // Parse the schema YAML and convert to JSON-compatible format + var schemaDoc any + err = yaml.Unmarshal(schemaData, &schemaDoc) + require.NoError(t, err, "failed to parse schema YAML") + + // Convert to JSON and back to ensure JSON-compatible types + jsonBytes, err := json.Marshal(schemaDoc) + require.NoError(t, err, "failed to convert schema to JSON") + err = json.Unmarshal(jsonBytes, &schemaDoc) + require.NoError(t, err, "failed to parse schema JSON") + + // Compile the schema + compiler := jsonschema.NewCompiler() + err = compiler.AddResource("config_schema.json", schemaDoc) + require.NoError(t, err, "failed to add schema resource") + + schema, err := compiler.Compile("config_schema.json") + require.NoError(t, err, "failed to compile schema") + + tests := []struct { + name string + configFile string + expectValid bool + }{ + { + name: "valid configuration", + configFile: "samplereceiver_config.yaml", + expectValid: true, + }, + { + name: "invalid configuration with type mismatches", + configFile:
"samplereceiver_invalid_config.yaml", + expectValid: false, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // Load the YAML config + configPath := filepath.Join("testdata", tc.configFile) + configData, err := os.ReadFile(configPath) //#nosec G304 -- test file path + require.NoError(t, err, "failed to read config file") + + // Parse YAML into an untyped value + var config any + err = yaml.Unmarshal(configData, &config) + require.NoError(t, err, "failed to parse YAML") + + // Convert to JSON-compatible format via round-trip + jsonBytes, err := json.Marshal(config) + require.NoError(t, err, "failed to marshal config to JSON") + err = json.Unmarshal(jsonBytes, &config) + require.NoError(t, err, "failed to unmarshal JSON") + + // Validate against schema + validationErr := schema.Validate(config) + + if tc.expectValid { + require.NoError(t, validationErr, "expected config to be valid") + } else { + require.Error(t, validationErr, "expected config to be invalid") + t.Logf("Validation errors (expected): %v", validationErr) + } + }) + } +} diff --git a/cmd/mdatagen/internal/schemagen/testdata/config_schema.yaml b/cmd/mdatagen/internal/schemagen/testdata/config_schema.yaml new file mode 100644 index 00000000000..2eb1ed83ead --- /dev/null +++ b/cmd/mdatagen/internal/schemagen/testdata/config_schema.yaml @@ -0,0 +1,87 @@ +$schema: https://json-schema.org/draft/2020-12/schema +title: sample receiver configuration +type: object +properties: + api_key: + description: APIKey is a secret API key (opaque string). + type: string + batch_size: + description: BatchSize is the number of items to send in each batch. + type: integer + custom_string: + description: CustomString is a custom string. + type: string + enabled: + description: Enabled controls whether the exporter is active. + type: boolean + endpoint: + description: Endpoint is the target URL to send data to. + type: string + endpoints: + description: Endpoints is a list of endpoint configurations.
+ type: array + items: + type: object + properties: + priority: + description: Priority is the endpoint priority. + type: integer + url: + description: URL is the endpoint URL. + type: string + additionalProperties: false + headers: + description: Headers are additional headers to include in requests. + type: object + additionalProperties: + type: string + host: + description: Host is the network host. + type: string + id: + description: ID is the component identifier. + type: string + optional_retry: + description: OptionalRetry is an optional retry configuration. + type: object + properties: + initial_interval: + description: InitialInterval is the initial retry interval. + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + max_retries: + description: MaxRetries is the maximum number of retries. + type: integer + additionalProperties: false + port: + description: Port is the network port. + type: integer + retry: + description: Retry contains retry configuration. + type: object + properties: + initial_interval: + description: InitialInterval is the initial retry interval. + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + max_retries: + description: MaxRetries is the maximum number of retries. + type: integer + additionalProperties: false + secrets: + description: Secrets is a list of secret key-value pairs. + type: object + start_time: + description: StartTime is the time when the receiver should start collecting data. + type: string + format: date-time + tags: + description: Tags are optional tags to attach. + type: array + items: + type: string + timeout: + description: Timeout is the maximum time to wait for a response. 
+ type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ +additionalProperties: false diff --git a/cmd/mdatagen/internal/schemagen/testdata/samplereceiver_config.yaml b/cmd/mdatagen/internal/schemagen/testdata/samplereceiver_config.yaml new file mode 100644 index 00000000000..da34a3c2ea7 --- /dev/null +++ b/cmd/mdatagen/internal/schemagen/testdata/samplereceiver_config.yaml @@ -0,0 +1,31 @@ +# Valid sample receiver configuration +host: localhost +port: 8080 +id: sample/myreceiver +endpoint: https://api.example.com/v1/data +custom_string: my-custom-value +timeout: 30s +start_time: "2024-01-15T10:30:00Z" +enabled: true +batch_size: 100 +headers: + Authorization: Bearer token123 + Content-Type: application/json +retry: + max_retries: 3 + initial_interval: 1s +tags: + - production + - critical +api_key: secret-api-key-12345 +optional_retry: + max_retries: 5 + initial_interval: 500ms +secrets: + key1: value1 + key2: value2 +endpoints: + - url: https://primary.example.com + priority: 1 + - url: https://backup.example.com + priority: 2 diff --git a/cmd/mdatagen/internal/schemagen/testdata/samplereceiver_invalid_config.yaml b/cmd/mdatagen/internal/schemagen/testdata/samplereceiver_invalid_config.yaml new file mode 100644 index 00000000000..716ae62e467 --- /dev/null +++ b/cmd/mdatagen/internal/schemagen/testdata/samplereceiver_invalid_config.yaml @@ -0,0 +1,10 @@ +# Invalid sample receiver configuration - has type mismatches +host: localhost +port: "not-a-number" # Should be integer, not string +endpoint: https://api.example.com/v1/data +timeout: 30s +enabled: "yes" # Should be boolean, not string +batch_size: 100 +retry: + max_retries: "three" # Should be integer, not string + initial_interval: 1s diff --git a/cmd/mdatagen/internal/schemagen/types.go b/cmd/mdatagen/internal/schemagen/types.go new file mode 100644 index 00000000000..88db5e06ecc --- /dev/null +++ b/cmd/mdatagen/internal/schemagen/types.go @@ -0,0 +1,51 @@ +// Copyright The 
OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +package schemagen // import "go.opentelemetry.io/collector/cmd/mdatagen/internal/schemagen" + +// Schema represents a JSON Schema document. Schemas generated by this package declare draft 2020-12 in $schema (see jsonSchemaVersion). +type Schema struct { + Schema string `yaml:"$schema,omitempty"` + ID string `yaml:"$id,omitempty"` + Title string `yaml:"title,omitempty"` + Description string `yaml:"description,omitempty"` + Type string `yaml:"type,omitempty"` + Properties map[string]*Schema `yaml:"properties,omitempty"` + Required []string `yaml:"required,omitempty"` + AdditionalProperties any `yaml:"additionalProperties,omitempty"` // set to false for closed objects, or to a *Schema describing map values + Items *Schema `yaml:"items,omitempty"` + Enum []any `yaml:"enum,omitempty"` + Default any `yaml:"default,omitempty"` + Ref string `yaml:"$ref,omitempty"` + Definitions map[string]*Schema `yaml:"definitions,omitempty"` // NOTE(review): draft 2020-12 names this keyword "$defs" - confirm before populating + OneOf []*Schema `yaml:"oneOf,omitempty"` + AnyOf []*Schema `yaml:"anyOf,omitempty"` + AllOf []*Schema `yaml:"allOf,omitempty"` + Format string `yaml:"format,omitempty"` + Minimum *float64 `yaml:"minimum,omitempty"` + Maximum *float64 `yaml:"maximum,omitempty"` + MinLength *int `yaml:"minLength,omitempty"` + MaxLength *int `yaml:"maxLength,omitempty"` + Pattern string `yaml:"pattern,omitempty"` + MinItems *int `yaml:"minItems,omitempty"` + MaxItems *int `yaml:"maxItems,omitempty"` +} + +// FieldInfo holds metadata about a struct field for schema generation. +type FieldInfo struct { + Name string // Go field name + JSONName string // Name in JSON/YAML (from mapstructure or json tag; NOTE(review): TestParseTag shows parseTag returning "" for a json-only tag - confirm the json fallback) + Type string // Go type string representation + Description string // Doc comment for the field + Default any // Default value if any + Embedded bool // Whether this is an embedded struct + Fields []FieldInfo // Nested fields for struct types +} + +// StructInfo holds metadata about a Go struct for schema generation.
+type StructInfo struct { + Name string // Struct name (e.g. "MyConfig") + Package string // Package path + Description string // Doc comment for the struct; used as the root schema description + Fields []FieldInfo // Struct fields +} diff --git a/receiver/otlpreceiver/internal/metadata/schemas/config_schema.yaml b/receiver/otlpreceiver/internal/metadata/schemas/config_schema.yaml new file mode 100644 index 00000000000..aa989112223 --- /dev/null +++ b/receiver/otlpreceiver/internal/metadata/schemas/config_schema.yaml @@ -0,0 +1,421 @@ +$schema: https://json-schema.org/draft/2020-12/schema +title: otlp receiver configuration +type: object +properties: + grpc: + type: object + properties: + auth: + description: Auth configuration for outgoing RPCs. + type: object + properties: + authenticator: + description: AuthenticatorID specifies the name of the extension to use in order to authenticate the incoming data point. + type: string + additionalProperties: false + dialer: + description: DialerConfig contains options for connecting to an address. + type: object + properties: + timeout: + description: |- + Timeout is the maximum amount of time a dial will wait for + a connect to complete. The default is no timeout. + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + additionalProperties: false + endpoint: + description: |- + Endpoint configures the address for this network connection. + For TCP and UDP networks, the address has the form "host:port". The host must be a literal IP address, + or a host name that can be resolved to IP addresses. The port must be a literal port number or a service name. + If the host is a literal IPv6 address it must be enclosed in square brackets, as in "[2001:db8::1]:80" or + "[fe80::1%zone]:80". The zone specifies the scope of the literal IPv6 address as defined in RFC 4007. + type: string + include_metadata: + description: Include propagates the incoming connection's metadata to downstream consumers.
+ type: boolean + keepalive: + description: |- + The keepalive parameters for gRPC client. See grpc.WithKeepaliveParams. + (https://godoc.org/google.golang.org/grpc#WithKeepaliveParams). + type: object + properties: + enforcement_policy: + type: object + properties: + min_time: + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + permit_without_stream: + type: boolean + additionalProperties: false + server_parameters: + type: object + properties: + max_connection_age: + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + max_connection_age_grace: + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + max_connection_idle: + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + time: + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + timeout: + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + additionalProperties: false + additionalProperties: false + max_concurrent_streams: + description: |- + MaxConcurrentStreams sets the limit on the number of concurrent streams to each ServerTransport. + It has effect only for streaming RPCs. + type: integer + max_recv_msg_size_mib: + description: MaxRecvMsgSizeMiB sets the maximum size (in MiB) of messages accepted by the server. + type: integer + middlewares: + description: Middlewares for the gRPC client. + type: array + items: + type: object + properties: + id: + description: ID specifies the name of the extension to use. + type: string + additionalProperties: false + read_buffer_size: + description: |- + ReadBufferSize for gRPC client. See grpc.WithReadBufferSize. + (https://godoc.org/google.golang.org/grpc#WithReadBufferSize). + type: integer + tls: + description: TLS struct exposes TLS client configuration. + type: object + properties: + ca_file: + description: |- + Path to the CA cert. For a client this verifies the server certificate. 
+ For a server this verifies client certificates. If empty uses system root CA. + (optional) + type: string + ca_pem: + description: In memory PEM encoded cert. (optional) + type: string + cert_file: + description: Path to the TLS cert to use for TLS required connections. (optional) + type: string + cert_pem: + description: In memory PEM encoded TLS cert to use for TLS required connections. (optional) + type: string + cipher_suites: + description: |- + CipherSuites is a list of TLS cipher suites that the TLS transport can use. + If left blank, a safe default list is used. + See https://go.dev/src/crypto/tls/cipher_suites.go for a list of supported cipher suites. + type: array + items: + type: string + client_ca_file: + description: |- + Path to the TLS cert to use by the server to verify a client certificate. (optional) + This sets the ClientCAs and ClientAuth to RequireAndVerifyClientCert in the TLSConfig. Please refer to + https://godoc.org/crypto/tls#Config for more information. (optional) + type: string + client_ca_file_reload: + description: |- + Reload the ClientCAs file when it is modified + (optional, default false) + type: boolean + curve_preferences: + description: |- + contains the elliptic curves that will be used in + an ECDHE handshake, in preference order + Defaults to empty list and "crypto/tls" defaults are used, internally. + type: array + items: + type: string + include_system_ca_certs_pool: + description: |- + If true, load system CA certificates pool in addition to the certificates + configured in this struct. + type: boolean + key_file: + description: Path to the TLS key to use for TLS required connections. (optional) + type: string + key_pem: + description: In memory PEM encoded TLS key to use for TLS required connections. (optional) + type: string + max_version: + description: |- + MaxVersion sets the maximum TLS version that is acceptable. + If not set, refer to crypto/tls for defaults. 
(optional) + type: string + min_version: + description: |- + MinVersion sets the minimum TLS version that is acceptable. + If not set, TLS 1.2 will be used. (optional) + type: string + reload_interval: + description: |- + ReloadInterval specifies the duration after which the certificate will be reloaded + If not set, it will never be reloaded (optional) + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + tpm: + description: Trusted platform module configuration + type: object + properties: + auth: + type: string + enabled: + type: boolean + owner_auth: + type: string + path: + description: |- + The path to the TPM device or Unix domain socket. + For instance /dev/tpm0 or /dev/tpmrm0. + type: string + additionalProperties: false + additionalProperties: false + transport: + description: |- + Transport to use. Allowed protocols are "tcp", "tcp4" (IPv4-only), "tcp6" (IPv6-only), "udp", "udp4" (IPv4-only), + "udp6" (IPv6-only), "ip", "ip4" (IPv4-only), "ip6" (IPv6-only), "unix", "unixgram" and "unixpacket". + type: string + write_buffer_size: + description: |- + WriteBufferSize for gRPC gRPC. See grpc.WithWriteBufferSize. + (https://godoc.org/google.golang.org/grpc#WithWriteBufferSize). + type: integer + additionalProperties: false + http: + type: object + properties: + auth: + description: Auth configuration for outgoing HTTP calls. + type: object + properties: + authenticator: + description: AuthenticatorID specifies the name of the extension to use in order to authenticate the incoming data point. + type: string + request_params: + description: |- + RequestParameters is a list of parameters that should be extracted from the request and added to the context. + When a parameter is found in both the query string and the header, the value from the query string will be used. 
+ type: array + items: + type: string + additionalProperties: false + compression_algorithms: + description: 'CompressionAlgorithms configures the list of compression algorithms the server can accept. Default: ["", "gzip", "zstd", "zlib", "snappy", "deflate"]' + type: array + items: + type: string + cors: + description: CORS configures the server for HTTP cross-origin resource sharing (CORS). + type: object + properties: + allowed_headers: + description: |- + AllowedHeaders sets what headers will be allowed in CORS requests. + The Accept, Accept-Language, Content-Type, and Content-Language + headers are implicitly allowed. If no headers are listed, + X-Requested-With will also be accepted by default. Include "*" to + allow any request header. + type: array + items: + type: string + allowed_origins: + description: |- + AllowedOrigins sets the allowed values of the Origin header for + HTTP/JSON requests to an OTLP receiver. An origin may contain a + wildcard (*) to replace 0 or more characters (e.g., + "http://*.domain.com", or "*" to allow any origin). + type: array + items: + type: string + max_age: + description: |- + MaxAge sets the value of the Access-Control-Max-Age response header. + Set it to the number of seconds that browsers should cache a CORS + preflight response for. + type: integer + additionalProperties: false + endpoint: + description: 'The target URL to send data to (e.g.: http://some.url:9411/v1/traces).' + type: string + idle_timeout: + description: |- + IdleTimeout is the maximum amount of time to wait for the + next request when keep-alives are enabled. If IdleTimeout + is zero, the value of ReadTimeout is used. If both are + zero, there is no timeout. 
+ type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + include_metadata: + description: IncludeMetadata propagates the client metadata from the incoming requests to the downstream consumers + type: boolean + keep_alives_enabled: + description: |- + KeepAlivesEnabled controls whether HTTP keep-alives are enabled. + By default, keep-alives are always enabled. Only very resource-constrained environments should disable them. + type: boolean + logs_url_path: + description: The URL path to receive logs on. If omitted "/v1/logs" will be used. + type: string + max_request_body_size: + description: 'MaxRequestBodySize sets the maximum request body size in bytes. Default: 20MiB.' + type: integer + metrics_url_path: + description: The URL path to receive metrics on. If omitted "/v1/metrics" will be used. + type: string + middlewares: + description: |- + Middlewares are used to add custom functionality to the HTTP client. + Middleware handlers are called in the order they appear in this list, + with the first middleware becoming the outermost handler. + type: array + items: + type: object + properties: + id: + description: ID specifies the name of the extension to use. + type: string + additionalProperties: false + read_header_timeout: + description: |- + ReadHeaderTimeout is the amount of time allowed to read + request headers. The connection's read deadline is reset + after reading the headers and the Handler can decide what + is considered too slow for the body. If ReadHeaderTimeout + is zero, the value of ReadTimeout is used. If both are + zero, there is no timeout. + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + read_timeout: + description: |- + ReadTimeout is the maximum duration for reading the entire + request, including the body. A zero or negative value means + there will be no timeout. 
+ + Because ReadTimeout does not let Handlers make per-request + decisions on each request body's acceptable deadline or + upload rate, most users will prefer to use + ReadHeaderTimeout. It is valid to use them both. + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + response_headers: + description: |- + Additional headers attached to each HTTP response sent to the client. + Header values are opaque since they may be sensitive. + type: object + tls: + description: TLS struct exposes TLS client configuration. + type: object + properties: + ca_file: + description: |- + Path to the CA cert. For a client this verifies the server certificate. + For a server this verifies client certificates. If empty uses system root CA. + (optional) + type: string + ca_pem: + description: In memory PEM encoded cert. (optional) + type: string + cert_file: + description: Path to the TLS cert to use for TLS required connections. (optional) + type: string + cert_pem: + description: In memory PEM encoded TLS cert to use for TLS required connections. (optional) + type: string + cipher_suites: + description: |- + CipherSuites is a list of TLS cipher suites that the TLS transport can use. + If left blank, a safe default list is used. + See https://go.dev/src/crypto/tls/cipher_suites.go for a list of supported cipher suites. + type: array + items: + type: string + client_ca_file: + description: |- + Path to the TLS cert to use by the server to verify a client certificate. (optional) + This sets the ClientCAs and ClientAuth to RequireAndVerifyClientCert in the TLSConfig. Please refer to + https://godoc.org/crypto/tls#Config for more information. 
(optional) + type: string + client_ca_file_reload: + description: |- + Reload the ClientCAs file when it is modified + (optional, default false) + type: boolean + curve_preferences: + description: |- + contains the elliptic curves that will be used in + an ECDHE handshake, in preference order + Defaults to empty list and "crypto/tls" defaults are used, internally. + type: array + items: + type: string + include_system_ca_certs_pool: + description: |- + If true, load system CA certificates pool in addition to the certificates + configured in this struct. + type: boolean + key_file: + description: Path to the TLS key to use for TLS required connections. (optional) + type: string + key_pem: + description: In memory PEM encoded TLS key to use for TLS required connections. (optional) + type: string + max_version: + description: |- + MaxVersion sets the maximum TLS version that is acceptable. + If not set, refer to crypto/tls for defaults. (optional) + type: string + min_version: + description: |- + MinVersion sets the minimum TLS version that is acceptable. + If not set, TLS 1.2 will be used. (optional) + type: string + reload_interval: + description: |- + ReloadInterval specifies the duration after which the certificate will be reloaded + If not set, it will never be reloaded (optional) + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + tpm: + description: Trusted platform module configuration + type: object + properties: + auth: + type: string + enabled: + type: boolean + owner_auth: + type: string + path: + description: |- + The path to the TPM device or Unix domain socket. + For instance /dev/tpm0 or /dev/tpmrm0. + type: string + additionalProperties: false + additionalProperties: false + traces_url_path: + description: The URL path to receive traces on. If omitted "/v1/traces" will be used. + type: string + write_timeout: + description: |- + WriteTimeout is the maximum duration before timing out + writes of the response. 
It is reset whenever a new + request's header is read. Like ReadTimeout, it does not + let Handlers make decisions on a per-request basis. + A zero or negative value means there will be no timeout. + type: string + pattern: ^(0|[-+]?((\d+(\.\d*)?|\.\d+)(ns|us|µs|μs|ms|s|m|h))+)$ + additionalProperties: false +additionalProperties: false diff --git a/receiver/otlpreceiver/metadata.yaml b/receiver/otlpreceiver/metadata.yaml index adb2cae28f0..91901c3d37c 100644 --- a/receiver/otlpreceiver/metadata.yaml +++ b/receiver/otlpreceiver/metadata.yaml @@ -8,3 +8,6 @@ status: stable: [traces, metrics, logs] development: [profiles] distributions: [core, contrib, k8s, otlp] + +schema: + enabled: true \ No newline at end of file