diff --git a/CHANGELOG.md b/CHANGELOG.md index e8a327cfd..a3abbc336 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ All notable changes to this project will be documented in this file. - Unified output handling across all registry subcommands. Builtin output formats (json, yaml, jsonl) are now available in `registry stats`, `registry diff`, and `registry resolve`. `registry stats` also supports `--templates` for custom text output templates. ([#1200](https://github.com/open-telemetry/weaver/pull/1200) by @jerbly) - New feature ([#1152](https://github.com/open-telemetry/weaver/issues/1152)) - Live-check with `--emit-otlp-logs` will now include the attributes from the resource in the emitted log record, this helps to identify the source of the finding in a multi-source environment. ([#1206](https://github.com/open-telemetry/weaver/pull/1206) by @jerbly) - New Experimental feature: `weaver registry infer` command that listens for OTLP telemetry and infers a semantic convention registry file from the received data. Supports spans, metrics, events, and resource attributes. Includes configurable gRPC address/port, admin server for graceful shutdown, and inactivity timeout. ([#1138](https://github.com/open-telemetry/weaver/pull/1138) by @ArthurSens) +- Use `schema_url` in registry manifest, dependencies, and resolved schema instead of `registry_url`. Parse registry name and version + from it. ([#1202](https://github.com/open-telemetry/weaver/pull/1202) by @lmolkova) +- Default to `manifest.yaml` for registry manifest file, deprecate `registry_manifest.yaml` and add warning when it's used. 
([#1202](https://github.com/open-telemetry/weaver/pull/1202) by @lmolkova) # [0.21.2] - 2026-02-03 diff --git a/Cargo.lock b/Cargo.lock index e9f9e075f..a71fdd0cc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5892,6 +5892,7 @@ dependencies = [ "serde_yaml", "thiserror 2.0.18", "ureq", + "url", "utoipa", "walkdir", "weaver_common", diff --git a/crates/weaver_codegen_test/build.rs b/crates/weaver_codegen_test/build.rs index 1b282494d..e21227c1a 100644 --- a/crates/weaver_codegen_test/build.rs +++ b/crates/weaver_codegen_test/build.rs @@ -42,8 +42,8 @@ fn main() { let registry_path = VirtualDirectoryPath::LocalFolder { path: SEMCONV_REGISTRY_PATH.into(), }; - let registry_repo = - RegistryRepo::try_new("main", &registry_path).unwrap_or_else(|e| process_error(&logger, e)); + let registry_repo = RegistryRepo::try_new(None, &registry_path, &mut vec![]) + .unwrap_or_else(|e| process_error(&logger, e)); let loaded = SchemaResolver::load_semconv_repository(registry_repo, FOLLOW_SYMLINKS) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal() diff --git a/crates/weaver_emit/src/lib.rs b/crates/weaver_emit/src/lib.rs index 1f75880ef..e16d854fa 100644 --- a/crates/weaver_emit/src/lib.rs +++ b/crates/weaver_emit/src/lib.rs @@ -590,7 +590,7 @@ mod tests { }; let registry = ForgeResolvedRegistry { - registry_url: "TEST_V2".to_owned(), + schema_url: "https://example.com/schemas/1.2.3".try_into().unwrap(), registry: Registry { attributes: vec![], attribute_groups: vec![], diff --git a/crates/weaver_forge/src/lib.rs b/crates/weaver_forge/src/lib.rs index c1213eba9..ba8102ec0 100644 --- a/crates/weaver_forge/src/lib.rs +++ b/crates/weaver_forge/src/lib.rs @@ -842,6 +842,7 @@ mod tests { use weaver_diff::diff_dir; use weaver_resolver::{LoadedSemconvRegistry, SchemaResolver}; use weaver_semconv::registry_repo::RegistryRepo; + use weaver_semconv::schema_url::SchemaUrl; use crate::config::{ApplicationMode, CaseConvention, Params, TemplateConfig, 
WeaverConfig}; use crate::debug::print_dedup_errors; @@ -855,12 +856,16 @@ mod tests { cli_params: Params, ignore_non_fatal_errors: bool, ) -> (TemplateEngine, ResolvedRegistry, PathBuf, PathBuf) { - let registry_id = "default"; + let schema_url: Option<SchemaUrl> = Some( + "https://default/1.0.0" + .try_into() + .expect("Should be valid schema url"), + ); let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let repo = RegistryRepo::try_new(schema_url, &path, &mut vec![]) + .expect("Failed to construct repository"); let registry_result = SchemaResolver::load_semconv_repository(repo, false); // SemConvRegistry::try_from_path_pattern(registry_id, "data/*.yaml"); let registry = if ignore_non_fatal_errors { @@ -1056,12 +1061,16 @@ mod tests { }); engine.target_config.templates = Some(templates); - let registry_id = "default"; let path: VirtualDirectoryPath = "data/registry" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let schema_url: Option<SchemaUrl> = Some( + "https://default/1.0.0" + .try_into() + .expect("Should be valid schema url"), + ); + let repo = RegistryRepo::try_new(schema_url, &path, &mut vec![]) + .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") @@ -1186,12 +1195,14 @@ mod tests { #[test] fn test_comment_format() { - let registry_id = "default"; let path: VirtualDirectoryPath = "data/mini_registry_for_comments" .try_into() .expect("Invalid virtual directory path string"); - let repo = - RegistryRepo::try_new(registry_id, &path).expect("Failed to construct repository"); + let schema_url: SchemaUrl = "https://default/1.0.0" + .try_into() + .expect("Should be valid schema url"); + let 
repo = RegistryRepo::try_new(Some(schema_url), &path, &mut vec![]) + .expect("Failed to construct repository"); let loaded = SchemaResolver::load_semconv_repository(repo, false) .into_result_with_non_fatal() .expect("Failed to load registry") diff --git a/crates/weaver_forge/src/v2/registry.rs b/crates/weaver_forge/src/v2/registry.rs index 7c9595756..51a64caab 100644 --- a/crates/weaver_forge/src/v2/registry.rs +++ b/crates/weaver_forge/src/v2/registry.rs @@ -3,6 +3,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use weaver_resolved_schema::{attribute::AttributeRef, v2::catalog::AttributeCatalog}; +use weaver_semconv::schema_url::SchemaUrl; use crate::{ error::Error, @@ -24,8 +25,7 @@ use crate::{ #[serde(deny_unknown_fields)] pub struct ForgeResolvedRegistry { /// The semantic convention registry url. - #[serde(skip_serializing_if = "String::is_empty")] - pub registry_url: String, + pub schema_url: SchemaUrl, // TODO - Attribute Groups /// The signals defined in this registry. 
pub registry: Registry, @@ -413,7 +413,7 @@ impl ForgeResolvedRegistry { } Ok(Self { - registry_url: schema.schema_url.clone(), + schema_url: schema.schema_url.clone(), registry: Registry { attributes, attribute_groups, @@ -448,8 +448,7 @@ mod tests { fn test_try_from_resolved_schema() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: "https://example.com/schema".to_owned(), - registry_id: "my-registry".to_owned(), + schema_url: "https://example.com/schema".try_into().unwrap(), attribute_catalog: vec![attribute::Attribute { key: "test.attr".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -457,7 +456,6 @@ mod tests { common: CommonFields::default(), }], registry: v2::registry::Registry { - registry_url: "https://example.com/registry".to_owned(), attributes: vec![attribute::AttributeRef(0)], spans: vec![span::Span { r#type: SignalId::from("my-span".to_owned()), @@ -563,7 +561,6 @@ mod tests { }, }], }, - registry_manifest: None, }; let forge_registry = @@ -613,11 +610,9 @@ mod tests { fn test_try_from_resolved_schema_with_missing_attribute() { let resolved_schema = ResolvedTelemetrySchema { file_format: "2.0.0".to_owned(), - schema_url: "https://example.com/schema".to_owned(), - registry_id: "my-registry".to_owned(), + schema_url: "https://example.com/schema".try_into().unwrap(), attribute_catalog: vec![], registry: v2::registry::Registry { - registry_url: "https://example.com/registry".to_owned(), attributes: vec![], // No attributes - This is the logic bug. 
spans: vec![span::Span { r#type: SignalId::from("my-span".to_owned()), @@ -645,7 +640,6 @@ mod tests { metrics: vec![], events: vec![], }, - registry_manifest: None, }; let result = ForgeResolvedRegistry::try_from(resolved_schema); diff --git a/crates/weaver_live_check/src/lib.rs b/crates/weaver_live_check/src/lib.rs index 6308ead93..d012503df 100644 --- a/crates/weaver_live_check/src/lib.rs +++ b/crates/weaver_live_check/src/lib.rs @@ -116,9 +116,9 @@ pub const DEFAULT_LIVE_CHECK_JQ: &str = include_str!("../../../defaults/jq/advic #[serde(untagged)] pub enum VersionedRegistry { /// v1 ResolvedRegistry - V1(ResolvedRegistry), + V1(Box), /// v2 ForgeResolvedRegistry - V2(ForgeResolvedRegistry), + V2(Box), } /// Versioned enum for the attribute diff --git a/crates/weaver_live_check/src/live_checker.rs b/crates/weaver_live_check/src/live_checker.rs index 132ac7ff6..075a8476b 100644 --- a/crates/weaver_live_check/src/live_checker.rs +++ b/crates/weaver_live_check/src/live_checker.rs @@ -507,8 +507,10 @@ mod tests { fn make_registry(use_v2: bool) -> VersionedRegistry { if use_v2 { - VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST".to_owned(), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: "https://example.com/schemas/1.2.3" + .try_into() + .expect("Should be valid schema url"), registry: Registry { attributes: vec![ V2Attribute { @@ -609,9 +611,9 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { - VersionedRegistry::V1(ResolvedRegistry { + VersionedRegistry::V1(Box::new(ResolvedRegistry { registry_url: "TEST".to_owned(), groups: vec![ResolvedGroup { id: "test.comprehensive.internal".to_owned(), @@ -750,7 +752,7 @@ mod tests { body: None, annotations: None, }], - }) + })) } } @@ -793,8 +795,10 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST_METRICS".to_owned(), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: "https://example.com/schemas/1.2.3" 
+ .try_into() + .expect("Should be valid schema url"), registry: Registry { attributes: vec![memory_state_attr.clone()], attribute_groups: vec![], @@ -842,9 +846,9 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { - VersionedRegistry::V1(ResolvedRegistry { + VersionedRegistry::V1(Box::new(ResolvedRegistry { registry_url: "TEST_METRICS".to_owned(), groups: vec![ // Attribute group for system memory @@ -979,7 +983,7 @@ mod tests { annotations: None, }, ], - }) + })) } } @@ -1001,9 +1005,10 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST".to_owned(), - + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: "https://example.com/schemas/1.2.3" + .try_into() + .expect("Should be valid schema url"), registry: Registry { attributes: vec![custom_string_attr.clone()], attribute_groups: vec![], @@ -1038,9 +1043,9 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { - VersionedRegistry::V1(ResolvedRegistry { + VersionedRegistry::V1(Box::new(ResolvedRegistry { registry_url: "TEST".to_owned(), groups: vec![ResolvedGroup { id: "custom.comprehensive.internal".to_owned(), @@ -1085,7 +1090,7 @@ mod tests { body: None, annotations: None, }], - }) + })) } } @@ -1516,8 +1521,10 @@ mod tests { }, }; - VersionedRegistry::V2(ForgeResolvedRegistry { - registry_url: "TEST_EVENTS".to_owned(), + VersionedRegistry::V2(Box::new(ForgeResolvedRegistry { + schema_url: "https://example.com/schemas/1.2.3" + .try_into() + .expect("Should be valid schema url"), registry: Registry { attributes: vec![session_id_attr.clone(), session_previous_id_attr.clone()], attribute_groups: vec![], @@ -1591,9 +1598,9 @@ mod tests { spans: vec![], events: vec![], }, - }) + })) } else { - VersionedRegistry::V1(ResolvedRegistry { + VersionedRegistry::V1(Box::new(ResolvedRegistry { registry_url: "TEST_EVENTS".to_owned(), groups: vec![ ResolvedGroup { @@ -1704,7 +1711,7 @@ mod tests { }), }, ], - }) + })) } } diff --git 
a/crates/weaver_live_check/src/stats.rs b/crates/weaver_live_check/src/stats.rs index 70c0cab64..ce90bc635 100644 --- a/crates/weaver_live_check/src/stats.rs +++ b/crates/weaver_live_check/src/stats.rs @@ -351,7 +351,7 @@ mod tests { groups: vec![], registry_url: String::new(), }; - let versioned_registry = VersionedRegistry::V1(registry); + let versioned_registry = VersionedRegistry::V1(Box::new(registry)); let mut disabled_stats = LiveCheckStatistics::Disabled(DisabledStatistics); let mut normal_stats = diff --git a/crates/weaver_mcp/src/service.rs b/crates/weaver_mcp/src/service.rs index 5f945310f..2982af24f 100644 --- a/crates/weaver_mcp/src/service.rs +++ b/crates/weaver_mcp/src/service.rs @@ -58,7 +58,7 @@ impl WeaverMcpService { let search_context = Arc::new(SearchContext::from_registry(&registry)); // Create versioned registry wrapper once for live check - let versioned_registry = Arc::new(VersionedRegistry::V2((*registry).clone())); + let versioned_registry = Arc::new(VersionedRegistry::V2(Box::new((*registry).clone()))); Self { search_context, @@ -392,7 +392,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - registry_url: "test".to_owned(), + schema_url: "https://todo/1.0.0".try_into().unwrap(), registry: Registry { attributes: vec![Attribute { key: "http.request.method".to_owned(), diff --git a/crates/weaver_resolved_schema/src/error.rs b/crates/weaver_resolved_schema/src/error.rs index 99a1f3750..2986e22e7 100644 --- a/crates/weaver_resolved_schema/src/error.rs +++ b/crates/weaver_resolved_schema/src/error.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::attribute::AttributeRef; -use crate::error::Error::{AttributeNotFound, CompoundError, EventNameNotFound}; +use crate::error::Error::{AttributeNotFound, CompoundError, EventNameNotFound, InvalidSchemaUrl}; /// Errors emitted by this crate. 
#[derive(thiserror::Error, Debug, Clone, Deserialize, Serialize)] @@ -26,6 +26,16 @@ pub enum Error { group_id: String, }, + /// Cannot convert from V1 to V2 schema due to invalid schema URL. + #[error("Failed to convert from V1 to V2 schema, invalid schema URL: {url}, error: {error}")] + InvalidSchemaUrl { + /// The invalid schema URL. + url: String, + + /// The error message from the URL validation. + error: String, + }, + /// A generic container for multiple errors. #[error("Errors:\n{0:#?}")] CompoundError(Vec), @@ -53,6 +63,7 @@ impl Error { CompoundError(errors) => errors, e @ AttributeNotFound { .. } => vec![e], e @ EventNameNotFound { .. } => vec![e], + e @ InvalidSchemaUrl { .. } => vec![e], }) .collect(), ) diff --git a/crates/weaver_resolved_schema/src/lib.rs b/crates/weaver_resolved_schema/src/lib.rs index cde4f4947..9f6441c5e 100644 --- a/crates/weaver_resolved_schema/src/lib.rs +++ b/crates/weaver_resolved_schema/src/lib.rs @@ -298,13 +298,13 @@ impl ResolvedTelemetrySchema { if let Some(ref manifest) = self.registry_manifest { changes.set_head_manifest(weaver_version::schema_changes::RegistryManifest { - semconv_version: manifest.version.clone(), + semconv_version: manifest.version().to_owned(), }); } if let Some(ref manifest) = baseline_schema.registry_manifest { changes.set_baseline_manifest(weaver_version::schema_changes::RegistryManifest { - semconv_version: manifest.version.clone(), + semconv_version: manifest.version().to_owned(), }); } @@ -645,7 +645,7 @@ mod tests { #[test] fn detect_2_renamed_registry_attributes() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", "", ""); prior_schema.add_attribute_group( "registry.group1", [ @@ -659,7 +659,7 @@ mod tests { // 2 new attributes are added: attr2_bis and attr3_bis // attr2 is renamed attr2_bis // attr3 is renamed attr3_bis - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "", ""); 
+ let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", "", ""); latest_schema.add_attribute_group( "registry.group1", [ @@ -805,9 +805,9 @@ mod tests { // TODO add many more group diff checks for various capabilities. #[test] fn detect_metric_name_change() { - let mut prior_schema = ResolvedTelemetrySchema::new("1.0", "test/base_version", ""); + let mut prior_schema = ResolvedTelemetrySchema::new("http://test/schemas/1.0", "", ""); prior_schema.add_metric_group("metrics.cpu.time", "cpu.time", [], None); - let mut latest_schema = ResolvedTelemetrySchema::new("1.0", "test/new_version", ""); + let mut latest_schema = ResolvedTelemetrySchema::new("http://test/schemas/2.0", "", ""); latest_schema.add_metric_group( "metrics.cpu.time", "cpu.time", diff --git a/crates/weaver_resolved_schema/src/v2/mod.rs b/crates/weaver_resolved_schema/src/v2/mod.rs index 28e2d7cc1..143fa2fef 100644 --- a/crates/weaver_resolved_schema/src/v2/mod.rs +++ b/crates/weaver_resolved_schema/src/v2/mod.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize}; use weaver_semconv::{ deprecated::Deprecated, group::GroupType, - manifest::RegistryManifest, + schema_url::SchemaUrl, v2::{ attribute_group::AttributeGroupVisibilitySpec, signal_id::SignalId, span::SpanName, CommonFields, @@ -50,18 +50,13 @@ pub struct ResolvedTelemetrySchema { /// Version of the file structure. pub file_format: String, /// Schema URL that this file is published at. - pub schema_url: String, - /// The ID of the registry that this schema belongs to. - pub registry_id: String, + pub schema_url: SchemaUrl, /// Catalog of attributes. Note: this will include duplicates for the same key. pub attribute_catalog: Vec, /// The registry that this schema belongs to. pub registry: Registry, /// Refinements for the registry pub refinements: Refinements, - /// The manifest of the registry. 
- #[serde(skip_serializing)] - pub registry_manifest: Option, } impl ResolvedTelemetrySchema { @@ -128,14 +123,22 @@ impl TryFrom for ResolvedTelemetrySchema { fn try_from(value: crate::ResolvedTelemetrySchema) -> Result { let (attribute_catalog, registry, refinements) = convert_v1_to_v2(value.catalog, value.registry)?; + let schema_url_str = value.schema_url.clone(); + let schema_url: SchemaUrl = + value + .schema_url + .try_into() + .map_err(|e| crate::error::Error::InvalidSchemaUrl { + url: schema_url_str, + error: e, + })?; + Ok(ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: value.schema_url, - registry_id: value.registry_id, + schema_url, attribute_catalog, registry, refinements, - registry_manifest: None, }) } } @@ -505,7 +508,6 @@ pub fn convert_v1_to_v2( } let v2_registry = Registry { - registry_url: r.registry_url, attributes, spans, metrics, @@ -989,11 +991,11 @@ mod tests { fn test_try_from_v1_to_v2() { let v1_schema = crate::ResolvedTelemetrySchema { file_format: V1_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: "my.schema.url".to_owned(), + schema_url: "http://test/schemas/1.0.0".to_owned(), registry_id: "my-registry".to_owned(), catalog: crate::catalog::Catalog::from_attributes(vec![]), registry: crate::registry::Registry { - registry_url: "my.schema.url".to_owned(), + registry_url: "http://another/url/1.0".to_owned(), groups: vec![], }, instrumentation_library: None, @@ -1007,8 +1009,10 @@ mod tests { assert!(v2_schema.is_ok()); let v2_schema = v2_schema.unwrap(); assert_eq!(v2_schema.file_format, V2_RESOLVED_FILE_FORMAT); - assert_eq!(v2_schema.schema_url, "my.schema.url"); - assert_eq!(v2_schema.registry_id, "my-registry"); + assert_eq!( + v2_schema.schema_url, + "http://test/schemas/1.0.0".try_into().unwrap() + ); } #[test] @@ -1216,13 +1220,13 @@ mod tests { fn empty_v2_schema() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { file_format: V2_RESOLVED_FILE_FORMAT.to_owned(), - schema_url: 
"my.schema.url".to_owned(), - registry_id: "main".to_owned(), + schema_url: "http://test/schemas/1.0" + .try_into() + .expect("Should be valid schema url"), attribute_catalog: vec![], registry: Registry { attributes: vec![], attribute_groups: vec![], - registry_url: "todo".to_owned(), spans: vec![], metrics: vec![], events: vec![], @@ -1233,7 +1237,6 @@ mod tests { metrics: vec![], events: vec![], }, - registry_manifest: None, } } } diff --git a/crates/weaver_resolved_schema/src/v2/registry.rs b/crates/weaver_resolved_schema/src/v2/registry.rs index bdea11ce1..147977427 100644 --- a/crates/weaver_resolved_schema/src/v2/registry.rs +++ b/crates/weaver_resolved_schema/src/v2/registry.rs @@ -35,11 +35,6 @@ pub struct Registry { /// Catalog of (public) attribute groups. pub attribute_groups: Vec, - /// The semantic convention registry url. - /// - /// This is the base URL, under which this registry can be found. - pub registry_url: String, - /// A list of span signal definitions. pub spans: Vec, @@ -267,7 +262,6 @@ mod test { }]; let registry = Registry { attribute_groups: vec![], - registry_url: "https://opentelemetry.io/schemas/1.23.0".to_owned(), spans: vec![Span { r#type: "test.span".to_owned().into(), kind: SpanKindSpec::Client, diff --git a/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml index 0fcd0ce52..8e5b52e46 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_a/registry_manifest.yaml @@ -3,5 +3,5 @@ description: Test registry A for circular dependency testing. 
semconv_version: 0.1.0 schema_base_url: https://example.com/registry_a/schemas/ dependencies: - - name: registry_b + - schema_url: https://example.com/registry_b/schemas/1.0.0 registry_path: data/circular-registry-test/registry_b \ No newline at end of file diff --git a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml index d09a81eb1..d805ab3e9 100644 --- a/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml +++ b/crates/weaver_resolver/data/circular-registry-test/registry_b/registry_manifest.yaml @@ -4,4 +4,5 @@ semconv_version: 0.1.0 schema_base_url: https://example.com/registry_b/schemas/ dependencies: - name: registry_a + # schema_url: is not necessary here, we're using deprecated, but valid for now `name` registry_path: data/circular-registry-test/registry_a \ No newline at end of file diff --git a/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml b/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml index d9cf26bc9..be13985a8 100644 --- a/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/multi-registry/app_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the App. 
semconv_version: 0.1.0 schema_base_url: https://app.com/schemas/ dependencies: - - name: acme + - schema_url: https://acme.com/schemas/0.1.0 registry_path: data/multi-registry/custom_registry diff --git a/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml b/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml index 711eb37f4..8e0a5081b 100644 --- a/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/multi-registry/custom_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: otel + - schema_url: https://opentelemetry.io/schemas/1.30.0 registry_path: data/multi-registry/otel_registry diff --git a/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json b/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json index 0f2def153..e0b0e9af3 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json +++ b/crates/weaver_resolver/data/registry-test-published-1/expected-registry.json @@ -58,12 +58,12 @@ "name": "my-span", "lineage": { "provenance": { - "registry_id": "acme", + "registry_id": "acme.com/schemas", "path": "data/registry-test-published-1/registry/main.yaml" }, "attributes": { "a": { - "source_group": "v2_dependency.published", + "source_group": "v2_dependency.opentelemetry.io/schemas", "inherited_fields": [ "annotations", "brief", diff --git a/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml b/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml index f85d5ef60..6d7cb03d2 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml +++ 
b/crates/weaver_resolver/data/registry-test-published-1/published/registry_manifest.yaml @@ -1,7 +1,5 @@ file_format: manifest/2.0.0 -name: resolved -description: Test repository that has been resolved. -version: 1.0.0 -repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_schema.yaml +schema_url: https://opentelemetry.io/schemas/1.0.0 +resolved_schema_uri: resolved_schema.yaml +description: Test repository that has been resolved. \ No newline at end of file diff --git a/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml b/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml index 189cdb46f..96103a4f1 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml +++ b/crates/weaver_resolver/data/registry-test-published-1/published/resolved_schema.yaml @@ -1,13 +1,11 @@ file_format: resolved/2.0.0 -schema_url: http://todo -registry_id: published +schema_url: https://opentelemetry.io/schemas/1.0.0 attribute_catalog: - key: a type: string brief: test a stability: stable registry: - registry_url: todo-why? 
attributes: - 0 attribute_groups: diff --git a/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml b/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml index ad5d44275..d48e2ab4e 100644 --- a/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml +++ b/crates/weaver_resolver/data/registry-test-published-1/registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: published + - schema_url: https://example.com/schemas/1.2.3 registry_path: data/registry-test-published-1/published diff --git a/crates/weaver_resolver/src/attribute.rs b/crates/weaver_resolver/src/attribute.rs index 99f782b3f..908101c7f 100644 --- a/crates/weaver_resolver/src/attribute.rs +++ b/crates/weaver_resolver/src/attribute.rs @@ -321,7 +321,7 @@ impl AttributeLookup for V1Schema { impl AttributeLookup for V2Schema { fn lookup_attribute(&self, key: &str) -> Option { - let fake_group_id = format!("v2_dependency.{}", self.registry_id); + let fake_group_id = format!("v2_dependency.{}", self.schema_url.name()); self.attribute_catalog.iter().find_map(|attr| { if attr.key == key { Some(AttributeWithGroupId { diff --git a/crates/weaver_resolver/src/dependency.rs b/crates/weaver_resolver/src/dependency.rs index 24ead7e59..7c6caee8d 100644 --- a/crates/weaver_resolver/src/dependency.rs +++ b/crates/weaver_resolver/src/dependency.rs @@ -19,9 +19,9 @@ use crate::{attribute::AttributeCatalog, Error}; #[derive(Debug, Deserialize)] pub(crate) enum ResolvedDependency { /// A V1 Dependency - V1(V1Schema), - // A V2 Dependency - V2(V2Schema), + V1(Box), + /// A V2 Dependency + V2(Box), } impl ResolvedDependency { @@ -167,7 +167,7 @@ impl ImportableDependency for V2Schema { for ar in m.attributes.iter() { let attr = 
self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -214,7 +214,7 @@ impl ImportableDependency for V2Schema { for ar in e.attributes.iter() { let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -262,7 +262,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -276,7 +276,7 @@ impl ImportableDependency for V2Schema { // TODO - this should be non-panic errors. let attr = self.attribute_catalog.attribute(&ar.base).ok_or( Error::InvalidRegistryAttributeRef { - registry_id: self.registry_id.clone(), + registry_name: self.schema_url.name().to_owned(), attribute_ref: ar.base.0, }, )?; @@ -411,13 +411,13 @@ impl UnresolvedAttributeLookup for Vec { impl From for ResolvedDependency { fn from(value: V1Schema) -> Self { - ResolvedDependency::V1(value) + ResolvedDependency::V1(Box::new(value)) } } impl From for ResolvedDependency { fn from(value: V2Schema) -> Self { - ResolvedDependency::V2(value) + ResolvedDependency::V2(Box::new(value)) } } @@ -444,7 +444,7 @@ mod tests { #[test] fn test_lookup_group_attributes() -> Result<(), Box> { - let d = ResolvedDependency::V1(example_v1_schema()); + let d = ResolvedDependency::V1(Box::new(example_v1_schema())); let result = d.lookup_group_attributes("a"); assert!( result.is_some(), @@ -470,8 +470,8 @@ mod tests { fn example_v1_schema() -> V1Schema { V1Schema { file_format: "resolved/1.0.0".to_owned(), - schema_url: "v1-example".to_owned(), - 
registry_id: "v1-example".to_owned(), + schema_url: "http://test/schemas/1.0.0".to_owned(), + registry_id: "test-registry".to_owned(), registry: weaver_resolved_schema::registry::Registry { registry_url: "v1-example".to_owned(), groups: vec![weaver_resolved_schema::registry::Group { diff --git a/crates/weaver_resolver/src/error.rs b/crates/weaver_resolver/src/error.rs index f2e01eb11..e8d2add5a 100644 --- a/crates/weaver_resolver/src/error.rs +++ b/crates/weaver_resolver/src/error.rs @@ -19,21 +19,26 @@ pub enum Error { FailToResolveDefinition(#[from] weaver_semconv::Error), /// We discovered a circular dependency we cannot resolve. - #[error("Circular dependency detected: registry '{registry_id}' depends on itself through the chain: {chain}")] + #[error("Circular dependency detected: registry '{registry_name}' depends on itself through the chain: {chain}")] CircularDependency { /// The registry that depends on itself. - registry_id: String, + registry_name: String, + /// A string representing the dependency chain. chain: String, }, /// We've reached the maximum dependency depth for this registry. - #[error("Maximum dependency depth reached for registry `{registry}`. Cannot load further dependencies.")] + #[error("Maximum dependency depth reached for registry `{registry_name}`. Cannot load further dependencies.")] MaximumDependencyDepth { /// The registry which has too many dependencies. - registry: String, + registry_name: String, }, + /// Failed to resolve the schema URL for a registry. + #[error("Schema URL is missing in the manifest and cannot be constructed from the registry name and version.")] + FailToResolveSchemaUrl {}, + /// An invalid URL. #[error("Invalid URL `{url:?}`, error: {error:?})")] #[diagnostic(help("Check the URL and try again."))] @@ -168,10 +173,12 @@ pub enum Error { }, /// We - #[error("Invalid registry: {registry_id}. Unable to find attribute by index: {attribute_ref}")] + #[error( + "Invalid registry: {registry_name}. 
Unable to find attribute by index: {attribute_ref}" + )] InvalidRegistryAttributeRef { /// The registry with the issue. - registry_id: String, + registry_name: String, /// The attribute index that does not exist in the registry. attribute_ref: u32, }, diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index 940dc104a..e83c98ba7 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -3,6 +3,7 @@ #![doc = include_str!("../README.md")] use weaver_semconv::group::ImportsWithProvenance; +use weaver_semconv::schema_url::SchemaUrl; use crate::attribute::AttributeCatalog; use crate::dependency::ResolvedDependency; @@ -50,7 +51,7 @@ impl SchemaResolver { } } - // Actually resolves a defiinition registry. + // Actually resolves a definition registry. fn resolve_registry( repo: RegistryRepo, specs: Vec, @@ -88,8 +89,15 @@ impl SchemaResolver { WResult::FatalErr(e) => return WResult::FatalErr(e), } } - let registry_id: String = repo.id().to_string(); let manifest = repo.manifest().cloned(); + let schema_url = if let Some(m) = manifest.as_ref() { + m.schema_url.clone() + } else { + match SchemaUrl::try_from_name_version(repo.name(), repo.version()) { + Ok(url) => url, + Err(_) => return WResult::FatalErr(Error::FailToResolveSchemaUrl {}), + } + }; let mut attr_catalog = AttributeCatalog::default(); // TODO - Do something with non_fatal_errors if we need to. 
resolve_registry_with_dependencies( @@ -105,8 +113,8 @@ impl SchemaResolver { ResolvedTelemetrySchema { file_format: "1.0.0".to_owned(), - schema_url: "".to_owned(), - registry_id, + schema_url: schema_url.as_str().to_owned(), + registry_id: schema_url.name().to_owned(), registry: resolved_registry, catalog, resource: None, @@ -235,7 +243,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; // test with the `include_unreferenced` flag set to false check_semconv_load_and_resolve(registry_repo.clone(), false); // test with the `include_unreferenced` flag set to true @@ -249,7 +257,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("app", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; let result = SchemaResolver::load_semconv_repository(registry_repo, true); match result { @@ -263,19 +271,22 @@ mod tests { ); // Verify we have specs from all three registries - let registry_ids = loaded.registry_ids(); + let registry_names = loaded.registry_names(); assert!( - registry_ids.contains(&"app".to_owned()), - "Missing app registry specs" + registry_names.contains(&"app.com/schemas".to_owned()), + "Missing app registry specs, available registries: {:?}", + registry_names ); assert!( - registry_ids.contains(&"acme".to_owned()), - "Missing acme registry specs" + registry_names.contains(&"acme.com/schemas".to_owned()), + "Missing acme registry specs, available registries: {:?}", + registry_names ); assert!( - registry_ids.contains(&"otel".to_owned()), - "Missing otel registry specs" + registry_names.contains(&"opentelemetry.io/schemas".to_owned()), + "Missing otel registry specs, available 
registries: {:?}", + registry_names ); // Now test the resolved registry content diff --git a/crates/weaver_resolver/src/loader.rs b/crates/weaver_resolver/src/loader.rs index b00634e54..e92ed9410 100644 --- a/crates/weaver_resolver/src/loader.rs +++ b/crates/weaver_resolver/src/loader.rs @@ -14,7 +14,7 @@ use weaver_common::result::WResult; use weaver_resolved_schema::v2::ResolvedTelemetrySchema as V2Schema; use weaver_resolved_schema::ResolvedTelemetrySchema as V1Schema; use weaver_semconv::json_schema::JsonSchemaValidator; -use weaver_semconv::registry_repo::{RegistryRepo, REGISTRY_MANIFEST}; +use weaver_semconv::registry_repo::{RegistryRepo, LEGACY_REGISTRY_MANIFEST, REGISTRY_MANIFEST}; use weaver_semconv::{group::ImportsWithProvenance, semconv::SemConvSpecWithProvenance}; use crate::Error; @@ -48,10 +48,11 @@ impl LoadedSemconvRegistry { use weaver_common::vdir::VirtualDirectoryPath; use weaver_semconv::provenance::Provenance; let path: VirtualDirectoryPath = "data".try_into().expect("Bad fake path for test"); - let repo = RegistryRepo::try_new("default", &path).map_err(|e| Error::InvalidUrl { - url: "test string".to_owned(), - error: format!("{e}"), - })?; + let repo = + RegistryRepo::try_new(None, &path, &mut vec![]).map_err(|e| Error::InvalidUrl { + url: "test string".to_owned(), + error: format!("{e}"), + })?; let provenance = Provenance::new("default", ""); let spec_with_provenance = SemConvSpecWithProvenance::from_string(provenance, spec) .into_result_failing_non_fatal() @@ -80,7 +81,7 @@ impl LoadedSemconvRegistry { LoadedSemconvRegistry::Unresolved { repo, .. } => repo.registry_path_repr(), // TODO - are these correct? LoadedSemconvRegistry::Resolved(schema) => &schema.schema_url, - LoadedSemconvRegistry::ResolvedV2(schema) => &schema.schema_url, + LoadedSemconvRegistry::ResolvedV2(schema) => schema.schema_url.as_str(), } } @@ -104,19 +105,19 @@ impl LoadedSemconvRegistry { /// Returns all the registry ids in this loaded registry and its dependencies. 
#[cfg(test)] #[must_use] - pub fn registry_ids(&self) -> Vec { + pub fn registry_names(&self) -> Vec { match self { LoadedSemconvRegistry::Unresolved { repo, dependencies, .. } => { - let mut result = vec![repo.id().to_string()]; + let mut result = vec![repo.name().to_owned()]; for d in dependencies { - result.extend(d.registry_ids()); + result.extend(d.registry_names()); } result } - LoadedSemconvRegistry::Resolved(schema) => vec![schema.registry_id.clone()], - LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.registry_id.clone()], + LoadedSemconvRegistry::Resolved(schema) => vec![schema.registry_id.to_owned()], + LoadedSemconvRegistry::ResolvedV2(schema) => vec![schema.schema_url.name().to_owned()], } } } @@ -132,11 +133,11 @@ impl Display for LoadedSemconvRegistry { } => write!( f, "{} - [{}]", - repo.id(), + repo.schema_url(), dependencies.iter().map(|d| format!("{d}")).join(",") ), - LoadedSemconvRegistry::Resolved(schema) => write!(f, "{}", schema.registry_id), - LoadedSemconvRegistry::ResolvedV2(schema) => write!(f, "{}", schema.registry_id), + LoadedSemconvRegistry::Resolved(schema) => write!(f, "{}", schema.schema_url), + LoadedSemconvRegistry::ResolvedV2(schema) => write!(f, "{}", schema.schema_url), } } } @@ -173,26 +174,26 @@ fn load_semconv_repository_recursive( // Make sure we don't go past our max dependency depth. 
if max_dependency_depth == 0 { return WResult::FatalErr(Error::MaximumDependencyDepth { - registry: registry_repo.registry_path_repr().to_owned(), + registry_name: registry_repo.registry_path_repr().to_owned(), }); } - let registry_id = registry_repo.id().to_string(); + let registry_name = registry_repo.name().to_owned(); // Check for circular dependency - if visited_registries.contains(®istry_id) { - dependency_chain.push(registry_id.clone()); + if visited_registries.contains(®istry_name) { + dependency_chain.push(registry_name.clone()); let chain_str = dependency_chain.join(" → "); return WResult::FatalErr(Error::CircularDependency { - registry_id, + registry_name: registry_name.clone(), chain: chain_str, }); } // Add current registry to visited set and dependency chain - let _ = visited_registries.insert(registry_id.clone()); - dependency_chain.push(registry_id.clone()); + let _ = visited_registries.insert(registry_name.clone()); + dependency_chain.push(registry_name.clone()); // Either load a fully resolved repository, or read in raw files. if let Some(manifest) = registry_repo.manifest() { - if let Some(resolved_url) = registry_repo.resolved_schema_url() { + if let Some(resolved_url) = registry_repo.resolved_schema_uri() { load_resolved_repository(&resolved_url) } else { if manifest.dependencies.len() > 1 { @@ -200,10 +201,13 @@ fn load_semconv_repository_recursive( } // Load dependencies. let mut loaded_dependencies = vec![]; - let mut non_fatal_errors = vec![]; + let mut non_fatal_errors: Vec = vec![]; for d in manifest.dependencies.iter() { - match RegistryRepo::try_new(&d.name, &d.registry_path) { + let mut semconv_nfes: Vec = vec![]; + match RegistryRepo::try_new_dependency(d, &mut semconv_nfes) { Ok(d_repo) => { + non_fatal_errors + .extend(semconv_nfes.into_iter().map(Error::FailToResolveDefinition)); // so we need to make sure the dependency chain only include direct dependencies of each other. 
match load_semconv_repository_recursive( d_repo, @@ -281,6 +285,7 @@ fn load_definition_repository( && (extension == "yaml" || extension == "yml") && file_name != "schema-next.yaml" && file_name != REGISTRY_MANIFEST + && file_name != LEGACY_REGISTRY_MANIFEST } let local_path = registry_repo.path().to_path_buf(); let registry_path_repr = registry_repo.registry_path_repr(); @@ -304,7 +309,7 @@ fn load_definition_repository( // TODO - less confusing way to load semconv specs. vec![SemConvRegistry::semconv_spec_from_file( - ®istry_repo.id(), + registry_repo.name(), entry.path(), &unversioned_validator, &versioned_validator, @@ -397,7 +402,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/custom_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("main", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; let mut diag_msgs = DiagnosticMessages::empty(); let loaded = load_semconv_repository(registry_repo, false) .capture_non_fatal_errors(&mut diag_msgs)?; @@ -409,7 +414,7 @@ mod tests { dependencies, } = loaded { - assert_eq!("acme", repo.id().as_ref()); + assert_eq!("acme.com/schemas", repo.name()); assert_eq!(dependencies.len(), 1); assert_eq!(specs.len(), 1); assert_eq!(imports.len(), 1); @@ -420,7 +425,7 @@ mod tests { dependencies, }] = &dependencies.as_slice() { - assert_eq!("otel", repo.id().as_ref()); + assert_eq!("opentelemetry.io/schemas", repo.name()); assert_eq!(dependencies.len(), 0); assert_eq!(specs.len(), 1); assert_eq!(imports.len(), 0); @@ -439,7 +444,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/multi-registry/app_registry".to_owned(), }; - let registry_repo = RegistryRepo::try_new("app", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; // Try with depth limit of 1 - should fail at acme->otel transition let mut visited_registries = HashSet::new(); @@ -474,14 
+479,14 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data/circular-registry-test/registry_a".to_owned(), }; - let registry_repo = RegistryRepo::try_new("registry_a", ®istry_path)?; + let registry_repo = RegistryRepo::try_new(None, ®istry_path, &mut vec![])?; let result = load_semconv_repository(registry_repo, true); match result { WResult::FatalErr(fatal) => { let error_msg = fatal.to_string(); assert!( - error_msg.contains("Circular dependency detected") && + error_msg.contains("Circular dependency detected") && error_msg.contains("registry_a") && error_msg.contains("registry_b"), "Expected circular dependency error mentioning both registries, got: {error_msg}" diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index 3b6ae70a3..b2073eb67 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -851,6 +851,7 @@ mod tests { use std::error::Error; use std::fs::OpenOptions; use std::path::PathBuf; + use weaver_semconv::schema_url::SchemaUrl; use glob::glob; use serde::Serialize; @@ -918,12 +919,15 @@ mod tests { let observed_output_dir = PathBuf::from(format!("observed_output/{test_dir}")); std::fs::create_dir_all(observed_output_dir.clone()) .expect("Failed to create observed output directory"); - let registry_id = "default"; + let schema_url: SchemaUrl = "https://default/0.1.0" + .try_into() + .expect("Should be valid schema url"); let location: VirtualDirectoryPath = format!("{test_dir}/registry") .try_into() .expect("Failed to parse file directory"); let loaded = SchemaResolver::load_semconv_repository( - RegistryRepo::try_new(registry_id, &location).expect("Failed to load registry"), + RegistryRepo::try_new(Some(schema_url), &location, &mut vec![]) + .expect("Failed to load registry"), true, ) .ignore(|e| { @@ -950,6 +954,14 @@ mod tests { ) ) }) + .ignore(|e| { + matches!( + e, + crate::Error::FailToResolveDefinition( + 
weaver_semconv::Error::LegacyRegistryManifest { path: _ } + ) + ) + }) .into_result_failing_non_fatal() .expect("Failed to load semconv specs"); @@ -1104,8 +1116,6 @@ groups: #[test] fn test_api_usage() -> Result<(), Box> { - let registry_id = "local"; - // Load a semantic convention registry from a local directory. // Note: A method is also available to load a registry from a git // repository. @@ -1113,7 +1123,12 @@ groups: let path = VirtualDirectoryPath::LocalFolder { path: "data/registry-test-7-spans/registry".to_owned(), }; - let repo = RegistryRepo::try_new(registry_id, &path)?; + + let schema_url: SchemaUrl = "https://local/registry/1.0.0" + .try_into() + .expect("Should be valid schema url"); + let repo = RegistryRepo::try_new(Some(schema_url), &path, &mut vec![]) + .expect("Failed to load registry"); let loaded = SchemaResolver::load_semconv_repository(repo, true).into_result_failing_non_fatal()?; let resolved_schema = diff --git a/crates/weaver_search/src/lib.rs b/crates/weaver_search/src/lib.rs index 8dcd39eb7..a4b945365 100644 --- a/crates/weaver_search/src/lib.rs +++ b/crates/weaver_search/src/lib.rs @@ -543,7 +543,7 @@ mod tests { fn make_test_registry() -> ForgeResolvedRegistry { ForgeResolvedRegistry { - registry_url: "test".to_owned(), + schema_url: "https://example.com/schemas/1.2.3".try_into().unwrap(), registry: Registry { attributes: vec![ make_attribute("http.request.method", "HTTP request method", "", false), diff --git a/crates/weaver_semconv/Cargo.toml b/crates/weaver_semconv/Cargo.toml index 50cff5ad9..f4996ab66 100644 --- a/crates/weaver_semconv/Cargo.toml +++ b/crates/weaver_semconv/Cargo.toml @@ -29,6 +29,7 @@ regex.workspace = true globset.workspace = true itertools.workspace = true log.workspace = true +url.workspace = true glob = "0.3.3" jsonschema = "0.40.0" # JSON Schema validation used to enhance error messages diff --git a/crates/weaver_semconv/src/lib.rs b/crates/weaver_semconv/src/lib.rs index df0346e0b..dcff8f418 100644 --- 
a/crates/weaver_semconv/src/lib.rs +++ b/crates/weaver_semconv/src/lib.rs @@ -21,6 +21,7 @@ pub mod manifest; pub mod provenance; pub mod registry; pub mod registry_repo; +pub mod schema_url; pub mod semconv; pub mod stability; pub mod stats; @@ -320,6 +321,24 @@ pub enum Error { error: String, }, + /// This error is raised when a registry manifest is using a legacy file name. + #[diagnostic(severity(Warning))] + #[error("The registry manifest at {path:?} is using a legacy file name. Please rename it to `manifest.yaml`.")] + LegacyRegistryManifest { + /// The path to the registry manifest file. + path: PathBuf, + }, + + /// This error is raised when a registry manifest includes deprecated properties. + #[error("The syntax used in the registry manifest at {path:?} is deprecated. {error}")] + #[diagnostic(severity(Warning))] + DeprecatedSyntaxInRegistryManifest { + /// The path to the registry manifest file. + path: PathBuf, + /// The error that occurred. + error: String, + }, + /// A container for multiple errors. #[error("{:?}", format_errors(.0))] CompoundError(#[related] Vec), diff --git a/crates/weaver_semconv/src/manifest.rs b/crates/weaver_semconv/src/manifest.rs index 3619cd073..53ce3838f 100644 --- a/crates/weaver_semconv/src/manifest.rs +++ b/crates/weaver_semconv/src/manifest.rs @@ -8,29 +8,38 @@ //! In the future, this struct may be extended to include additional information //! such as the registry's owner, maintainers, and dependencies. 
+use std::vec; + +use crate::registry_repo::LEGACY_REGISTRY_MANIFEST; +use crate::schema_url::SchemaUrl; use crate::stability::Stability; use crate::Error; -use crate::Error::{InvalidRegistryManifest, RegistryManifestNotFound}; +use crate::Error::{ + DeprecatedSyntaxInRegistryManifest, InvalidRegistryManifest, LegacyRegistryManifest, + RegistryManifestNotFound, +}; use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::path::PathBuf; -use weaver_common::error::handle_errors; +use serde::{Deserialize, Deserializer, Serialize}; use weaver_common::vdir::VirtualDirectoryPath; /// Represents the information of a semantic convention registry manifest. /// /// This information defines the registry's name, version, description, and schema /// base url. -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Debug, Clone, JsonSchema)] pub struct RegistryManifest { /// The file format for this registry. /// - /// No value is assumed to be `definition/1.0.0` + /// No value is assumed to be `manifest/2.0.0` #[serde(skip_serializing_if = "Option::is_none", default)] pub file_format: Option, - /// The name of the registry. This name is used to define the package name. - pub name: String, + /// The schema URL for this registry. + /// This URL is populated before registry is published and is used as + /// a unique identifier of the registry. It MUST follow OTel schema URL format, which is: + /// `http[s]://server[:port]/path/`. + /// See for more details. + pub schema_url: SchemaUrl, /// An optional description of the registry. /// @@ -40,14 +49,6 @@ pub struct RegistryManifest { #[serde(skip_serializing_if = "Option::is_none")] pub description: Option, - /// The version of the registry which will be used to define the semconv package version. - #[serde(alias = "semconv_version")] - pub version: String, - - /// The base URL where the registry's schema files are hosted. 
- #[serde(alias = "schema_base_url")] - pub repository_url: String, - /// List of the registry's dependencies. /// Note: In the current phase, we only support zero or one dependency. /// See this GH issue for more details: @@ -60,27 +61,129 @@ pub struct RegistryManifest { /// The location of the resolved telemetry schema, if available. #[serde(skip_serializing_if = "Option::is_none")] - pub resolved_schema_url: Option, + pub resolved_schema_uri: Option, + + #[serde(skip)] + deserialization_warnings: Vec, } /// Represents a dependency of a semantic convention registry. -#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema)] +#[derive(Serialize, Debug, Clone, JsonSchema)] pub struct Dependency { - /// The name of the dependency. - pub name: String, - /// The path to the dependency. + /// The schema URL for the dependency (required). + /// It must follow OTel schema URL format, which is: `http[s]://server[:port]/path/`. + /// This is not necessarily the URL registry can be accessed at, but it provides + /// a unique identifier for the dependency registry and its version. /// + /// When registry is not published yet, this field should be populated with a placeholder URL, + /// but it must follow the URL format and include a version segment. + /// The actual registry files can be provided in `registry_path` field. + pub schema_url: SchemaUrl, + + /// The path to the dependency (optional). /// This can be either: /// - A manifest of a published registry /// - A directory containing the raw definition. 
- pub registry_path: VirtualDirectoryPath, + #[serde(skip_serializing_if = "Option::is_none")] + pub registry_path: Option, +} + +impl<'de> Deserialize<'de> for Dependency { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct DependencyHelper { + name: Option, + schema_url: Option, + registry_path: Option, + } + + let helper = DependencyHelper::deserialize(deserializer)?; + + let schema_url = match (helper.schema_url, helper.name) { + (Some(url), _) => url, + (None, Some(name)) => SchemaUrl::try_from_name_version(&name, "unknown") + .map_err(serde::de::Error::custom)?, + (None, None) => { + return Err(serde::de::Error::custom( + "Either 'schema_url' or 'name' must be provided for a dependency", + )) + } + }; + + Ok(Dependency { + schema_url, + registry_path: helper.registry_path, + }) + } +} + +impl<'de> Deserialize<'de> for RegistryManifest { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + struct RegistryManifestHelper { + file_format: Option, + schema_url: Option, + description: Option, + #[allow(deprecated)] + semconv_version: Option, + #[allow(deprecated)] + schema_base_url: Option, + #[serde(default)] + dependencies: Vec, + #[serde(default)] + stability: Stability, + resolved_schema_uri: Option, + } + + let helper = RegistryManifestHelper::deserialize(deserializer)?; + let mut warnings = vec![]; + + let schema_url = if let Some(url) = helper.schema_url { + url + } else { + // Fall back to deprecated fields + let base_url = helper.schema_base_url.as_ref().ok_or_else(|| { + serde::de::Error::custom( + "Either 'schema_url' or both 'schema_base_url' and 'semconv_version' must be provided", + ) + })?; + + let version = helper.semconv_version.as_ref().ok_or_else(|| { + serde::de::Error::custom( + "Either 'schema_url' or both 'schema_base_url' and 'semconv_version' must be provided", + ) + })?; + + warnings.push("The 'semconv_version' and 
'schema_base_url' fields are deprecated in favor of 'schema_url'.".to_owned()); + SchemaUrl::try_from_name_version(base_url, version).map_err(serde::de::Error::custom)? + }; + + Ok(RegistryManifest { + file_format: helper.file_format, + schema_url, + description: helper.description, + dependencies: helper.dependencies, + stability: helper.stability, + resolved_schema_uri: helper.resolved_schema_uri, + deserialization_warnings: warnings, + }) + } } impl RegistryManifest { /// Attempts to load a registry manifest from a file. /// /// The expected file format is YAML. - pub fn try_from_file>(path: P) -> Result { + pub fn try_from_file>( + path: P, + nfes: &mut Vec, + ) -> Result { let manifest_path_buf = path.as_ref().to_path_buf(); if !manifest_path_buf.exists() { @@ -100,49 +203,69 @@ impl RegistryManifest { error: e.to_string(), })?; - manifest.validate(manifest_path_buf.clone())?; - - Ok(manifest) - } - - fn validate(&self, path: PathBuf) -> Result<(), Error> { - let mut errors = vec![]; + // Check if this is a legacy manifest file + let is_legacy = if let Some(file_name) = manifest_path_buf.file_name() { + file_name == LEGACY_REGISTRY_MANIFEST + } else { + false + }; - if self.name.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry name is required.".to_owned(), + if is_legacy { + nfes.push(LegacyRegistryManifest { + path: manifest_path_buf.clone(), }); } - if self.version.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry version is required.".to_owned(), - }); - } + nfes.extend(manifest.deserialization_warnings.iter().map(|w| { + DeprecatedSyntaxInRegistryManifest { + path: manifest_path_buf.clone(), + error: w.clone(), + } + })); + Ok(manifest) + } - if self.repository_url.is_empty() { - errors.push(InvalidRegistryManifest { - path: path.clone(), - error: "The registry schema base URL is required.".to_owned(), - }); - } + /// Returns the registry name, which is 
derived from the schema URL. + /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, + /// the registry name would be `opentelemetry.io/schemas/sub-component` + #[must_use] + pub fn name(&self) -> &str { + self.schema_url.name() + } - handle_errors(errors)?; + /// Returns the registry version, which is derived from the schema URL. + /// For example, if the schema URL is `https://opentelemetry.io/schemas/sub-component/1.0.0`, + /// the registry version would be `1.0.0` + #[must_use] + pub fn version(&self) -> &str { + self.schema_url.version() + } - Ok(()) + /// Creates a new `RegistryManifest` from a schema URL with default values. + #[must_use] + pub fn from_schema_url(schema_url: SchemaUrl) -> Self { + Self { + file_format: None, + schema_url, + description: None, + dependencies: vec![], + resolved_schema_uri: None, + stability: Stability::Development, + deserialization_warnings: vec![], + } } } #[cfg(test)] mod tests { + use std::path::PathBuf; + use super::*; - use crate::Error::CompoundError; #[test] fn test_not_found_registry_info() { - let result = RegistryManifest::try_from_file("tests/test_data/missing_registry.yaml"); + let result = + RegistryManifest::try_from_file("tests/test_data/missing_registry.yaml", &mut vec![]); assert!( matches!(result, Err(RegistryManifestNotFound { path, .. }) if path.ends_with("missing_registry.yaml")) ); @@ -152,6 +275,7 @@ mod tests { fn test_incomplete_registry_info() { let result = RegistryManifest::try_from_file( "tests/test_data/incomplete_semconv_registry_manifest.yaml", + &mut vec![], ); assert!( matches!(result, Err(InvalidRegistryManifest { path, .. 
}) if path.ends_with("incomplete_semconv_registry_manifest.yaml")) @@ -160,35 +284,27 @@ mod tests { #[test] fn test_valid_registry_info() { - let config = - RegistryManifest::try_from_file("tests/test_data/valid_semconv_registry_manifest.yaml") - .expect("Failed to load the registry configuration file."); - assert_eq!(config.name, "vendor_acme"); - assert_eq!(config.version, "0.1.0"); - assert_eq!(config.repository_url, "https://acme.com/schemas/"); + let config = RegistryManifest::try_from_file( + "tests/test_data/valid_semconv_registry_manifest.yaml", + &mut vec![], + ) + .expect("Failed to load the registry configuration file."); + assert_eq!(config.name(), "acme.com/schemas"); + assert_eq!(config.version(), "0.1.0"); } #[test] fn test_invalid_registry_info() { let result = RegistryManifest::try_from_file( "tests/test_data/invalid_semconv_registry_manifest.yaml", + &mut vec![], ); let path = PathBuf::from("tests/test_data/invalid_semconv_registry_manifest.yaml"); - let expected_errs = CompoundError(vec![ - InvalidRegistryManifest { - path: path.clone(), - error: "The registry name is required.".to_owned(), - }, - InvalidRegistryManifest { - path: path.clone(), - error: "The registry version is required.".to_owned(), - }, - InvalidRegistryManifest { - path: path.clone(), - error: "The registry schema base URL is required.".to_owned(), - }, - ]); + let expected_errs = InvalidRegistryManifest { + path: path.clone(), + error: "Registry name and version cannot be empty.".to_owned(), + }; if let Err(observed_errs) = result { assert_eq!(observed_errs, expected_errs); @@ -196,4 +312,166 @@ mod tests { panic!("Expected an error, but got a result."); } } + + // Dependency tests + #[test] + fn test_dependency_deserialize_with_schema_url() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + 
"https://opentelemetry.io/schemas/1.0.0" + ); + assert!(dep.registry_path.is_none()); + } + + #[test] + fn test_dependency_deserialize_with_registry_path() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +registry_path: "./registry" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + assert!(dep.registry_path.is_some()); + } + + #[test] + fn test_dependency_deserialize_with_deprecated_name() { + let yaml = r#" +name: "acme-registry" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!(dep.schema_url.as_str(), "https://acme-registry/unknown"); + } + + #[test] + fn test_dependency_deserialize_schema_url_takes_precedence() { + let yaml = r#" +schema_url: "https://opentelemetry.io/schemas/1.0.0" +name: "ignored-name" +"#; + let dep: Dependency = serde_yaml::from_str(yaml).expect("Failed to deserialize"); + assert_eq!( + dep.schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + + #[test] + fn test_dependency_deserialize_missing_both_fields() { + let yaml = r#" +registry_path: "./registry" +"#; + let result: Result = serde_yaml::from_str(yaml); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err + .to_string() + .contains("Either 'schema_url' or 'name' must be provided")); + } + + #[test] + fn test_dependency_serialize() { + let dep = Dependency { + schema_url: "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(), + registry_path: None, + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + // Verify schema_url is serialized + assert!(yaml.contains("schema_url")); + assert!(yaml.contains("https://opentelemetry.io/schemas/1.0.0")); + // Verify name is NOT serialized (skip_serializing) + assert!(!yaml.contains("name:")); + } + + #[test] + fn test_dependency_serialize_with_registry_path() { + let 
dep = Dependency { + schema_url: "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(), + registry_path: Some(VirtualDirectoryPath::LocalFolder { + path: "./registry".to_owned(), + }), + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + assert!(yaml.contains("schema_url")); + assert!(yaml.contains("registry_path")); + } + + #[test] + fn test_dependency_serialize_without_optional_path() { + let dep = Dependency { + schema_url: "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(), + registry_path: None, + }; + + let yaml = serde_yaml::to_string(&dep).expect("Failed to serialize"); + // registry_path should not be serialized when None (skip_serializing_if) + assert!(!yaml.contains("registry_path")); + } + + #[test] + fn test_dependency_roundtrip_serialization() { + let original = Dependency { + schema_url: "https://example.com/schemas/1.0.0".try_into().unwrap(), + registry_path: Some(VirtualDirectoryPath::LocalFolder { + path: "./test/registry".to_owned(), + }), + }; + + let yaml = serde_yaml::to_string(&original).expect("Failed to serialize"); + let deserialized: Dependency = serde_yaml::from_str(&yaml).expect("Failed to deserialize"); + + assert_eq!(original.schema_url, deserialized.schema_url); + assert!(deserialized.registry_path.is_some()); + } + + #[test] + fn test_legacy_manifest_file_warning() { + // Test that loading from a legacy manifest filename (registry_manifest.yaml) produces a warning + let mut warnings = vec![]; + let result = RegistryManifest::try_from_file( + "tests/test_data/registry_manifest.yaml", + &mut warnings, + ); + + assert!(result.is_ok()); + assert!( + warnings + .iter() + .any(|w| matches!(w, LegacyRegistryManifest { .. 
})), + "Expected a LegacyRegistryManifest warning, got: {warnings:?}" + ); + } + + #[test] + fn test_deprecated_properties_warning() { + // Test that using deprecated properties (semconv_version and schema_base_url) produces a warning + let mut warnings = vec![]; + let result = RegistryManifest::try_from_file( + "tests/test_data/valid_semconv_registry_manifest.yaml", + &mut warnings, + ); + + assert!(result.is_ok()); + let manifest = result.unwrap(); + // The manifest should still work and extract the correct values + assert_eq!(manifest.name(), "acme.com/schemas"); + assert_eq!(manifest.version(), "0.1.0"); + + // But it should produce a deprecation warning + assert!( + warnings + .iter() + .any(|w| matches!(w, DeprecatedSyntaxInRegistryManifest { .. })), + "Expected a DeprecatedSyntaxInRegistryManifest warning, got: {warnings:?}" + ); + } } diff --git a/crates/weaver_semconv/src/registry.rs b/crates/weaver_semconv/src/registry.rs index 3ee47d960..5f3581a05 100644 --- a/crates/weaver_semconv/src/registry.rs +++ b/crates/weaver_semconv/src/registry.rs @@ -8,6 +8,7 @@ use crate::json_schema::JsonSchemaValidator; use crate::manifest::RegistryManifest; use crate::provenance::Provenance; use crate::registry_repo::RegistryRepo; +use crate::schema_url::SchemaUrl; use crate::semconv::{SemConvSpecV1WithProvenance, SemConvSpecWithProvenance}; use crate::stats::Stats; use crate::Error; @@ -125,7 +126,7 @@ impl SemConvRegistry { LazyLock::new(|| Regex::new(r".*(v\d+\.\d+\.\d+).*").expect("Invalid regex")); // Load all the semantic convention registry. 
- let mut registry = SemConvRegistry::new(registry_repo.id().as_ref()); + let mut registry = SemConvRegistry::new(registry_repo.name()); for spec in semconv_specs { registry.add_semconv_spec(spec); @@ -143,16 +144,15 @@ impl SemConvRegistry { } } - registry.set_manifest(RegistryManifest { - file_format: None, - name: registry_repo.id().as_ref().to_owned(), - description: None, - version: semconv_version, - repository_url: "".to_owned(), - dependencies: vec![], - resolved_schema_url: None, - stability: crate::stability::Stability::Development, - }); + let schema_url = + SchemaUrl::try_from_name_version(registry_repo.name(), &semconv_version).map_err( + |e| Error::InvalidRegistryManifest { + path: registry_repo.registry_path_repr().into(), + error: e.clone(), + }, + )?; + + registry.set_manifest(RegistryManifest::from_schema_url(schema_url)); } else { registry.manifest = registry_repo.manifest().cloned(); } @@ -386,7 +386,16 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "data".to_owned(), }; - let registry_repo = RegistryRepo::try_new("test", &registry_path).unwrap(); + let registry_repo = RegistryRepo::try_new( + Some( + "https://test/42" + .try_into() + .expect("Should be valid schema url"), + ), + &registry_path, + &mut vec![], + ) + .unwrap(); let registry = SemConvRegistry::from_semconv_specs(&registry_repo, semconv_specs).unwrap(); assert_eq!(registry.id(), "test"); assert_eq!(registry.semconv_spec_count(), 2); diff --git a/crates/weaver_semconv/src/registry_repo.rs b/crates/weaver_semconv/src/registry_repo.rs index 22c7c0eb0..df636e36f 100644 --- a/crates/weaver_semconv/src/registry_repo.rs +++ b/crates/weaver_semconv/src/registry_repo.rs @@ -4,15 +4,52 @@ use std::default::Default; use std::path::{Path, PathBuf}; -use std::sync::Arc; -use crate::manifest::RegistryManifest; +use crate::manifest::{Dependency, RegistryManifest}; +use crate::schema_url::SchemaUrl; use crate::Error; use weaver_common::vdir::{VirtualDirectory, 
VirtualDirectoryPath}; use weaver_common::{get_path_type, log_info}; +/// The name of the legacy registry manifest file. +#[deprecated(note = "The registry manifest file is renamed to `manifest.yaml`.")] +pub const LEGACY_REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; + /// The name of the registry manifest file. -pub const REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; +pub const REGISTRY_MANIFEST: &str = "manifest.yaml"; + +/// Finds the path to the manifest file, could be +/// - directly the path to the manifest file, or +/// - either `manifest.yaml` or `registry_manifest.yaml` in the given directory. +/// - None otherwise. +fn find_manifest_path(registry_path: &Path) -> Option<PathBuf> { + // First check to see if we're pointing at a manifest. + if registry_path.is_file() { + // The path *is* the manifest. + return Some(registry_path.to_path_buf()); + } + let manifest_path = registry_path.join(REGISTRY_MANIFEST); + let legacy_path = registry_path.join(LEGACY_REGISTRY_MANIFEST); + if manifest_path.exists() { + log_info(format!( + "Found registry manifest: {}", + manifest_path.display() + )); + Some(manifest_path) + } else if legacy_path.exists() { + log_info(format!( + "Found registry manifest: {}", + legacy_path.display() + )); + Some(legacy_path) + } else { + log_info(format!( + "No registry manifest found: {}", + manifest_path.display() + )); + None + } +} /// A semantic convention registry repository that can be: /// - A definition repository, which is one of: @@ -21,43 +58,81 @@ pub const REGISTRY_MANIFEST: &str = "registry_manifest.yaml"; /// - Initialized from a Git archive /// - A published repository, which is a manifest file /// that denotes where to find aspects of the registry. -#[derive(Default, Debug, Clone)] +#[derive(Debug, Clone)] pub struct RegistryRepo { - // A unique identifier for the registry (e.g. main, baseline, etc.) 
- id: Arc, + /// The schema URL associated with the registry + /// May be derived from the manifest or the registry name and version if the manifest is not present. + schema_url: SchemaUrl, // A virtual directory containing the registry. registry: VirtualDirectory, // The registry manifest definition. manifest: Option, + + // Cached path to the manifest file (if it exists). + manifest_path: Option, } impl RegistryRepo { - /// Creates a new `RegistryRepo` from a `RegistryPath` object that + /// Creates a new `RegistryRepo` from a `Dependency` object that specifies the schema URL and path. + pub fn try_new_dependency( + dependency: &Dependency, + nfes: &mut Vec, + ) -> Result { + let path = dependency.registry_path.clone().unwrap_or_else(|| { + // If no registry path is provided, we assume it's the same schema_url. + VirtualDirectoryPath::RemoteArchive { + url: dependency.schema_url.to_string(), + sub_folder: None, + } + }); + Self::try_new(Some(dependency.schema_url.clone()), &path, nfes) + } + + /// Creates a new `RegistryRepo` from a schema URL and `RegistryPath` object that /// specifies the location of the registry. + /// If there is no manifest and schema URL is not provided, registry + /// name and version are set to "unknown". 
pub fn try_new( - registry_id_if_no_manifest: &str, + schema_url: Option, registry_path: &VirtualDirectoryPath, + nfes: &mut Vec, ) -> Result { - let mut registry_repo = Self { - id: Arc::from(registry_id_if_no_manifest), - registry: VirtualDirectory::try_new(registry_path) - .map_err(Error::VirtualDirectoryError)?, - manifest: None, - }; - if let Some(manifest) = registry_repo.manifest_path() { - let registry_manifest = RegistryManifest::try_from_file(manifest)?; - registry_repo.id = Arc::from(registry_manifest.name.as_str()); - registry_repo.manifest = Some(registry_manifest); + let registry = + VirtualDirectory::try_new(registry_path).map_err(Error::VirtualDirectoryError)?; + // Try to load manifest + let manifest_path = find_manifest_path(registry.path()); + if let Some(ref path) = manifest_path { + let registry_manifest = RegistryManifest::try_from_file(path, nfes)?; + Ok(Self { + schema_url: registry_manifest.schema_url.clone(), + registry, + manifest: Some(registry_manifest), + manifest_path, + }) + } else { + // No manifest + let schema_url_combined = schema_url.unwrap_or_else(SchemaUrl::new_unknown); + Ok(Self { + schema_url: schema_url_combined.clone(), + registry, + manifest: None, + manifest_path: None, + }) } - Ok(registry_repo) } - /// Returns the unique identifier for the registry. + /// Returns the registry name (from manifest if present, otherwise top-level field). #[must_use] - pub fn id(&self) -> Arc { - self.id.clone() + pub fn name(&self) -> &str { + self.schema_url.name() + } + + /// Returns the registry version (from manifest if present, otherwise top-level field). + #[must_use] + pub fn version(&self) -> &str { + self.schema_url.version() } /// Returns the local path to the semconv registry. @@ -78,51 +153,46 @@ impl RegistryRepo { self.manifest.as_ref() } - /// Returns the resolved schema URL, if available in the manifest. + /// Returns the resolved schema URI, if available in the manifest. 
#[must_use] - pub fn resolved_schema_url(&self) -> Option { + pub fn resolved_schema_uri(&self) -> Option { let manifest = self.manifest.as_ref()?; - let resolved_url: &str = manifest.resolved_schema_url.as_ref()?; - match get_path_type(resolved_url) { + let resolved_uri: &str = manifest.resolved_schema_uri.as_ref()?; + match get_path_type(resolved_uri) { weaver_common::PathType::RelativePath => { - // We need to understand if the manifest URL is the same as the registry URL. - let vdir_was_manifest_file = self.manifest_path()? == self.registry.path(); + // We need to understand if the manifest URI is the same as the registry URI. + let vdir_was_manifest_file = self + .manifest_path + .clone() + .is_some_and(|mp| mp == self.registry.path()); Some(self.registry.vdir_path().map_sub_folder(|path| { if vdir_was_manifest_file { match Path::new(&path).parent() { - Some(parent) => format!("{}/{resolved_url}", parent.display()), + Some(parent) => format!("{}/{resolved_uri}", parent.display()), None => "".to_owned(), } } else { - format!("{path}/{resolved_url}") + format!("{path}/{resolved_uri}") } })) } - _ => resolved_url.try_into().ok(), + _ => resolved_uri.try_into().ok(), } } - /// Returns the path to the `registry_manifest.yaml` file (if any). - #[must_use] - pub fn manifest_path(&self) -> Option { - // First check to see if we're pointing at a manifest. - if self.registry.path().is_file() { - // The VirtualDirectory *is* the registry. - return Some(self.registry.path().to_path_buf()); - } - let manifest_path = self.registry.path().join(REGISTRY_MANIFEST); - if manifest_path.exists() { - log_info(format!( - "Found registry manifest: {}", - manifest_path.display() - )); - Some(manifest_path) - } else { - log_info(format!( - "No registry manifest found: {}", - manifest_path.display() - )); - None + /// Returns the registry schema URL. 
+ pub fn schema_url(&self) -> &SchemaUrl { + &self.schema_url + } +} + +impl Default for RegistryRepo { + fn default() -> Self { + Self { + schema_url: SchemaUrl::new_unknown(), + registry: VirtualDirectory::default(), + manifest: None, + manifest_path: None, } } } @@ -147,7 +217,7 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "../../crates/weaver_codegen_test/semconv_registry".to_owned(), }; - let repo = RegistryRepo::try_new("main", &registry_path).unwrap(); + let repo = RegistryRepo::try_new(None, &registry_path, &mut vec![]).unwrap(); let repo_path = repo.path().to_path_buf(); assert!(repo_path.exists()); assert!( @@ -166,14 +236,16 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/1.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", &registry_path).expect("Failed to load test repository."); + + let repo = RegistryRepo::try_new(None, &registry_path, &mut vec![]) + .expect("Failed to load test repository."); + let Some(manifest) = repo.manifest() else { panic!("Did not resolve manifest for repo: {repo:?}"); }; - assert_eq!(manifest.name, "resolved"); + assert_eq!(manifest.name(), "resolved"); - let Some(resolved_path) = repo.resolved_schema_url() else { + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", repo.registry_path_repr() @@ -188,9 +260,9 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/resolved/2.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", &registry_path).expect("Failed to load test repository."); - let Some(resolved_path) = repo.resolved_schema_url() else { + let repo = RegistryRepo::try_new(None, &registry_path, &mut vec![]) + .expect("Failed to load test repository."); + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", 
repo.registry_path_repr() @@ -202,9 +274,9 @@ mod tests { let registry_path = VirtualDirectoryPath::LocalFolder { path: "tests/published_repository/3.0.0".to_owned(), }; - let repo = - RegistryRepo::try_new("main", &registry_path).expect("Failed to load test repository."); - let Some(resolved_path) = repo.resolved_schema_url() else { + let repo = RegistryRepo::try_new(None, &registry_path, &mut vec![]) + .expect("Failed to load test repository."); + let Some(resolved_path) = repo.resolved_schema_uri() else { panic!( "Should find a resolved schema path from manifest in {}", repo.registry_path_repr() diff --git a/crates/weaver_semconv/src/schema_url.rs b/crates/weaver_semconv/src/schema_url.rs new file mode 100644 index 000000000..c5ce38910 --- /dev/null +++ b/crates/weaver_semconv/src/schema_url.rs @@ -0,0 +1,413 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Schema URL type for uniquely identifying semantic convention registries. + +use schemars::JsonSchema; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::sync::OnceLock; + +/// Represents the schema URL of a registry, which serves as a unique identifier for the registry +/// along with its version. +#[derive(Debug, Clone, JsonSchema)] +pub struct SchemaUrl { + /// The schema URL string. + url: String, + #[serde(skip)] + #[schemars(skip)] + name: OnceLock<String>, + #[serde(skip)] + #[schemars(skip)] + version: OnceLock<String>, +} + +impl SchemaUrl { + /// Create a new SchemaUrl from a string. + #[must_use] + fn new(url: String) -> Self { + Self { + url, + name: OnceLock::new(), + version: OnceLock::new(), + } + } + + /// Get the URL as a string. + pub fn as_str(&self) -> &str { + &self.url + } + + /// Validate the schema URL format. 
+ pub fn validate(&self) -> Result<(), String> { + let parsed = url::Url::parse(&self.url).map_err(|e| format!("Invalid schema URL: {e}"))?; + let has_path = parsed + .path_segments() + .map(|segments| segments.filter(|s| !s.is_empty()).count() > 0) + .unwrap_or(false); + + if !has_path { + return Err("The schema URL must have at least one path segment.".to_owned()); + } + Ok(()) + } + + /// Returns the registry name, derived from the schema URL. + #[must_use] + pub fn name(&self) -> &str { + self.name.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + let path = parsed_url.path().trim_matches('/'); + let mut segments: Vec<&str> = path.split('/').collect(); + if !segments.is_empty() { + _ = segments.pop(); + } + + // Construct authority from host and port (replaces deprecated authority() method) + let authority = match (parsed_url.host_str(), parsed_url.port()) { + (Some(host), Some(port)) => format!("{}:{}", host, port), + (Some(host), None) => host.to_owned(), + _ => String::new(), + }; + + if segments.is_empty() { + return authority; + } + + format!("{}/{}", authority, segments.join("/")) + }) + } + + /// Returns the registry version, derived from the schema URL. + #[must_use] + pub fn version(&self) -> &str { + self.version.get_or_init(|| { + let parsed_url = url::Url::parse(&self.url).expect("schema_url must be valid"); + parsed_url + .path() + .trim_matches('/') + .rsplit('/') + .next() + .unwrap_or("") + .to_owned() + }) + } + + /// Create a SchemaUrl from name and version. 
+ pub fn try_from_name_version(name: &str, version: &str) -> Result<Self, String> { + if name.trim().is_empty() || version.trim().is_empty() { + return Err("Registry name and version cannot be empty.".to_owned()); + } + // TODO: replace with scheme regex + + if name.starts_with("http://") || name.starts_with("https://") { + format!("{}/{}", name.trim_end_matches('/'), version).try_into() + } else { + format!("https://{}/{}", name.trim_end_matches('/'), version).try_into() + } + } + + /// Returns a default unknown schema URL. + #[must_use] + pub fn new_unknown() -> Self { + Self::new("https://unknown/unknown".to_owned()) + } +} + +impl PartialEq for SchemaUrl { + fn eq(&self, other: &Self) -> bool { + self.url == other.url + } +} + +impl Eq for SchemaUrl {} + +impl std::hash::Hash for SchemaUrl { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.url.hash(state); + } +} + +impl std::fmt::Display for SchemaUrl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.url) + } +} + +impl<'de> Deserialize<'de> for SchemaUrl { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let s = String::deserialize(deserializer)?; + let schema: SchemaUrl = s.try_into().map_err(serde::de::Error::custom)?; + Ok(schema) + } +} + +impl Serialize for SchemaUrl { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + serializer.serialize_str(&self.url) + } +} + +impl TryFrom<&str> for SchemaUrl { + type Error = String; + + fn try_from(value: &str) -> Result<Self, Self::Error> { + let schema_url = Self::new(value.to_owned()); + schema_url.validate()?; + Ok(schema_url) + } +} + +impl TryFrom<String> for SchemaUrl { + type Error = String; + + fn try_from(value: String) -> Result<Self, Self::Error> { + let schema_url = Self::new(value); + schema_url.validate()?; + Ok(schema_url) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_new_and_as_str() { + let url = "https://opentelemetry.io/schemas/1.0.0"; + let schema_url: SchemaUrl = 
url.try_into().unwrap(); + assert_eq!(schema_url.as_str(), url); + } + + #[test] + fn test_validate_invalid_url_syntax() { + let result: Result<SchemaUrl, _> = "not a valid url".try_into(); + assert!(result.is_err()); + } + + #[test] + fn test_validate_url_without_path() { + let result = TryInto::<SchemaUrl>::try_into("https://opentelemetry.io"); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("at least one path segment")); + } + + #[test] + fn test_try_new_valid_url() { + let result = TryInto::<SchemaUrl>::try_into("https://opentelemetry.io/schemas/1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!( + schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + + #[test] + fn test_name_extraction_simple() { + let schema_url: SchemaUrl = + TryInto::<SchemaUrl>::try_into("https://opentelemetry.io/schemas/1.0.0").unwrap(); + assert_eq!(schema_url.name(), "opentelemetry.io/schemas"); + } + + #[test] + fn test_name_extraction_nested_path() { + let schema_url: SchemaUrl = + TryInto::<SchemaUrl>::try_into("https://opentelemetry.io/schemas/sub-component/1.0.0") + .unwrap(); + assert_eq!(schema_url.name(), "opentelemetry.io/schemas/sub-component"); + } + + #[test] + fn test_name_extraction_single_segment() { + let schema_url: SchemaUrl = "https://opentelemetry.io/1.0.0".try_into().unwrap(); + assert_eq!(schema_url.name(), "opentelemetry.io"); + } + + #[test] + fn test_name_extraction_with_port() { + let schema_url: SchemaUrl = "https://example.com:8080/schemas/1.0.0".try_into().unwrap(); + assert_eq!(schema_url.name(), "example.com:8080/schemas"); + } + + #[test] + fn test_version_extraction_simple() { + let schema_url: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + assert_eq!(schema_url.version(), "1.0.0"); + } + + #[test] + fn test_version_extraction_semantic_version() { + let schema_url: SchemaUrl = "https://example.com/schemas/1.2.3".try_into().unwrap(); + assert_eq!(schema_url.version(), "1.2.3"); + } + + #[test] + fn 
test_version_extraction_single_segment() { + let schema_url: SchemaUrl = "https://example.com/v1".try_into().unwrap(); + assert_eq!(schema_url.version(), "v1"); + } + + #[test] + fn test_try_from_name_version_with_https() { + let result = SchemaUrl::try_from_name_version("https://opentelemetry.io/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!( + schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + + #[test] + fn test_try_from_name_version_without_scheme() { + let result = SchemaUrl::try_from_name_version("opentelemetry.io/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!( + schema_url.as_str(), + "https://opentelemetry.io/schemas/1.0.0" + ); + } + + #[test] + fn test_try_from_name_version_with_http() { + let result = SchemaUrl::try_from_name_version("http://example.com/schemas", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!(schema_url.as_str(), "http://example.com/schemas/1.0.0"); + } + + #[test] + fn test_try_from_name_version_with_trailing_slash() { + let result = SchemaUrl::try_from_name_version("https://example.com/schemas/", "1.0.0"); + assert!(result.is_ok()); + let schema_url = result.unwrap(); + assert_eq!(schema_url.as_str(), "https://example.com/schemas/1.0.0"); + } + + #[test] + fn test_equality() { + let url1: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + let url2: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + let url3: SchemaUrl = "https://example.com/schemas/2.0.0".try_into().unwrap(); + + assert_eq!(url1, url2); + assert_ne!(url1, url3); + } + + #[test] + fn test_hash() { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let url1: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + let url2: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + + let mut hasher1 = 
DefaultHasher::new(); + url1.hash(&mut hasher1); + let hash1 = hasher1.finish(); + + let mut hasher2 = DefaultHasher::new(); + url2.hash(&mut hasher2); + let hash2 = hasher2.finish(); + + assert_eq!(hash1, hash2); + } + + #[test] + fn test_display() { + let schema_url: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + assert_eq!( + format!("{}", schema_url), + "https://example.com/schemas/1.0.0" + ); + } + + #[test] + fn test_serialize() { + let schema_url: SchemaUrl = "https://example.com/schemas/1.0.0".try_into().unwrap(); + let json = serde_json::to_string(&schema_url).unwrap(); + assert_eq!(json, "\"https://example.com/schemas/1.0.0\""); + } + + #[test] + fn test_deserialize() { + let json = "\"https://example.com/schemas/1.0.0\""; + let schema_url: SchemaUrl = serde_json::from_str(json).unwrap(); + assert_eq!(schema_url.as_str(), "https://example.com/schemas/1.0.0"); + } + + #[test] + fn test_deserialize_invalid_url() { + let json = "\"not a valid url\""; + let result: Result<SchemaUrl, _> = serde_json::from_str(json); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("Invalid schema URL")); + } + + #[test] + fn test_deserialize_url_without_path() { + let json = "\"https://example.com\""; + let result: Result<SchemaUrl, _> = serde_json::from_str(json); + assert!(result.is_err()); + let err = result.unwrap_err(); + assert!(err.to_string().contains("at least one path segment")); + } + + #[test] + fn test_serialize_deserialize_roundtrip() { + let original: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + let json = serde_json::to_string(&original).unwrap(); + let deserialized: SchemaUrl = serde_json::from_str(&json).unwrap(); + assert_eq!(original, deserialized); + } + + #[test] + fn test_name_caching() { + let schema_url: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + + // Call name() twice and verify they return the same reference + let name1 = schema_url.name(); + 
let name2 = schema_url.name(); + + assert_eq!(name1, name2); + assert_eq!(name1, "opentelemetry.io/schemas"); + + // Verify we're getting the same pointer (cached value) + assert_eq!(name1.as_ptr(), name2.as_ptr()); + } + + #[test] + fn test_version_caching() { + let schema_url: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + + // Call version() twice and verify they return the same reference + let version1 = schema_url.version(); + let version2 = schema_url.version(); + + assert_eq!(version1, version2); + assert_eq!(version1, "1.0.0"); + + // Verify we're getting the same pointer (cached value) + assert_eq!(version1.as_ptr(), version2.as_ptr()); + } + + #[test] + fn test_clone_preserves_url_but_resets_cache() { + let original: SchemaUrl = "https://opentelemetry.io/schemas/1.0.0".try_into().unwrap(); + + // Access name to populate cache + let _ = original.name(); + + // Clone should have the same URL but empty cache + let cloned = original.clone(); + assert_eq!(original.as_str(), cloned.as_str()); + assert_eq!(original.name(), cloned.name()); + } +} diff --git a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml index eb2ca0198..0c752664c 100644 --- a/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml +++ b/crates/weaver_semconv/tests/published_repository/3.0.0/registry_manifest.yaml @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. 
-version: 3.0.0 +schema_url: http://resolved/3.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_schema.yaml +resolved_schema_uri: resolved_schema.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 index 1dc1d84e7..b65691d98 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/1.0.0 @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. -version: 1.0.0 +schema_url: http://resolved/1.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: resolved_1.0.0.yaml +resolved_schema_uri: resolved_1.0.0.yaml diff --git a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 index 681fa6400..d8bf526e8 100644 --- a/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 +++ b/crates/weaver_semconv/tests/published_repository/resolved/2.0.0 @@ -1,7 +1,6 @@ file_format: manifest/2.0.0 -name: resolved description: Test repository that has been resolved. 
-version: 2.0.0 +schema_url: http://resolved/2.0.0 repository_url: https://github.com/open-telemetry/weaver.git stability: stable -resolved_schema_url: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 +resolved_schema_uri: https://github.com/open-telemetry/weaver.git\creates/weaver_semconv/tests/published_respository/resolved/resolved_2.0.0 diff --git a/crates/weaver_semconv/tests/test_data/registry_manifest.yaml b/crates/weaver_semconv/tests/test_data/registry_manifest.yaml new file mode 100644 index 000000000..9ccd8d297 --- /dev/null +++ b/crates/weaver_semconv/tests/test_data/registry_manifest.yaml @@ -0,0 +1,2 @@ +schema_url: "https://acme.com/schemas/1.0.0" +description: This is a legacy manifest file that uses the old filename. diff --git a/crates/weaver_semconv_gen/data_v2/templates.md b/crates/weaver_semconv_gen/data_v2/templates.md index 28b87f7eb..1d9e54184 100644 --- a/crates/weaver_semconv_gen/data_v2/templates.md +++ b/crates/weaver_semconv_gen/data_v2/templates.md @@ -22,8 +22,8 @@ test.common Custom Snippet Name - -todo/1.0.0 + +https://todo/1.0.0 diff --git a/crates/weaver_semconv_gen/src/v1.rs b/crates/weaver_semconv_gen/src/v1.rs index 3a686f77b..3329da7b7 100644 --- a/crates/weaver_semconv_gen/src/v1.rs +++ b/crates/weaver_semconv_gen/src/v1.rs @@ -197,7 +197,7 @@ mod tests { path: "data".to_owned(), }; let mut diag_msgs = DiagnosticMessages::empty(); - let registry_repo = RegistryRepo::try_new("main", &registry_path)?; + let registry_repo = RegistryRepo::try_new(None, &registry_path, &mut vec![])?; let generator = SnippetGenerator::try_from_registry_repo( &registry_repo, output, diff --git a/crates/weaver_semconv_gen/src/v2.rs b/crates/weaver_semconv_gen/src/v2.rs index bf6a55aa4..e3cd224f9 100644 --- a/crates/weaver_semconv_gen/src/v2.rs +++ b/crates/weaver_semconv_gen/src/v2.rs @@ -450,8 +450,7 @@ mod tests { fn test_registry() -> ResolvedTelemetrySchema { ResolvedTelemetrySchema { 
file_format: "resolved/2.0.0".to_owned(), - schema_url: "todo/1.0.0".to_owned(), - registry_id: "main".to_owned(), + schema_url: "https://todo/1.0.0".try_into().unwrap(), attribute_catalog: vec![Attribute { key: "attr1".to_owned(), r#type: AttributeType::PrimitiveOrArray(PrimitiveOrArrayTypeSpec::String), @@ -465,7 +464,6 @@ mod tests { attributes: vec![AttributeRef(0)], common: CommonFields::default(), }], - registry_url: "todo".to_owned(), spans: vec![Span { r#type: "trace.test".to_owned().into(), kind: weaver_semconv::group::SpanKindSpec::Client, @@ -569,7 +567,6 @@ mod tests { }, }], }, - registry_manifest: None, } } } diff --git a/src/registry/check.rs b/src/registry/check.rs index 0f2a981da..ac93a7950 100644 --- a/src/registry/check.rs +++ b/src/registry/check.rs @@ -43,9 +43,9 @@ pub(crate) fn command(args: &RegistryCheckArgs) -> Result Result Result Result>) -> impl Into let registry = &state.registry; let stats = RegistryStats { - registry_url: registry.registry_url.clone(), + schema_url: registry.schema_url.to_string(), counts: RegistryCounts { attributes: registry.registry.attributes.len(), metrics: registry.registry.metrics.len(), diff --git a/src/serve/types.rs b/src/serve/types.rs index ed894c948..b750b9f10 100644 --- a/src/serve/types.rs +++ b/src/serve/types.rs @@ -10,8 +10,8 @@ use weaver_semconv::stability::Stability; /// Registry stats response. #[derive(Debug, Serialize, ToSchema)] pub struct RegistryStats { - /// The registry URL. - pub registry_url: String, + /// The schema URL. + pub schema_url: String, /// Counts of different entity types. 
pub counts: RegistryCounts, // TODO: It would be better to serve the output of `weaver registry stats` here diff --git a/src/weaver.rs b/src/weaver.rs index 19f018bbe..64e424ca4 100644 --- a/src/weaver.rs +++ b/src/weaver.rs @@ -58,7 +58,11 @@ impl<'a> WeaverEngine<'a> { diag_msgs: &mut DiagnosticMessages, ) -> Result { let registry_path = &self.registry_config.registry; - let main_registry_repo = RegistryRepo::try_new("main", registry_path)?; + let mut nfes = vec![]; + let main_registry_repo = RegistryRepo::try_new(None, registry_path, &mut nfes)?; + + diag_msgs.extend_from_vec(nfes.into_iter().map(DiagnosticMessage::new).collect()); + self.load_definitions(main_registry_repo, diag_msgs) } diff --git a/tests/custom_registry/registry_manifest.yaml b/tests/custom_registry/registry_manifest.yaml index fd749cc86..ced966aa3 100644 --- a/tests/custom_registry/registry_manifest.yaml +++ b/tests/custom_registry/registry_manifest.yaml @@ -3,5 +3,5 @@ description: This registry contains the semantic conventions for the Acme vendor semconv_version: 0.1.0 schema_base_url: https://acme.com/schemas/ dependencies: - - name: otel + - schema_url: https://opentelemetry.io/schemas/1.30.0 registry_path: https://github.com/open-telemetry/semantic-conventions/archive/refs/tags/v1.30.0.zip[model] diff --git a/tests/registry_stats.rs b/tests/registry_stats.rs index a05ae06dd..32514720e 100644 --- a/tests/registry_stats.rs +++ b/tests/registry_stats.rs @@ -18,5 +18,9 @@ fn test_cli_interface() { .output() .expect("failed to execute process"); - assert!(output.status.success()); + assert!( + output.status.success(), + "Process did not exit successfully. 
Stderr: {}", + String::from_utf8_lossy(&output.stderr) + ); } diff --git a/tests/resolution_process.rs b/tests/resolution_process.rs index 2e09f3afd..1827108a1 100644 --- a/tests/resolution_process.rs +++ b/tests/resolution_process.rs @@ -6,7 +6,7 @@ use miette::Diagnostic; use weaver_common::vdir::VirtualDirectoryPath; use weaver_resolver::SchemaResolver; -use weaver_semconv::registry_repo::RegistryRepo; +use weaver_semconv::{registry_repo::RegistryRepo, schema_url::SchemaUrl}; /// The URL of the official semantic convention registry. const SEMCONV_REGISTRY_URL: &str = "https://github.com/open-telemetry/semantic-conventions.git"; @@ -33,9 +33,16 @@ fn test_cli_interface() { sub_folder: Some(SEMCONV_REGISTRY_MODEL.to_owned()), refspec: None, }; - let registry_repo = RegistryRepo::try_new("main", &registry_path).unwrap_or_else(|e| { - panic!("Failed to create the registry repo, error: {e}"); - }); + + let schema_url: Option<SchemaUrl> = Some( + "https://opentelemetry.io/schemas/1.40.0" + .try_into() + .unwrap(), + ); + let registry_repo = RegistryRepo::try_new(schema_url, &registry_path, &mut vec![]) + .unwrap_or_else(|e| { + panic!("Failed to create the registry repo, error: {e}"); + }); let loaded = SchemaResolver::load_semconv_repository(registry_repo, false) .ignore(|e| matches!(e.severity(), Some(miette::Severity::Warning))) .into_result_failing_non_fatal()