Skip to content

Commit 9599283

Browse files
committed
Move azure.identity mappings to categories
It seems azure.identity is missing from some categories, and there are significant structural and semantic differences between them.
1 parent 3c89c01 commit 9599283

25 files changed

Lines changed: 519 additions & 321 deletions

extension/encoding/azureencodingextension/internal/unmarshaler/logs/README.md

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,19 @@ in OpenTelemetry Collector pipeline (for example, using `transformprocessor`) or
4747

4848
### Identity Field
4949

50-
The `identity` field contains authorization and claims information. Rather than storing it as a nested structure,
51-
specific fields are extracted into flat, semantically meaningful attributes. Only known useful fields are extracted
52-
to minimize the risk of accidentally including sensitive data.
50+
The `identity` field has different structures across Azure log categories, so identity parsing is handled per-category:
5351

54-
#### Authorization Fields
52+
- **Activity Logs**: Specific known fields are extracted into flat, semantically meaningful attributes (see below)
53+
- **Storage Logs**: Stored as a nested map under `azure.identity` (different structure with authorization as an array)
54+
- **Unknown/Generic categories**: Stored as a nested map under `azure.identity`
55+
56+
For Activity Logs, only known useful fields are extracted to minimize the risk of accidentally including sensitive data.
57+
58+
#### Activity Log Identity
59+
60+
Activity Logs contain caller identity information with JWT claims from Azure AD/Entra ID tokens and authorization details.
61+
62+
##### Authorization Fields
5563

5664
| Azure identity Field | OpenTelemetry | OpenTelemetry Scope |
5765
|---------------------|---------------|---------------------|
@@ -64,7 +72,7 @@ to minimize the risk of accidentally including sensitive data.
6472
| `identity.authorization.evidence.principalId` | `azure.identity.authorization.evidence.principal.id` | Log Attribute |
6573
| `identity.authorization.evidence.principalType` | `azure.identity.authorization.evidence.principal.type` | Log Attribute |
6674

67-
#### Claims Fields
75+
##### Claims Fields
6876

6977
Unix timestamps (`exp`, `nbf`, `iat`) are converted to RFC3339 format.
7078

@@ -82,9 +90,17 @@ Unix timestamps (`exp`, `nbf`, `iat`) are converted to RFC3339 format.
8290
| `http://schemas.microsoft.com/claims/authnmethodsreferences` | `azure.identity.auth.methods.references` | Log Attribute |
8391
| `http://schemas.microsoft.com/identity/claims/identityprovider` | `azure.identity.provider` | Log Attribute |
8492
| `http://schemas.microsoft.com/identity/claims/objectidentifier` | `azure.identity.identifier.object` | Log Attribute |
85-
| `http://schemas.xmlsoap.org/ws/2005/05/identity/claims/nameidentifier` | `user.name` | Log Attribute |
93+
| `http://schemas.xmlsoap.org/ws/2005/05/identity/claims/nameidentifier` | `user.id` | Log Attribute |
8694
| `http://schemas.xmlsoap.org/ws/2005/05/identity/claims/emailaddress` | `user.email` | Log Attribute |
8795

96+
#### Storage Log Identity
97+
98+
Storage Logs have a different identity structure containing authorization decisions as an array, token information, and requester details. The entire identity object is stored as a nested map under `azure.identity`.
99+
100+
#### Unknown/Generic Categories
101+
102+
For log categories where the identity structure is not known, the entire identity object is stored as a nested map under `azure.identity` to preserve all data.
103+
88104
## Application Gateway
89105

90106
### Application Gateway Access Logs (both v1 and v2)

extension/encoding/azureencodingextension/internal/unmarshaler/logs/category.go

Lines changed: 23 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@ import (
77
"encoding/json"
88
"errors"
99
"fmt"
10-
"time"
1110

12-
gojson "github.com/goccy/go-json"
1311
jsoniter "github.com/json-iterator/go"
1412
"go.opentelemetry.io/collector/pdata/pcommon"
1513
"go.opentelemetry.io/collector/pdata/plog"
@@ -110,80 +108,6 @@ const (
110108
attributeAzureRequestDuration = "azure.request.duration"
111109
)
112110

113-
// Constants for Identity > claims
114-
const (
115-
identityClaimIssuer = "iss"
116-
identityClaimSubject = "sub"
117-
identityClaimAudience = "aud"
118-
identityClaimExpires = "exp"
119-
identityClaimNotBefore = "nbf"
120-
identityClaimIssuedAt = "iat"
121-
122-
identityClaimScope = "http://schemas.microsoft.com/identity/claims/scope"
123-
identityClaimType = "idtyp"
124-
identityClaimApplicationID = "appid"
125-
identityClaimAuthMethodsReferences = "http://schemas.microsoft.com/claims/authnmethodsreferences"
126-
identityClaimProvider = "http://schemas.microsoft.com/identity/claims/identityprovider"
127-
identityClaimIdentifierObject = "http://schemas.microsoft.com/identity/claims/objectidentifier"
128-
identityClaimIdentifierName = "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/nameidentifier"
129-
identityClaimEmailAddress = "http://schemas.xmlsoap.org/ws/2005/05/identity/claims/emailaddress"
130-
)
131-
132-
// Identity specific attributes
133-
const (
134-
attributeIdentityAuthorizationScope = "azure.identity.authorization.scope"
135-
attributeIdentityAuthorizationAction = "azure.identity.authorization.action"
136-
// Identity > authorization > evidence
137-
attributeIdentityAuthorizationEvidenceRole = "azure.identity.authorization.evidence.role"
138-
attributeIdentityAuthorizationEvidenceRoleAssignmentScope = "azure.identity.authorization.evidence.role.assignment.scope"
139-
attributeIdentityAuthorizationEvidenceRoleAssignmentID = "azure.identity.authorization.evidence.role.assignment.id"
140-
attributeIdentityAuthorizationEvidenceRoleDefinitionID = "azure.identity.authorization.evidence.role.definition.id"
141-
attributeIdentityAuthorizationEvidencePrincipalID = "azure.identity.authorization.evidence.principal.id"
142-
attributeIdentityAuthorizationEvidencePrincipalType = "azure.identity.authorization.evidence.principal.type"
143-
// Identity > claims (standard JWT claims)
144-
attributeIdentityClaimsAudience = "azure.identity.audience"
145-
attributeIdentityClaimsIssuer = "azure.identity.issuer"
146-
attributeIdentityClaimsSubject = "azure.identity.subject"
147-
attributeIdentityClaimsNotAfter = "azure.identity.not_after"
148-
attributeIdentityClaimsNotBefore = "azure.identity.not_before"
149-
attributeIdentityClaimsCreated = "azure.identity.created"
150-
// Identity > claims (Azure specific claims)
151-
attributeIdentityClaimsScope = "azure.identity.scope"
152-
attributeIdentityClaimsType = "azure.identity.type"
153-
attributeIdentityClaimsApplicationID = "azure.identity.application.id"
154-
attributeIdentityClaimsAuthMethodsReferences = "azure.identity.auth.methods.references"
155-
attributeIdentityClaimsIdentifierObject = "azure.identity.identifier.object"
156-
attributeIdentityClaimsIdentifierName = "user.name"
157-
attributeIdentityClaimsProvider = "azure.identity.provider"
158-
)
159-
160-
// evidence describes role assignment evidence in identity authorization
161-
type evidence struct {
162-
Role string `json:"role"`
163-
RoleAssignmentScope string `json:"roleAssignmentScope"`
164-
RoleAssignmentID string `json:"roleAssignmentId"`
165-
RoleDefinitionID string `json:"roleDefinitionId"`
166-
PrincipalID string `json:"principalId"`
167-
PrincipalType string `json:"principalType"`
168-
}
169-
170-
// authorization describes identity authorization details
171-
type authorization struct {
172-
Scope string `json:"scope"`
173-
Action string `json:"action"`
174-
Evidence *evidence `json:"evidence"`
175-
}
176-
177-
// identity describes the identity of the user or application that performed the operation
178-
// described by the log event.
179-
type identity struct {
180-
// Claims usually contains the JWT token used by Active Directory
181-
// to authenticate the user or application to perform this
182-
// operation in Resource Manager.
183-
Claims map[string]string `json:"claims"`
184-
Authorization *authorization `json:"authorization"`
185-
}
186-
187111
var errNoTimestamp = errors.New("no valid time fields are set on Log record ('time' or 'timestamp')")
188112

189113
// azureLogRecord is a common interface for all category-specific structures
@@ -200,21 +124,20 @@ type azureLogRecord interface {
200124
// This schema is applicable to most Resource Logs and
201125
// can be extended with additional fields for specific Log Categories
202126
type azureLogRecordBase struct {
203-
Time string `json:"time"` // most Categories use this field for timestamp
204-
TimeStamp string `json:"timestamp"` // some Categories use this field for timestamp
205-
ResourceID string `json:"resourceId"`
206-
TenantID string `json:"tenantId"`
207-
OperationName string `json:"operationName"`
208-
OperationVersion *string `json:"operationVersion"`
209-
ResultType *string `json:"resultType"`
210-
ResultSignature *string `json:"resultSignature"`
211-
ResultDescription *string `json:"resultDescription"`
212-
DurationMs *json.Number `json:"durationMs"` // int
213-
CallerIPAddress *string `json:"callerIpAddress"`
214-
CorrelationID *string `json:"correlationId"`
215-
Identity json.RawMessage `json:"identity"`
216-
Level *json.Number `json:"level"`
217-
Location string `json:"location"`
127+
Time string `json:"time"` // most Categories use this field for timestamp
128+
TimeStamp string `json:"timestamp"` // some Categories use this field for timestamp
129+
ResourceID string `json:"resourceId"`
130+
TenantID string `json:"tenantId"`
131+
OperationName string `json:"operationName"`
132+
OperationVersion *string `json:"operationVersion"`
133+
ResultType *string `json:"resultType"`
134+
ResultSignature *string `json:"resultSignature"`
135+
ResultDescription *string `json:"resultDescription"`
136+
DurationMs *json.Number `json:"durationMs"` // int
137+
CallerIPAddress *string `json:"callerIpAddress"`
138+
CorrelationID *string `json:"correlationId"`
139+
Level *json.Number `json:"level"`
140+
Location string `json:"location"`
218141
}
219142

220143
// GetResource returns resource attributes for the parsed Log Record
@@ -274,99 +197,10 @@ func (r *azureLogRecordBase) PutCommonAttributes(attrs pcommon.Map, _ pcommon.Va
274197
unmarshaler.AttrPutStrPtrIf(attrs, string(conventions.NetworkPeerAddressKey), r.CallerIPAddress)
275198
unmarshaler.AttrPutStrPtrIf(attrs, attributeAzureCorrelationID, r.CorrelationID)
276199
unmarshaler.AttrPutIntNumberPtrIf(attrs, attributeAzureOperationDuration, r.DurationMs)
277-
addIdentityAttributes(r.Identity, attrs)
278-
}
279-
280-
// addIdentityAttributes extracts identity details
281-
//
282-
// The `identity` field is part of the Top-level common schema for
283-
// resource logs and it's also in use in the activity logs.
284-
//
285-
// We're applying the strategy to only pick the identity elements
286-
// that we know are useful. This approach also minimizes the risk
287-
// of accidentally including sensitive data.
288-
func addIdentityAttributes(identityJSON json.RawMessage, attrs pcommon.Map) {
289-
if len(identityJSON) == 0 {
290-
return
291-
}
292-
293-
var id identity
294-
if err := gojson.Unmarshal(identityJSON, &id); err != nil {
295-
return
296-
}
297-
298-
// Authorization
299-
// ------------------------------------------------------------
300-
301-
if id.Authorization != nil {
302-
unmarshaler.AttrPutStrIf(attrs, attributeIdentityAuthorizationScope, id.Authorization.Scope)
303-
unmarshaler.AttrPutStrIf(attrs, attributeIdentityAuthorizationAction, id.Authorization.Action)
304-
305-
if id.Authorization.Evidence != nil {
306-
unmarshaler.AttrPutStrIf(attrs, attributeIdentityAuthorizationEvidenceRole, id.Authorization.Evidence.Role)
307-
unmarshaler.AttrPutStrIf(attrs, attributeIdentityAuthorizationEvidenceRoleAssignmentScope, id.Authorization.Evidence.RoleAssignmentScope)
308-
unmarshaler.AttrPutStrIf(attrs, attributeIdentityAuthorizationEvidenceRoleAssignmentID, id.Authorization.Evidence.RoleAssignmentID)
309-
unmarshaler.AttrPutStrIf(attrs, attributeIdentityAuthorizationEvidenceRoleDefinitionID, id.Authorization.Evidence.RoleDefinitionID)
310-
unmarshaler.AttrPutStrIf(attrs, attributeIdentityAuthorizationEvidencePrincipalID, id.Authorization.Evidence.PrincipalID)
311-
unmarshaler.AttrPutStrIf(attrs, attributeIdentityAuthorizationEvidencePrincipalType, id.Authorization.Evidence.PrincipalType)
312-
}
313-
}
314-
315-
// Claims
316-
// ------------------------------------------------------------
317-
318-
// Extract known claims details we want to include in the
319-
// log record.
320-
// Extract common claim fields
321-
322-
if iss := id.Claims[identityClaimIssuer]; iss != "" {
323-
attrs.PutStr(attributeIdentityClaimsIssuer, iss)
324-
}
325-
if sub := id.Claims[identityClaimSubject]; sub != "" {
326-
attrs.PutStr(attributeIdentityClaimsSubject, sub)
327-
}
328-
if aud := id.Claims[identityClaimAudience]; aud != "" {
329-
attrs.PutStr(attributeIdentityClaimsAudience, aud)
330-
}
331-
if exp := id.Claims[identityClaimExpires]; exp != "" {
332-
if expTime, err := parseUnixTimestamp(exp); err == nil {
333-
attrs.PutStr(attributeIdentityClaimsNotAfter, expTime.Format(time.RFC3339))
334-
}
335-
}
336-
if nbf := id.Claims[identityClaimNotBefore]; nbf != "" {
337-
if nbfTime, err := parseUnixTimestamp(nbf); err == nil {
338-
attrs.PutStr(attributeIdentityClaimsNotBefore, nbfTime.Format(time.RFC3339))
339-
}
340-
}
341-
if iat := id.Claims[identityClaimIssuedAt]; iat != "" {
342-
if iatTime, err := parseUnixTimestamp(iat); err == nil {
343-
attrs.PutStr(attributeIdentityClaimsCreated, iatTime.Format(time.RFC3339))
344-
}
345-
}
346-
if scope := id.Claims[identityClaimScope]; scope != "" {
347-
attrs.PutStr(attributeIdentityClaimsScope, scope)
348-
}
349-
if idtyp := id.Claims[identityClaimType]; idtyp != "" {
350-
attrs.PutStr(attributeIdentityClaimsType, idtyp)
351-
}
352-
if appid := id.Claims[identityClaimApplicationID]; appid != "" {
353-
attrs.PutStr(attributeIdentityClaimsApplicationID, appid)
354-
}
355-
if authmethods := id.Claims[identityClaimAuthMethodsReferences]; authmethods != "" {
356-
attrs.PutStr(attributeIdentityClaimsAuthMethodsReferences, authmethods)
357-
}
358-
if provider := id.Claims[identityClaimProvider]; provider != "" {
359-
attrs.PutStr(attributeIdentityClaimsProvider, provider)
360-
}
361-
if object := id.Claims[identityClaimIdentifierObject]; object != "" {
362-
attrs.PutStr(attributeIdentityClaimsIdentifierObject, object)
363-
}
364-
if name := id.Claims[identityClaimIdentifierName]; name != "" {
365-
attrs.PutStr(attributeIdentityClaimsIdentifierName, name)
366-
}
367-
if email := id.Claims[identityClaimEmailAddress]; email != "" {
368-
attrs.PutStr(string(conventions.UserEmailKey), email)
369-
}
200+
// Identity is NOT processed here. Each category-specific struct is
201+
// responsible for calling the appropriate identity parser in its own
202+
// PutCommonAttributes override, because the identity field has different
203+
// structures across Azure log categories (Activity, Storage, etc.).
370204
}
371205

372206
// PutProperties puts attributes from the "properties" field into the provided Attributes Map/Body
@@ -376,10 +210,12 @@ func (*azureLogRecordBase) PutProperties(_ pcommon.Map, _ pcommon.Value) error {
376210
return nil
377211
}
378212

379-
// azureLogRecordBase represents a single Azure log following the common schema,
380-
// but has unknown for us Category
213+
// azureLogRecordGeneric represents a single Azure log following the common schema,
214+
// but has a Category that is unknown to us.
381215
// In this case we cannot correctly map properties to attributes, so we simply copy them
382-
// as-is to the attributes
216+
// as-is to the attributes.
217+
// Identity is not handled for unknown categories - each known category handles
218+
// its own identity structure with a typed struct.
383219
type azureLogRecordGeneric struct {
384220
azureLogRecordBase
385221

0 commit comments

Comments
 (0)