Skip to content

Commit dc9c08d

Browse files
Don't call Marshal in unmarshalerEmbeddedStructsHookFunc (#14213)
## Context `confmap` uses a `mapstructure` hook to call the `Unmarshal` method on types which define it as part of the unmarshaling process, but `mapstructure` hooks are not called for squashed fields, so we have a separate hook which manually inspects all structs for squashed fields that implement `Unmarshaler`. This second hook currently returns a modified version of the raw YAML config, to be used by `mapstructure` to finish unmarshaling the full struct. This is a problem, because an `Unmarshal` method can set the state of the struct in arbitrary ways, which we want to preserve without `mapstructure` overwriting fields. (See `TestEmbeddedUnmarshaler`) The way this is currently worked around is by marshaling the embedded struct into a map, and then copying that map back into the raw config, in the hope that nothing will change when the raw config is mapped back into the struct. However, this assumption breaks when dealing with a `configopaque.String` inside an embedded `Unmarshaler`. That type has the very deliberate behavior of returning `[REDACTED]` when marshaled instead of its true contents, meaning marshaling then unmarshaling does not round trip. ## Description This PR attempts to fix this issue and make embedded `Unmarshaler`s more reliable by modifying the hook so that it returns a fully unmarshaled struct, leading `mapstructure` to do nothing. (This is what the basic `Unmarshaler` hook does to avoid "interference".) This means the hook needs to somehow unmarshal the non-`Unmarshaler` subfields manually, without affecting the input or output of `Unmarshal`. I did this by constructing a custom "partial" struct using `reflect.StructOf` which only contains the non-`Unmarshaler` fields, copying the initial values into it, calling `Decode` on it, then copying the output values back into the original struct. In a previous version of this PR, I kept most of the hook intact, but changed the logic so that we clear fields from the raw config when they might conflict with the embedded struct (instead of setting those fields to the embedded struct's current value). However, this has the side effect of breaking unmarshaling of structs with multiple identically-named fields, of which there is [at least one in contrib](https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/320ef1ca9bb4593b498eb3d849659c55d6a7eccc/receiver/dockerstatsreceiver/config.go#L17) (both `docker.Config` and `scraperhelper.ControllerConfig` have a `Timeout` field, and one of them implements `Unmarshaler`). --------- Co-authored-by: Pablo Baeyens <pablo.baeyens@datadoghq.com>
1 parent 0f06212 commit dc9c08d

File tree

3 files changed

+138
-25
lines changed

3 files changed

+138
-25
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: bug_fix
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. receiver/otlp)
7+
component: pkg/confmap
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: Ensure that embedded structs are not overwritten after Unmarshal is called
11+
12+
# One or more tracking issues or pull requests related to the change
13+
issues: [14213]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: |
19+
This allows embedding structs which implement Unmarshal and contain a configopaque.String.
20+
21+
# Optional: The change log or logs in which this entry should be included.
22+
# e.g. '[user]' or '[user, api]'
23+
# Include 'user' if the change is relevant to end users.
24+
# Include 'api' if there is a change to a library API.
25+
# Default: '[user]'
26+
change_logs: [api]

confmap/internal/confmap_test.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
package internal
55

66
import (
7+
"encoding"
78
"errors"
89
"math"
910
"os"
@@ -559,6 +560,53 @@ func TestEmbeddedMarshalerError(t *testing.T) {
559560
assert.EqualError(t, cfgMap.Unmarshal(tc), "error running encode hook: marshaling error")
560561
}
561562

563+
// stringOpaque is similar to configopaque.String, in that
564+
// marshaling then unmarshaling it changes its value.
565+
type stringOpaque string
566+
567+
var _ encoding.TextMarshaler = stringOpaque("")
568+
569+
func (s stringOpaque) MarshalText() (text []byte, err error) {
570+
return []byte("opaque"), nil
571+
}
572+
573+
type squashedConfigOpaque struct {
574+
Value stringOpaque `mapstructure:"value"`
575+
secret bool
576+
}
577+
578+
var _ Unmarshaler = (*squashedConfigOpaque)(nil)
579+
580+
func (ecwrt *squashedConfigOpaque) Unmarshal(conf *Conf) error {
581+
if err := conf.Unmarshal(ecwrt); err != nil {
582+
return err
583+
}
584+
ecwrt.secret = true
585+
return nil
586+
}
587+
588+
type testConfigOpaque struct {
589+
// Don't embed, otherwise testConfigOpaque will implement Unmarshaler,
590+
// and unmarshalerHookFunc will be called instead of unmarshalerEmbeddedStructsHookFunc.
591+
Squashed squashedConfigOpaque `mapstructure:",squash"`
592+
}
593+
594+
// Regression test: the hook processing embedded marshalers previously made the assumption that
595+
// marshaling a struct then unmarshaling the resulting map back into the struct
596+
// is a no-op, which is not true for configopaque.String.
597+
func TestEmbeddedMarshalerWithoutRoundtrip(t *testing.T) {
598+
cfgMap := NewFromStringMap(map[string]any{
599+
"value": "hello",
600+
})
601+
602+
tc := &testConfigOpaque{}
603+
require.NoError(t, cfgMap.Unmarshal(tc))
604+
// Check that "hello" hasn't been replaced by "opaque"
605+
assert.Equal(t, "hello", string(tc.Squashed.Value))
606+
// Check that the inner Unmarshal was called
607+
assert.True(t, tc.Squashed.secret)
608+
}
609+
562610
type B struct {
563611
String string `mapstructure:"string"`
564612
}

confmap/internal/decoder.go

Lines changed: 64 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import (
77
"encoding"
88
"errors"
99
"fmt"
10-
"maps"
1110
"reflect"
1211
"slices"
1312
"strings"
@@ -57,7 +56,7 @@ func Decode(input, result any, settings UnmarshalOptions, skipTopLevelUnmarshale
5756
unmarshalerHookFunc(result, skipTopLevelUnmarshaler),
5857
// after the main unmarshaler hook is called,
5958
// we unmarshal the embedded structs if present to merge with the result:
60-
unmarshalerEmbeddedStructsHookFunc(),
59+
unmarshalerEmbeddedStructsHookFunc(settings),
6160
zeroSliceAndMapHookFunc(),
6261
),
6362
}
@@ -189,7 +188,7 @@ func mapKeyStringToMapKeyTextUnmarshalerHookFunc() mapstructure.DecodeHookFuncTy
189188

190189
// unmarshalerEmbeddedStructsHookFunc provides a mechanism for embedded structs to define their own unmarshal logic,
191190
// by implementing the Unmarshaler interface.
192-
func unmarshalerEmbeddedStructsHookFunc() mapstructure.DecodeHookFuncValue {
191+
func unmarshalerEmbeddedStructsHookFunc(settings UnmarshalOptions) mapstructure.DecodeHookFuncValue {
193192
return safeWrapDecodeHookFunc(func(from, to reflect.Value) (any, error) {
194193
if to.Type().Kind() != reflect.Struct {
195194
return from.Interface(), nil
@@ -198,32 +197,72 @@ func unmarshalerEmbeddedStructsHookFunc() mapstructure.DecodeHookFuncValue {
198197
if !ok {
199198
return from.Interface(), nil
200199
}
200+
201+
// First call Unmarshaler on squashed embedded fields, if necessary.
202+
var squashedUnmarshalers []int
201203
for i := 0; i < to.Type().NumField(); i++ {
202-
// embedded structs passed in via `squash` cannot be pointers. We just check if they are structs:
203204
f := to.Type().Field(i)
204-
if f.IsExported() && slices.Contains(strings.Split(f.Tag.Get(MapstructureTag), ","), "squash") {
205-
if unmarshaler, ok := to.Field(i).Addr().Interface().(Unmarshaler); ok {
206-
c := NewFromStringMap(fromAsMap)
207-
c.skipTopLevelUnmarshaler = true
208-
if err := unmarshaler.Unmarshal(c); err != nil {
209-
return nil, err
210-
}
211-
// the struct we receive from this unmarshaling only contains fields related to the embedded struct.
212-
// we merge this partially unmarshaled struct with the rest of the result.
213-
// note we already unmarshaled the main struct earlier, and therefore merge with it.
214-
conf := New()
215-
if err := conf.Marshal(unmarshaler); err != nil {
216-
return nil, err
217-
}
218-
resultMap := conf.ToStringMap()
219-
if fromAsMap == nil && len(resultMap) > 0 {
220-
fromAsMap = make(map[string]any, len(resultMap))
221-
}
222-
maps.Copy(fromAsMap, resultMap)
223-
}
205+
if !f.IsExported() {
206+
continue
207+
}
208+
tagParts := strings.Split(f.Tag.Get(MapstructureTag), ",")
209+
if !slices.Contains(tagParts[1:], "squash") {
210+
continue
211+
}
212+
unmarshaler, ok := to.Field(i).Addr().Interface().(Unmarshaler)
213+
if !ok {
214+
continue
224215
}
216+
c := NewFromStringMap(fromAsMap)
217+
c.skipTopLevelUnmarshaler = true
218+
if err := unmarshaler.Unmarshal(c); err != nil {
219+
return nil, err
220+
}
221+
squashedUnmarshalers = append(squashedUnmarshalers, i)
222+
}
223+
224+
// No squashed unmarshalers, we can let mapstructure do its job.
225+
if len(squashedUnmarshalers) == 0 {
226+
return fromAsMap, nil
225227
}
226-
return fromAsMap, nil
228+
229+
// We need to unmarshal into all other fields without overwriting the output of the Unmarshal calls.
230+
// To do that, create a custom "partial" struct containing only the non-squashed fields.
231+
var fields []reflect.StructField
232+
var fieldValues []reflect.Value
233+
for i := 0; i < to.Type().NumField(); i++ {
234+
f := to.Type().Field(i)
235+
if !f.IsExported() {
236+
continue
237+
}
238+
if slices.Contains(squashedUnmarshalers, i) {
239+
continue
240+
}
241+
fields = append(fields, f)
242+
fieldValues = append(fieldValues, to.Field(i))
243+
}
244+
restType := reflect.StructOf(fields)
245+
restValue := reflect.New(restType)
246+
247+
// Copy initial values into partial struct.
248+
for i, fieldValue := range fieldValues {
249+
restValue.Elem().Field(i).Set(fieldValue)
250+
}
251+
252+
// Decode into the partial struct.
253+
// This performs a recursive call into this hook, which will be handled by the "no squashed unmarshalers" case above.
254+
// We need to set `IgnoreUnused` to avoid errors from the map containing fields only present in the full struct.
255+
settings.IgnoreUnused = true
256+
if err := Decode(fromAsMap, restValue.Interface(), settings, true); err != nil {
257+
return nil, err
258+
}
259+
260+
// Copy decoding results back to the original struct.
261+
for i, fieldValue := range fieldValues {
262+
fieldValue.Set(restValue.Elem().Field(i))
263+
}
264+
265+
return to, nil
227266
})
228267
}
229268

0 commit comments

Comments
 (0)