From 19c9367aed504b1728ac6d9e41ee171996e117b3 Mon Sep 17 00:00:00 2001 From: oiweiwei Date: Wed, 29 Jan 2025 16:03:04 +0100 Subject: [PATCH 1/2] encoding/json: improve decoder alloc count --- src/encoding/json/scanner.go | 17 ++++++++++++----- src/encoding/json/stream.go | 6 +++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/encoding/json/scanner.go b/src/encoding/json/scanner.go index da6ea2ac8f843f..d8799cb2ebe974 100644 --- a/src/encoding/json/scanner.go +++ b/src/encoding/json/scanner.go @@ -45,11 +45,18 @@ func checkValid(data []byte, scan *scanner) error { // A SyntaxError is a description of a JSON syntax error. // [Unmarshal] will return a SyntaxError if the JSON can't be parsed. type SyntaxError struct { - msg string // description of error - Offset int64 // error occurred after reading Offset bytes + msg string // description of error + Offset int64 // error occurred after reading Offset bytes + invalidCharContext string // invalid character error context + invalidChar byte // the invalid character } -func (e *SyntaxError) Error() string { return e.msg } +func (e *SyntaxError) Error() string { + if e.invalidCharContext != "" { + return "invalid character " + quoteChar(e.invalidChar) + " " + e.invalidCharContext + } + return e.msg +} // A scanner is a JSON scanning state machine. // Callers call scan.reset and then pass bytes in one at a time @@ -168,7 +175,7 @@ func (s *scanner) eof() int { return scanEnd } if s.err == nil { - s.err = &SyntaxError{"unexpected end of JSON input", s.bytes} + s.err = &SyntaxError{"unexpected end of JSON input", s.bytes, "", 0} } return scanError } @@ -590,7 +597,7 @@ func stateError(s *scanner, c byte) int { // error records an error and switches to the error state. func (s *scanner) error(c byte, context string) int { s.step = stateError - s.err = &SyntaxError{"invalid character " + quoteChar(c) + " " + context, s.bytes} + s.err = &SyntaxError{invalidCharContext: context, invalidChar: c, Offset: s.bytes} return scanError } diff --git a/src/encoding/json/stream.go b/src/encoding/json/stream.go index e2d9470bcc7fca..896b824d086807 100644 --- a/src/encoding/json/stream.go +++ b/src/encoding/json/stream.go @@ -312,7 +312,7 @@ func (dec *Decoder) tokenPrepareForDecode() error { return err } if c != ',' { - return &SyntaxError{"expected comma after array element", dec.InputOffset()} + return &SyntaxError{"expected comma after array element", dec.InputOffset(), "", 0} } dec.scanp++ dec.tokenState = tokenArrayValue @@ -322,7 +322,7 @@ func (dec *Decoder) tokenPrepareForDecode() error { return err } if c != ':' { - return &SyntaxError{"expected colon after object key", dec.InputOffset()} + return &SyntaxError{"expected colon after object key", dec.InputOffset(), "", 0} } dec.scanp++ dec.tokenState = tokenObjectValue @@ -475,7 +475,7 @@ func (dec *Decoder) tokenError(c byte) (Token, error) { case tokenObjectComma: context = " after object key:value pair" } - return nil, &SyntaxError{"invalid character " + quoteChar(c) + context, dec.InputOffset()} + return nil, &SyntaxError{invalidChar: c, invalidCharContext: context, Offset: dec.InputOffset()} } // More reports whether there is another element in the From b0c5899d8fbeb968807fd3375151e48f30885330 Mon Sep 17 00:00:00 2001 From: oiweiwei Date: Thu, 30 Jan 2025 22:34:15 +0100 Subject: [PATCH 2/2] fix: ut --- src/encoding/json/decode_test.go | 41 ++++++++++++++++--------------- src/encoding/json/scanner_test.go | 4 +-- src/encoding/json/stream.go | 12 ++++----- src/encoding/json/stream_test.go | 8 +++--- 4 files changed, 33 insertions(+), 32 deletions(-) diff --git a/src/encoding/json/decode_test.go b/src/encoding/json/decode_test.go index 8aad11b8bfbce2..c5b73802c2f4be 100644 --- a/src/encoding/json/decode_test.go +++ b/src/encoding/json/decode_test.go @@ -467,21 +467,21 @@ var unmarshalTests = []struct { {CaseName: Name(""), in: `{"alphabet": "xyz"}`, ptr: new(U), err: fmt.Errorf("json: unknown field \"alphabet\""), disallowUnknownFields: true}, // syntax errors - {CaseName: Name(""), in: `{"X": "foo", "Y"}`, err: &SyntaxError{"invalid character '}' after object key", 17}}, - {CaseName: Name(""), in: `[1, 2, 3+]`, err: &SyntaxError{"invalid character '+' after array element", 9}}, - {CaseName: Name(""), in: `{"X":12x}`, err: &SyntaxError{"invalid character 'x' after object key:value pair", 8}, useNumber: true}, + {CaseName: Name(""), in: `{"X": "foo", "Y"}`, err: &SyntaxError{invalidChar: '}', invalidCharContext: "after object key", Offset: 17}}, + {CaseName: Name(""), in: `[1, 2, 3+]`, err: &SyntaxError{invalidChar: '+', invalidCharContext: "after array element", Offset: 9}}, + {CaseName: Name(""), in: `{"X":12x}`, err: &SyntaxError{invalidChar: 'x', invalidCharContext: "after object key:value pair", Offset: 8}, useNumber: true}, {CaseName: Name(""), in: `[2, 3`, err: &SyntaxError{msg: "unexpected end of JSON input", Offset: 5}}, - {CaseName: Name(""), in: `{"F3": -}`, ptr: new(V), err: &SyntaxError{msg: "invalid character '}' in numeric literal", Offset: 9}}, + {CaseName: Name(""), in: `{"F3": -}`, ptr: new(V), err: &SyntaxError{invalidChar: '}', invalidCharContext: "in numeric literal", Offset: 9}}, // raw value errors - {CaseName: Name(""), in: "\x01 42", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, - {CaseName: Name(""), in: " 42 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 5}}, - {CaseName: Name(""), in: "\x01 true", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, - {CaseName: Name(""), in: " false \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 8}}, - {CaseName: Name(""), in: "\x01 1.2", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, - {CaseName: Name(""), in: " 3.4 \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 6}}, - {CaseName: Name(""), in: "\x01 \"string\"", err: &SyntaxError{"invalid character '\\x01' looking for beginning of value", 1}}, - {CaseName: Name(""), in: " \"string\" \x01", err: &SyntaxError{"invalid character '\\x01' after top-level value", 11}}, + {CaseName: Name(""), in: "\x01 42", err: &SyntaxError{invalidChar: '\x01', invalidCharContext: "looking for beginning of value", Offset: 1}}, + {CaseName: Name(""), in: " 42 \x01", err: &SyntaxError{invalidChar: '\x01', invalidCharContext: "after top-level value", Offset: 5}}, + {CaseName: Name(""), in: "\x01 true", err: &SyntaxError{invalidChar: '\x01', invalidCharContext: "looking for beginning of value", Offset: 1}}, + {CaseName: Name(""), in: " false \x01", err: &SyntaxError{invalidChar: '\x01', invalidCharContext: "after top-level value", Offset: 8}}, + {CaseName: Name(""), in: "\x01 1.2", err: &SyntaxError{invalidChar: '\x01', invalidCharContext: "looking for beginning of value", Offset: 1}}, + {CaseName: Name(""), in: " 3.4 \x01", err: &SyntaxError{invalidChar: '\x01', invalidCharContext: "after top-level value", Offset: 6}}, + {CaseName: Name(""), in: "\x01 \"string\"", err: &SyntaxError{invalidChar: '\x01', invalidCharContext: "looking for beginning of value", Offset: 1}}, + {CaseName: Name(""), in: " \"string\" \x01", err: &SyntaxError{invalidChar: '\x01', invalidCharContext: "after top-level value", Offset: 11}}, // array tests {CaseName: Name(""), in: `[1, 2, 3]`, ptr: new([3]int), out: [3]int{1, 2, 3}}, @@ -1096,8 +1096,9 @@ var unmarshalTests = []struct { in: `invalid`, ptr: new(Number), err: &SyntaxError{ - msg: "invalid character 'i' looking for beginning of value", - Offset: 1, + invalidChar: 'i', + invalidCharContext: "looking for beginning of value", + Offset: 1, }, }, { @@ -1178,7 +1179,7 @@ var unmarshalTests = []struct { CaseName: Name(""), in: `[1,2,true,4,5}`, ptr: new([]int), - err: &SyntaxError{msg: "invalid character '}' after array element", Offset: 14}, + err: &SyntaxError{invalidChar: '}', invalidCharContext: "after array element", Offset: 14}, }, { CaseName: Name(""), @@ -2589,23 +2590,23 @@ func TestUnmarshalErrorAfterMultipleJSON(t *testing.T) { }{{ CaseName: Name(""), in: `1 false null :`, - err: &SyntaxError{"invalid character ':' looking for beginning of value", 14}, + err: &SyntaxError{invalidChar: ':', invalidCharContext: "looking for beginning of value", Offset: 14}, }, { CaseName: Name(""), in: `1 [] [,]`, - err: &SyntaxError{"invalid character ',' looking for beginning of value", 7}, + err: &SyntaxError{invalidChar: ',', invalidCharContext: "looking for beginning of value", Offset: 7}, }, { CaseName: Name(""), in: `1 [] [true:]`, - err: &SyntaxError{"invalid character ':' after array element", 11}, + err: &SyntaxError{invalidChar: ':', invalidCharContext: "after array element", Offset: 11}, }, { CaseName: Name(""), in: `1 {} {"x"=}`, - err: &SyntaxError{"invalid character '=' after object key", 14}, + err: &SyntaxError{invalidChar: '=', invalidCharContext: "after object key", Offset: 14}, }, { CaseName: Name(""), in: `falsetruenul#`, - err: &SyntaxError{"invalid character '#' in literal null (expecting 'l')", 13}, + err: &SyntaxError{invalidChar: '#', invalidCharContext: "in literal null (expecting 'l')", Offset: 13}, }} for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { diff --git a/src/encoding/json/scanner_test.go b/src/encoding/json/scanner_test.go index 068439dcaca858..896dbbd67d4daa 100644 --- a/src/encoding/json/scanner_test.go +++ b/src/encoding/json/scanner_test.go @@ -190,8 +190,8 @@ func TestIndentErrors(t *testing.T) { in string err error }{ - {Name(""), `{"X": "foo", "Y"}`, &SyntaxError{"invalid character '}' after object key", 17}}, - {Name(""), `{"X": "foo" "Y": "bar"}`, &SyntaxError{"invalid character '\"' after object key:value pair", 13}}, + {Name(""), `{"X": "foo", "Y"}`, &SyntaxError{invalidChar: '}', invalidCharContext: "after object key", Offset: 17}}, + {Name(""), `{"X": "foo" "Y": "bar"}`, &SyntaxError{invalidChar: '"', invalidCharContext: "after object key:value pair", Offset: 13}}, } for _, tt := range tests { t.Run(tt.Name, func(t *testing.T) { diff --git a/src/encoding/json/stream.go b/src/encoding/json/stream.go index 896b824d086807..3ccb86678acaef 100644 --- a/src/encoding/json/stream.go +++ b/src/encoding/json/stream.go @@ -463,17 +463,17 @@ func (dec *Decoder) tokenError(c byte) (Token, error) { var context string switch dec.tokenState { case tokenTopValue: - context = " looking for beginning of value" + context = "looking for beginning of value" case tokenArrayStart, tokenArrayValue, tokenObjectValue: - context = " looking for beginning of value" + context = "looking for beginning of value" case tokenArrayComma: - context = " after array element" + context = "after array element" case tokenObjectKey: - context = " looking for beginning of object key string" + context = "looking for beginning of object key string" case tokenObjectColon: - context = " after object key" + context = "after object key" case tokenObjectComma: - context = " after object key:value pair" + context = "after object key:value pair" } return nil, &SyntaxError{invalidChar: c, invalidCharContext: context, Offset: dec.InputOffset()} } diff --git a/src/encoding/json/stream_test.go b/src/encoding/json/stream_test.go index 46f9407c881c36..bb4f5a595fc63e 100644 --- a/src/encoding/json/stream_test.go +++ b/src/encoding/json/stream_test.go @@ -444,18 +444,18 @@ func TestDecodeInStream(t *testing.T) { {CaseName: Name(""), json: ` [{"a": 1} {"a": 2}] `, expTokens: []any{ Delim('['), decodeThis{map[string]any{"a": float64(1)}}, - decodeThis{&SyntaxError{"expected comma after array element", 11}}, + decodeThis{&SyntaxError{"expected comma after array element", 11, "", 0}}, }}, {CaseName: Name(""), json: `{ "` + strings.Repeat("a", 513) + `" 1 }`, expTokens: []any{ Delim('{'), strings.Repeat("a", 513), - decodeThis{&SyntaxError{"expected colon after object key", 518}}, + decodeThis{&SyntaxError{"expected colon after object key", 518, "", 0}}, }}, {CaseName: Name(""), json: `{ "\a" }`, expTokens: []any{ Delim('{'), - &SyntaxError{"invalid character 'a' in string escape code", 3}, + &SyntaxError{invalidChar: 'a', invalidCharContext: "in string escape code", Offset: 3}, }}, {CaseName: Name(""), json: ` \a`, expTokens: []any{ - &SyntaxError{"invalid character '\\\\' looking for beginning of value", 1}, + &SyntaxError{invalidChar: '\\', invalidCharContext: "looking for beginning of value", Offset: 1}, }}, } for _, tt := range tests {