Skip to content

Commit 9fbc887

Browse files
authored
add RFC5987ExtendedNotationParameterValue (#11)
1 parent b5595dd commit 9fbc887

File tree

13 files changed

+1438
-22
lines changed

13 files changed

+1438
-22
lines changed

.golangci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ linters:
3939
- mnd
4040
- gochecknoglobals
4141
- funcorder # todo: reconsider
42+
- ireturn
4243

4344
settings:
4445
govet:

.vscode/sample.settings.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
//"-mod=readonly",
2222
],
2323
"build.standaloneTags": [
24-
"integration_tests"
24+
"integration_tests",
25+
"generators"
2526
],
2627
"build.env": {
2728
"GO111MODULE": "on"

http/echo.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ func UnpackRequestBody(h http.Header, body io.ReadCloser) (any, error) {
108108
}
109109

110110
s := strings.Builder{}
111-
enc := base64.NewEncoder(urlSafeBase64, &s)
111+
enc := base64.NewEncoder(base64.RawURLEncoding, &s)
112112
_, err := io.Copy(enc, body)
113113

114114
return s.String(), err

http/encoding.go

Lines changed: 0 additions & 17 deletions
This file was deleted.

http/encoding/encoding.go

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
package encoding
2+
3+
import (
4+
"encoding/base64"
5+
"errors"
6+
"net/url"
7+
"strings"
8+
"sync"
9+
10+
"golang.org/x/text/encoding"
11+
"golang.org/x/text/encoding/charmap"
12+
"golang.org/x/text/encoding/japanese"
13+
"golang.org/x/text/encoding/korean"
14+
"golang.org/x/text/encoding/simplifiedchinese"
15+
"golang.org/x/text/encoding/traditionalchinese"
16+
"golang.org/x/text/encoding/unicode"
17+
)
18+
19+
// URLSafeBase64 is a [base64.Encoding] based on [base64.URLEncoding] that replaces the default padding character ('=') with a URL-safe one ('~').
20+
// In URL parameters, the following (unreserved) characters are considered safe and do not need encoding, see [rfc3986](https://www.rfc-editor.org/rfc/rfc3986.html#section-2.3):
21+
// Alphabetic characters: A-Z, a-z
22+
// Digits: 0-9
23+
// Hyphen: -
24+
// Underscore: _
25+
// Period: .
26+
// Tilde: ~
27+
var URLSafeBase64 = base64.URLEncoding.WithPadding('~') // same alphabet as base64.URLEncoding; only the padding character is swapped to '~'
28+
29+
// RFC5987ExtendedNotationParameterValue decodes RFC 5987 encoded filenames expecting the extended notation
30+
// (charset "'" [ language ] "'" value-chars)
31+
// example: UTF-8'en'file%20name.jpg
32+
// https://datatracker.ietf.org/doc/html/rfc5987#section-3.2
33+
func RFC5987ExtendedNotationParameterValue(parameterValue string) (charset string, lang string, value string, err error) {
34+
parts := strings.Split(parameterValue, "'")
35+
if len(parts) != 3 {
36+
return "", "", "", ErrRFC5987ParameterValueMalformed
37+
}
38+
39+
charset, lang, value = parts[0], parts[1], parts[2]
40+
41+
// unescape value
42+
decodedValue, er := url.QueryUnescape(value)
43+
if er != nil {
44+
return "", "", "", errors.Join(ErrRFC5987ParameterValueMalformed, er)
45+
}
46+
value = decodedValue
47+
48+
if strings.ToUpper(charset) == "UTF-8" {
49+
return
50+
}
51+
52+
enc, er := encodingFromCharset(charset)
53+
if er != nil {
54+
return "", "", "", er
55+
}
56+
57+
value, er = enc.NewDecoder().String(value)
58+
if er != nil {
59+
return "", "", "", errors.Join(ErrRFC5987ParameterValueMalformed, er)
60+
}
61+
62+
return
63+
}
64+
65+
// ErrRFC5987ParameterValueMalformed is returned when a parameter value does
// not follow the RFC 5987 extended notation or cannot be decoded.
var ErrRFC5987ParameterValueMalformed = errors.New("RFC5987 Parameter Value Malformed")

// ErrCharsetNotSupported is returned when no encoding is registered for the
// requested charset name.
var ErrCharsetNotSupported = errors.New("charset is not supported")
69+
70+
// FromCharset maps the official names (plus preferred mime names and aliases) for character sets to the equivalent golang [encoding.Encoding].
71+
// https://www.iana.org/assignments/character-sets/character-sets.xhtml
72+
// https://github.com/unicode-org/icu-data
73+
// https://encoding.spec.whatwg.org/
74+
func FromCharset(mimeName string) (encoding.Encoding, error) {
75+
m := strings.TrimSpace(strings.ToUpper(mimeName))
76+
return encodingFromCharset(m)
77+
}
78+
79+
//go:generate go run -tags=generators mktable.go
80+
81+
var (
82+
encoderPerMIB map[uint16]encoding.Encoding
83+
encoderPerMIBOnce sync.Once
84+
)
85+
86+
func encodingFromCharset(mimeName string) (encoding.Encoding, error) {
87+
encoderPerMIBOnce.Do(initEncoderPerMID)
88+
89+
mid, midFound := toMIB[strings.ToUpper(mimeName)]
90+
if !midFound {
91+
return nil, ErrCharsetNotSupported
92+
}
93+
enc, encFound := encoderPerMIB[mid]
94+
if !encFound || enc == nil {
95+
return nil, ErrCharsetNotSupported
96+
}
97+
98+
return enc, nil
99+
}
100+
101+
func initEncoderPerMID() {
102+
encoderPerMIB = map[uint16]encoding.Encoding{}
103+
for _, c := range charmap.All {
104+
if cm, is := c.(*charmap.Charmap); is {
105+
id, _ := cm.ID()
106+
encoderPerMIB[uint16(id)] = c
107+
}
108+
}
109+
110+
encoderPerMIB[3] = charmap.Windows1252
111+
112+
encoderPerMIB[106] = unicode.UTF8 // UTF-8
113+
encoderPerMIB[1013] = unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM) // UTF-16BE
114+
encoderPerMIB[1014] = unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) // UTF-16LE
115+
encoderPerMIB[1015] = unicode.UTF16(unicode.BigEndian, unicode.UseBOM) // UTF-16
116+
117+
encoderPerMIB[17] = japanese.ShiftJIS
118+
encoderPerMIB[18] = japanese.EUCJP
119+
encoderPerMIB[39] = japanese.ISO2022JP
120+
121+
encoderPerMIB[38] = korean.EUCKR
122+
123+
encoderPerMIB[113] = simplifiedchinese.GBK
124+
encoderPerMIB[114] = simplifiedchinese.GB18030
125+
encoderPerMIB[2085] = simplifiedchinese.HZGB2312
126+
127+
encoderPerMIB[2026] = traditionalchinese.Big5
128+
}
129+
130+
// TODO: identify the utf-16 based on bom:
131+
// var (
132+
// utf16BEBOM = []byte{0xFE, 0xFF}
133+
// utf16LEBOM = []byte{0xFF, 0xFE}
134+
// )

http/encoding/encoding_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package encoding
2+
3+
import (
4+
"testing"
5+
6+
"github.com/stretchr/testify/require"
7+
)
8+
9+
func TestRFC5987ExtendedNotationParameterValue(t *testing.T) {
10+
tests := map[string]struct {
11+
input string
12+
expectedCharset string
13+
expectedLang string
14+
expectedValue string
15+
errorAssertion require.ErrorAssertionFunc
16+
}{
17+
"happy path UTF-8 with lang": {
18+
input: `UTF-8'en'file%20name.jpg`,
19+
expectedCharset: "UTF-8",
20+
expectedLang: "en",
21+
expectedValue: "file name.jpg",
22+
errorAssertion: require.NoError,
23+
},
24+
"happy path UTF-8 no lang": {
25+
input: `UTF-8''file%20name.jpg`,
26+
expectedCharset: "UTF-8",
27+
expectedLang: "",
28+
expectedValue: "file name.jpg",
29+
errorAssertion: require.NoError,
30+
},
31+
"happy path UTF-8 no lang and special characters": {
32+
input: `UTF-8''%c2%a3%20and%20%e2%82%ac%20rates.txt`,
33+
expectedCharset: "UTF-8",
34+
expectedLang: "",
35+
expectedValue: "£ and € rates.txt",
36+
errorAssertion: require.NoError,
37+
},
38+
"happy path iso-8859-7 no lang": {
39+
input: `iso-8859-7''%EA%E1%EB%E7%EC%DD%F1%E1+%DE%EB%E9%E5%2C+%EA%E1%EB%E7%EC%DD%F1%E1`,
40+
expectedCharset: "iso-8859-7",
41+
expectedLang: "",
42+
expectedValue: "καλημέρα ήλιε, καλημέρα",
43+
errorAssertion: require.NoError,
44+
},
45+
}
46+
47+
for name, tc := range tests {
48+
t.Run(name, func(t *testing.T) {
49+
charset, lang, value, err := RFC5987ExtendedNotationParameterValue(tc.input)
50+
tc.errorAssertion(t, err)
51+
require.Equal(t, tc.expectedCharset, charset)
52+
require.Equal(t, tc.expectedLang, lang)
53+
require.Equal(t, tc.expectedValue, value)
54+
})
55+
}
56+
}

http/encoding/mktable.go

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
//go:build generators
2+
3+
package main
4+
5+
import (
6+
"bytes"
7+
"encoding/xml"
8+
"go/format"
9+
"io"
10+
"log"
11+
"net/http"
12+
"os"
13+
"slices"
14+
"strings"
15+
"text/template"
16+
)
17+
18+
// https://pkg.go.dev/text/template
19+
20+
// tmp is the text/template source of the generated table.go file. It renders
// the ToMIB field of the template data as the package-level toMIB map
// (charset name -> MIB number). text/template visits map keys in sorted
// order, so the generated file is stable between runs.
//
// Aliases are emitted with printf "%q" so that a name containing a quote or a
// backslash still yields a valid Go string literal; for plain names the output
// is the same as wrapping the name in double quotes.
const tmp = `// Code generated by mktable.go; DO NOT EDIT.

package encoding

var toMIB = map[string]uint16{
{{- range $alias, $mib := .ToMIB }}
	{{ printf "%q" $alias }}: {{ $mib }},
{{- end }}
}
`
30+
31+
// templateValues is the data handed to the table template.
type templateValues struct {
	// PerMIB lists, for each MIB number (as a string), every name collected for it.
	PerMIB map[string][]string
	// ToMIB maps a charset name to its MIB number (as a string).
	ToMIB map[string]string
}
35+
36+
type registry struct {
37+
XMLName xml.Name `xml:"registry"`
38+
Updated string `xml:"updated"`
39+
Registry []struct {
40+
ID string `xml:"id,attr"`
41+
Record []record `xml:"record"`
42+
} `xml:"registry"`
43+
}
44+
45+
// record is the decoding target for one <record> element of the charset
// registry XML.
type record struct {
	// Name is the text of the <name> element.
	Name string `xml:"name"`
	// Xref collects the type/data attributes of each <xref> element.
	Xref []struct {
		Type string `xml:"type,attr"`
		Data string `xml:"data,attr"`
	} `xml:"xref"`
	// Desc keeps the raw inner XML of the <description> element.
	Desc struct {
		Data string `xml:",innerxml"`
	} `xml:"description,"`
	// MIB is the text of the <value> element (the MIB number, kept as a string).
	MIB string `xml:"value"`
	// Alias holds the text of every <alias> element.
	Alias []string `xml:"alias"`
	// MIME is the text of the <preferred_alias> element.
	MIME string `xml:"preferred_alias"`
}

// AllAliases returns every name under which the record should be reachable:
//   - each alias, cut at its first newline;
//   - for aliases starting with "cs", additionally the alias without that prefix;
//   - "HZGB2312" for MIB 2085;
//   - the record's Name, unless it is already in the list.
//
// Empty names are skipped so they can never become a lookup key. The result
// may contain duplicates.
func (r record) AllAliases() []string {
	// Every alias contributes at most two names; +2 covers the extra name and Name.
	names := make([]string, 0, 2*len(r.Alias)+2)

	for _, alias := range r.Alias {
		// Only the first line of a multi-line alias is the actual name.
		alias, _, _ = strings.Cut(alias, "\n")
		if alias == "" {
			continue
		}

		if bare, hasCS := strings.CutPrefix(alias, "cs"); hasCS && bare != "" {
			names = append(names, bare)
		}

		names = append(names, alias)
	}

	// Extra spelling for MIB 2085, added on top of its registry aliases.
	if r.MIB == "2085" {
		names = append(names, "HZGB2312")
	}

	if !slices.Contains(names, r.Name) {
		names = append(names, r.Name)
	}

	return names
}
88+
89+
// https://www.iana.org/assignments/character-sets/character-sets.xml
90+
91+
func main() {
92+
r, err := http.Get("https://www.iana.org/assignments/character-sets/character-sets.xml")
93+
if err != nil {
94+
log.Fatalf("error during xml fetching %s", err.Error())
95+
}
96+
defer r.Body.Close()
97+
98+
reg := &registry{}
99+
if err := xml.NewDecoder(r.Body).Decode(&reg); err != nil && err != io.EOF {
100+
log.Fatalf("Error decoding charset registry xml: %s", err.Error())
101+
}
102+
103+
t := templateValues{
104+
PerMIB: map[string][]string{},
105+
ToMIB: map[string]string{},
106+
}
107+
for _, rec := range reg.Registry[0].Record {
108+
aliases := rec.AllAliases()
109+
t.PerMIB[rec.MIB] = aliases
110+
for _, a := range aliases {
111+
t.ToMIB[strings.ToUpper(a)] = rec.MIB
112+
}
113+
}
114+
115+
// Create and execute the template
116+
tmpl, err := template.New("mapInit").Parse(tmp)
117+
if err != nil {
118+
log.Fatalf("error while parsing the file template: %s", err.Error())
119+
}
120+
121+
w := &bytes.Buffer{}
122+
err = tmpl.Execute(w, t)
123+
if err != nil {
124+
log.Fatalf("error while rendering the file template: %s", err.Error())
125+
}
126+
127+
if err := writeGoFile(w.String()); err != nil {
128+
log.Fatalf("error while generating go file: %s", err.Error())
129+
}
130+
}
131+
132+
// writeGoFile formats code with go/format and writes the result to table.go
// in the current directory (mode 0o644). If the code cannot be formatted,
// the formatting error is returned and nothing is written.
func writeGoFile(code string) error {
	formatted, err := format.Source([]byte(code))
	if err != nil {
		return err
	}

	return os.WriteFile("table.go", formatted, 0o644)
}

0 commit comments

Comments
 (0)