Skip to content

Commit 33bb372

Browse files
cstocktonChris Stockton
andauthored
fix: tighten email validation rules (#2304)
The goal here is to limit the conditions which resolver implementations can affect the determinism of our DNS checks, without allowing transient DNS failures to block signups: * Reject single label email domains (`a@a`, `a@gmail`) * Use absolute FQDN for DNS lookups to avoid implicit search behavior * Preserves the RFC 5321 fallback, but narrows when it is called * Add an allow list for major email providers to lower latency * Reject mutated display name address that the mail parser might accept * Add test coverage for some corner cases Co-authored-by: Chris Stockton <chris.stockton@supabase.io>
1 parent e8f679b commit 33bb372

2 files changed

Lines changed: 128 additions & 9 deletions

File tree

internal/mailer/validateclient/validateclient.go

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,51 @@ var invalidHostMap = map[string]bool{
5656

5757
// Hundreds of typos per day for this.
5858
"gamil.com": true,
59+
"gamai.com": true,
5960

6061
// These are not email providers, but people often use them.
6162
"anonymous.com": true,
6263
"email.com": true,
6364
}
6465

66+
// We skip checking hosts for some of the biggest well known public email
67+
// providers, generated via:
68+
//
69+
// test -f "gmass.html" \
70+
// || wget -O "gmass.html" https://www.gmass.co/domains
71+
// cat "gmass.html" \
72+
// | pup 'div#status-details a json{}' \
73+
// | jq -r 'map([(.text | split(" "))[1], .children[0].text])
74+
// | map("`" + .[0] + ".` : true, // " + .[1]) | join("\n")' \
75+
// | sed 's| emails sent||g' \
76+
// | head -20
77+
//
78+
// Note:
79+
// This only affects the validateHost code, if we have an exact match we don't
80+
// bother to make a dns request.
81+
var hostAllowList = map[string]bool{
82+
`gmail.com.`: true, // 563,185,814
83+
`yahoo.com.`: true, // 107,413,999
84+
`hotmail.com.`: true, // 98,895,904
85+
`aol.com.`: true, // 31,839,178
86+
`outlook.com.`: true, // 11,826,511
87+
`comcast.net.`: true, // 9,663,112
88+
`icloud.com.`: true, // 9,274,437
89+
`msn.com.`: true, // 7,101,124
90+
`hotmail.co.uk.`: true, // 5,456,609
91+
`sbcglobal.net.`: true, // 5,167,305
92+
`live.com.`: true, // 5,140,589
93+
`yahoo.co.in.`: true, // 4,091,798
94+
`me.com.`: true, // 3,920,969
95+
`att.net.`: true, // 3,688,388
96+
`mail.ru.`: true, // 3,583,276
97+
`bellsouth.net.`: true, // 3,455,683
98+
`rediffmail.com`: true, // 3,400,300
99+
`cox.net.`: true, // 3,254,227
100+
`yahoo.co.uk.`: true, // 3,218,049
101+
`verizon.net.`: true, // 3,046,288
102+
}
103+
65104
const (
66105
validateEmailTimeout = 3 * time.Second
67106
)
@@ -222,6 +261,12 @@ func (ev *emailValidator) validateStatic(email string) (string, error) {
222261
return "", ErrInvalidEmailFormat
223262
}
224263

264+
// The mail package supports RFC 5322 addresses which are not valid for
265+
// signup users (e.g. Chris Stockton <chris.stockton@host...>).
266+
if ea.Address != email {
267+
return "", ErrInvalidEmailFormat
268+
}
269+
225270
i := strings.LastIndex(ea.Address, "@")
226271
if i == -1 {
227272
return "", ErrInvalidEmailFormat
@@ -291,13 +336,16 @@ func (ev *emailValidator) validateService(ctx context.Context, email string) err
291336
return nil
292337
}
293338

339+
// 32 bytes is plenty for the payload: {"valid": true|false}
294340
dec := json.NewDecoder(io.LimitReader(res.Body, 1<<5))
295341
if err := dec.Decode(&resObject); err != nil {
296342
return nil
297343
}
298344

299-
// If the object did not contain a valid key we consider the check as
300-
// failed. We _must_ get a valid JSON response with a "valid" field.
345+
// If the resObject contained no "valid" key we ignore the service and
346+
// return a nil error. If the Valid key is present AND set to true we
347+
// will return a nil error, otherwise the valid key was present & false
348+
// so we fall through to ErrInvalidEmailAddress.
301349
if resObject.Valid == nil || *resObject.Valid {
302350
return nil
303351
}
@@ -320,7 +368,29 @@ func (ev *emailValidator) validateProviders(name, host string) error {
320368
return nil
321369
}
322370

371+
// NOTE(cstockton): We could consider using[1] in the future for an additional
372+
// data point.
373+
//
374+
// [1] https://pkg.go.dev/golang.org/x/net/publicsuffix
323375
func (ev *emailValidator) validateHost(ctx context.Context, host string) error {
376+
377+
// As far as I know there is no such thing as valid single label hosts for
378+
// email. This will block anything like: email@a, email@mycompanygltd and
379+
// so on.
380+
if !strings.Contains(host, ".") {
381+
return ErrInvalidEmailDNS
382+
}
383+
384+
// Require a FQDN to remove possible implict search behavior.
385+
if !strings.HasSuffix(host, ".") {
386+
host = host + "."
387+
}
388+
389+
// If the host is in the allow list skip mx check all together.
390+
if hostAllowList[host] {
391+
return nil
392+
}
393+
324394
mxs, err := validateEmailResolver.LookupMX(ctx, host)
325395
if !isHostNotFound(err) {
326396
return ev.validateMXRecords(mxs, nil)

internal/mailer/validateclient/validateclient_test.go

Lines changed: 56 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package validateclient
33
import (
44
"context"
55
"fmt"
6+
"net"
67
"net/http"
78
"net/http/httptest"
89
"sync/atomic"
@@ -201,7 +202,10 @@ func TestValidateEmailExtended(t *testing.T) {
201202
// valid (has mx record)
202203
{email: "a@supabase.io"},
203204
{email: "support@supabase.io"},
204-
{email: "chris.stockton@supabase.io"},
205+
{email: "abc@supabase.io"},
206+
207+
// valid (RFC 5321 fallback, supabase.co has no mx, but valid A)
208+
{email: "invalid@supabase.co"},
205209

206210
// bad format
207211
{email: "", err: "invalid_email_format"},
@@ -210,6 +214,25 @@ func TestValidateEmailExtended(t *testing.T) {
210214
{email: "@supabase.io", err: "invalid_email_format"},
211215
{email: "test@.supabase.io", err: "invalid_email_format"},
212216

217+
// invalid providers check doesn't allow short gmails
218+
{email: "short@gmail.com", err: "invalid_email_address"},
219+
{email: "short@hotmail.com"}, // allow other providers
220+
221+
// ensure the mail parser does not result in mutated addr
222+
{email: "Chris Stockton <chris.stockton@someaddr.test>",
223+
err: "invalid_email_format"},
224+
225+
// Check dot suffixes are invalid (mutations)
226+
{email: "a@example.org.", err: "invalid_email_format"},
227+
{email: "a@", err: "invalid_email_format"},
228+
{email: "a@.", err: "invalid_email_format"},
229+
{email: "a@a.", err: "invalid_email_format"},
230+
{email: "a@gmail.com.", err: "invalid_email_format"},
231+
{email: "a@gmail.com..", err: "invalid_email_format"},
232+
{email: "aaaaaaaa@.abc", err: "invalid_email_format"},
233+
{email: "aaaaaaaa@.abc.", err: "invalid_email_format"},
234+
{email: "aaaaaaaa@.abc.abc", err: "invalid_email_format"},
235+
213236
// invalid: valid mx records, but invalid and often typed
214237
// (invalidEmailMap)
215238
{email: "test@email.com", err: "invalid_email_address"},
@@ -235,23 +258,23 @@ func TestValidateEmailExtended(t *testing.T) {
235258
// valid but not actually valid and typed a lot
236259
{email: "a@invalid", err: "invalid_email_dns"},
237260
{email: "a@a.invalid", err: "invalid_email_dns"},
238-
{email: "test@invalid", err: "invalid_email_dns"},
239261

240262
// various invalid emails
241263
{email: "test@test.localhost", err: "invalid_email_dns"},
242264
{email: "test@invalid.example.com", err: "invalid_email_dns"},
243265
{email: "test@no.such.email.host.supabase.io", err: "invalid_email_dns"},
244-
245-
// test blocked mx records
246-
{email: "test@hotmail.com", err: "invalid_email_mx"},
247-
248266
// this low timeout should simulate a dns timeout, which should
249267
// not be treated as an invalid email.
250268
{email: "validemail@probablyaaaaaaaanotarealdomain.com",
251269
timeout: time.Millisecond},
252270

253271
// likewise for a valid email
254-
{email: "support@supabase.io", timeout: time.Millisecond},
272+
{email: "timeout@supabase.io", timeout: time.Millisecond},
273+
274+
// invalid dns
275+
{email: "a@a", err: "invalid_email_dns"},
276+
{email: "a@a.a", err: "invalid_email_dns"},
277+
{email: "a@abcd", err: "invalid_email_dns"},
255278
}
256279

257280
cfg := conf.MailerConfiguration{
@@ -268,6 +291,7 @@ func TestValidateEmailExtended(t *testing.T) {
268291

269292
ev := newEmailValidator(cfg)
270293

294+
seen := make(map[string]bool)
271295
for idx, tc := range cases {
272296
func(timeout time.Duration) {
273297
if timeout == 0 {
@@ -277,6 +301,11 @@ func TestValidateEmailExtended(t *testing.T) {
277301
ctx, cancel := context.WithTimeout(ctx, timeout)
278302
defer cancel()
279303

304+
if seen[tc.email] {
305+
t.Fatalf("duplicate email %v in test cases", tc.email)
306+
}
307+
seen[tc.email] = true
308+
280309
now := time.Now()
281310
err := ev.Validate(ctx, tc.email)
282311
dur := time.Since(now)
@@ -294,4 +323,24 @@ func TestValidateEmailExtended(t *testing.T) {
294323

295324
}(tc.timeout)
296325
}
326+
327+
// test blocked mx list
328+
{
329+
for _, v := range []string{
330+
"hotmail-com.olc.protection.outlook.com",
331+
"hotmail-com.olc.protection.outlook.com.",
332+
} {
333+
{
334+
err := ev.validateMXRecords([]*net.MX{{Host: v}}, nil)
335+
require.Error(t, err)
336+
require.Contains(t, err.Error(), ErrInvalidEmailMX.Error())
337+
}
338+
339+
{
340+
err := ev.validateMXRecords(nil, []string{v})
341+
require.Error(t, err)
342+
require.Contains(t, err.Error(), ErrInvalidEmailMX.Error())
343+
}
344+
}
345+
}
297346
}

0 commit comments

Comments
 (0)