Skip to content

Commit dce2de2

Browse files
committed
Improve DNS solver efficiency and resolve deadlock
Previously, simultaneously solving for example.com and *.example.com using the DNS challenge (i.e. both SANs on the same cert, which CertMagic doesn't do; but this is a general-purpose library) would cause a deadlock because both those names use the same-name TXT record. This caused problems with previous dependencies like lego and legacy DNS solvers, but the new acmez library and libdns packages support this. I've removed the locking around same-name records, which resolves the deadlock, and improves efficiency as well, since we can now solve challenges for example.com and *.example.com at the same time.
1 parent 5981e55 commit dce2de2

File tree

1 file changed

+78
-75
lines changed

1 file changed

+78
-75
lines changed

solvers.go

+78-75
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,18 @@ func (s *tlsALPNSolver) CleanUp(ctx context.Context, chal acme.Challenge) error
238238
return nil
239239
}
240240

241-
// DNS01Solver is a type that makes libdns providers usable
242-
// as ACME dns-01 challenge solvers.
243-
// See https://github.com/libdns/libdns
241+
// DNS01Solver is a type that makes libdns providers usable as ACME dns-01
242+
// challenge solvers. See https://github.com/libdns/libdns
243+
//
244+
// Note that challenges may be solved concurrently by some clients (such as
245+
// acmez, which CertMagic uses), meaning that multiple TXT records may be
246+
// created in a DNS zone simultaneously, and in some cases distinct TXT records
247+
// may have the same name. For example, solving challenges for both example.com
248+
// and *.example.com create a TXT record named _acme_challenge.example.com,
249+
// but with different tokens as their values. This solver distinguishes
250+
// between different records with the same name by looking at their values.
251+
// DNS provider APIs and implementations of the libdns interfaces must also
252+
// support multiple same-named TXT records.
244253
type DNS01Solver struct {
245254
// The implementation that interacts with the DNS
246255
// provider to set or delete records. (REQUIRED)
@@ -266,7 +275,18 @@ type DNS01Solver struct {
266275
// that the solver doesn't follow CNAME/NS record.
267276
OverrideDomain string
268277

269-
txtRecords map[string]dnsPresentMemory // keyed by domain name
278+
// Remember DNS records while challenges are active; i.e.
279+
// records we have presented and not yet cleaned up.
280+
// This lets us clean them up quickly and efficiently.
281+
// Keyed by domain name (specifically the ACME DNS name).
282+
// The map value is a slice because there can be multiple
283+
// concurrent challenges for different domains that have
284+
// the same ACME DNS name, for example: example.com and
285+
// *.example.com. We distinguish individual memories by
286+
// the value of their TXT records, which should contain
287+
// unique challenge tokens.
288+
// See https://github.com/caddyserver/caddy/issues/3474.
289+
txtRecords map[string][]dnsPresentMemory
270290
txtRecordsMu sync.Mutex
271291
}
272292

@@ -278,13 +298,6 @@ func (s *DNS01Solver) Present(ctx context.Context, challenge acme.Challenge) err
278298
}
279299
keyAuth := challenge.DNS01KeyAuthorization()
280300

281-
// multiple identifiers can have the same ACME challenge
282-
// domain (e.g. example.com and *.example.com) so we need
283-
// to ensure that we don't solve those concurrently and
284-
// step on each challenges' metaphorical toes; see
285-
// https://github.com/caddyserver/caddy/issues/3474
286-
activeDNSChallenges.Lock(dnsName)
287-
288301
zone, err := findZoneByFQDN(dnsName, recursiveNameservers(s.Resolvers))
289302
if err != nil {
290303
return fmt.Errorf("could not determine zone for domain %q: %v", dnsName, err)
@@ -299,19 +312,18 @@ func (s *DNS01Solver) Present(ctx context.Context, challenge acme.Challenge) err
299312

300313
results, err := s.DNSProvider.AppendRecords(ctx, zone, []libdns.Record{rec})
301314
if err != nil {
302-
return fmt.Errorf("adding temporary record for zone %s: %w", zone, err)
315+
return fmt.Errorf("adding temporary record for zone %q: %w", zone, err)
303316
}
304317
if len(results) != 1 {
305318
return fmt.Errorf("expected one record, got %d: %v", len(results), results)
306319
}
307320

308321
// remember the record and zone we got so we can clean up more efficiently
309-
s.txtRecordsMu.Lock()
310-
if s.txtRecords == nil {
311-
s.txtRecords = make(map[string]dnsPresentMemory)
312-
}
313-
s.txtRecords[dnsName] = dnsPresentMemory{dnsZone: zone, rec: results[0]}
314-
s.txtRecordsMu.Unlock()
322+
s.saveDNSPresentMemory(dnsPresentMemory{
323+
dnsZone: zone,
324+
dnsName: dnsName,
325+
rec: results[0],
326+
})
315327

316328
return nil
317329
}
@@ -363,7 +375,7 @@ func (s *DNS01Solver) Wait(ctx context.Context, challenge acme.Challenge) error
363375
var ready bool
364376
ready, err = checkDNSPropagation(dnsName, keyAuth, resolvers)
365377
if err != nil {
366-
return fmt.Errorf("checking DNS propagation of %s: %w", dnsName, err)
378+
return fmt.Errorf("checking DNS propagation of %q: %w", dnsName, err)
367379
}
368380
if ready {
369381
return nil
@@ -379,89 +391,80 @@ func (s *DNS01Solver) CleanUp(ctx context.Context, challenge acme.Challenge) err
379391
if s.OverrideDomain != "" {
380392
dnsName = s.OverrideDomain
381393
}
382-
383-
defer func() {
384-
// always forget about it so we don't leak memory
385-
s.txtRecordsMu.Lock()
386-
delete(s.txtRecords, dnsName)
387-
s.txtRecordsMu.Unlock()
394+
keyAuth := challenge.DNS01KeyAuthorization()
388395

389-
// always do this last - but always do it!
390-
activeDNSChallenges.Unlock(dnsName)
391-
}()
396+
// always forget about the record so we don't leak memory
397+
defer s.deleteDNSPresentMemory(dnsName, keyAuth)
392398

393399
// recall the record we created and zone we looked up
394-
s.txtRecordsMu.Lock()
395-
memory, ok := s.txtRecords[dnsName]
396-
if !ok {
397-
s.txtRecordsMu.Unlock()
398-
return fmt.Errorf("no memory of presenting a DNS record for %s (probably OK if presenting failed)", challenge.Identifier.Value)
400+
memory, err := s.getDNSPresentMemory(dnsName, keyAuth)
401+
if err != nil {
402+
return err
399403
}
400-
s.txtRecordsMu.Unlock()
401404

402405
// clean up the record
403-
_, err := s.DNSProvider.DeleteRecords(ctx, memory.dnsZone, []libdns.Record{memory.rec})
406+
_, err = s.DNSProvider.DeleteRecords(ctx, memory.dnsZone, []libdns.Record{memory.rec})
404407
if err != nil {
405-
return fmt.Errorf("deleting temporary record for zone %s: %w", memory.dnsZone, err)
408+
return fmt.Errorf("deleting temporary record for name %q in zone %q: %w", memory.dnsName, memory.dnsZone, err)
406409
}
407410

408411
return nil
409412
}
410413

411414
type dnsPresentMemory struct {
412415
dnsZone string
416+
dnsName string
413417
rec libdns.Record
414418
}
415419

416-
// ACMEDNSProvider defines the set of operations required for
417-
// ACME challenges. A DNS provider must be able to append and
418-
// delete records in order to solve ACME challenges. Find one
419-
// you can use at https://github.com/libdns. If your provider
420-
// isn't implemented yet, feel free to contribute!
421-
type ACMEDNSProvider interface {
422-
libdns.RecordAppender
423-
libdns.RecordDeleter
420+
func (s *DNS01Solver) saveDNSPresentMemory(mem dnsPresentMemory) {
421+
s.txtRecordsMu.Lock()
422+
if s.txtRecords == nil {
423+
s.txtRecords = make(map[string][]dnsPresentMemory)
424+
}
425+
s.txtRecords[mem.dnsName] = append(s.txtRecords[mem.dnsName], mem)
426+
s.txtRecordsMu.Unlock()
424427
}
425428

426-
// activeDNSChallenges synchronizes DNS challenges for
427-
// names to ensure that challenges for the same ACME
428-
// DNS name do not overlap; for example, the TXT record
429-
// to make for both example.com and *.example.com are
430-
// the same; thus we cannot solve them concurrently.
431-
var activeDNSChallenges = newMapMutex()
432-
433-
// mapMutex implements named mutexes.
434-
type mapMutex struct {
435-
cond *sync.Cond
436-
set map[interface{}]struct{}
437-
}
429+
func (s *DNS01Solver) getDNSPresentMemory(dnsName, keyAuth string) (dnsPresentMemory, error) {
430+
s.txtRecordsMu.Lock()
431+
defer s.txtRecordsMu.Unlock()
438432

439-
func newMapMutex() *mapMutex {
440-
return &mapMutex{
441-
cond: sync.NewCond(new(sync.Mutex)),
442-
set: make(map[interface{}]struct{}),
433+
var memory dnsPresentMemory
434+
for _, mem := range s.txtRecords[dnsName] {
435+
if mem.rec.Value == keyAuth {
436+
memory = mem
437+
break
438+
}
443439
}
444-
}
445440

446-
func (mmu *mapMutex) Lock(key interface{}) {
447-
mmu.cond.L.Lock()
448-
defer mmu.cond.L.Unlock()
449-
for mmu.locked(key) {
450-
mmu.cond.Wait()
441+
if memory.rec.Name == "" {
442+
return dnsPresentMemory{}, fmt.Errorf("no memory of presenting a DNS record for %q (usually OK if presenting also failed)", dnsName)
451443
}
452-
mmu.set[key] = struct{}{}
444+
445+
return memory, nil
453446
}
454447

455-
func (mmu *mapMutex) Unlock(key interface{}) {
456-
mmu.cond.L.Lock()
457-
defer mmu.cond.L.Unlock()
458-
delete(mmu.set, key)
459-
mmu.cond.Broadcast()
448+
func (s *DNS01Solver) deleteDNSPresentMemory(dnsName, keyAuth string) {
449+
s.txtRecordsMu.Lock()
450+
defer s.txtRecordsMu.Unlock()
451+
452+
for i, mem := range s.txtRecords[dnsName] {
453+
if mem.rec.Value == keyAuth {
454+
s.txtRecords[dnsName] = append(s.txtRecords[dnsName][:i], s.txtRecords[dnsName][i+1:]...)
455+
return
456+
}
457+
}
460458
}
461459

462-
func (mmu *mapMutex) locked(key interface{}) (ok bool) {
463-
_, ok = mmu.set[key]
464-
return
460+
// ACMEDNSProvider defines the set of operations required for
461+
// ACME challenges. A DNS provider must be able to append and
462+
// delete records in order to solve ACME challenges. Find one
463+
// you can use at https://github.com/libdns. If your provider
464+
// isn't implemented yet, feel free to contribute!
465+
type ACMEDNSProvider interface {
466+
libdns.RecordAppender
467+
libdns.RecordDeleter
465468
}
466469

467470
// distributedSolver allows the ACME HTTP-01 and TLS-ALPN challenges

0 commit comments

Comments
 (0)