// isDomainName reports whether s is a syntactically valid presentation-format
// domain name. Accepted labels are hostname-style letter/digit/hyphen labels
// plus SRV-style labels containing underscores (see golang.org/issue/12421).
//
// A name is rejected when it:
//   - is empty, or is longer than 253 bytes (254 with a trailing dot);
//   - contains a byte other than ASCII letters, digits, '_', '-' or '.';
//   - has an empty label, or a label longer than 63 bytes;
//   - has a label that starts or ends with '-';
//   - consists solely of digits and dots (for example an IPv4 literal).
func isDomainName(s string) bool {
	// See RFC 1035 and RFC 3696. In wire format every label carries a length
	// octet and the whole name is capped at 255 octets. In presentation format
	// that leaves 253 bytes, or 254 when the optional terminating dot of a
	// fully-qualified name is written out.
	size := len(s)
	switch {
	case size == 0, size > 254:
		return false
	case size == 254 && s[size-1] != '.':
		return false
	}

	var (
		prev        = byte('.') // start of input behaves like "just after a dot"
		sawNonDigit = false     // set by any letter, underscore or hyphen
		labelLen    = 0         // bytes seen in the current label
	)

	for _, ch := range []byte(s) {
		switch {
		case ch >= 'a' && ch <= 'z', ch >= 'A' && ch <= 'Z', ch == '_':
			sawNonDigit = true
			labelLen++
		case ch >= '0' && ch <= '9':
			labelLen++
		case ch == '-':
			// A label must not begin with a hyphen.
			if prev == '.' {
				return false
			}
			sawNonDigit = true
			labelLen++
		case ch == '.':
			// A label must not be empty and must not end with a hyphen.
			if prev == '.' || prev == '-' {
				return false
			}
			if labelLen == 0 || labelLen > 63 {
				return false
			}
			labelLen = 0
		default:
			// Any other byte (space, punctuation, non-ASCII) is not allowed.
			return false
		}
		prev = ch
	}

	// The final label has no terminating dot to trigger the checks above.
	if prev == '-' || labelLen > 63 {
		return false
	}

	// Purely numeric input is not treated as a domain name.
	return sawNonDigit
}