mirror of
https://github.com/asmogo/nws.git
synced 2024-12-13 18:56:21 +00:00
8ad87406cb
Introduce a new `ParseDestinationDomain` function to handle .nostr domains. Integrate base32 decoding for subdomains and constructing full domain strings using relays. Added corresponding tests to verify domain parsing logic.
381 lines
12 KiB
Go
381 lines
12 KiB
Go
package protocol
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"golang.org/x/net/idna"
|
|
"golang.org/x/net/publicsuffix"
|
|
)
|
|
|
|
// Delimiters that wrap an IPv6 literal in the host part of a URL, e.g. "[::1]".
// They are trimmed from a host before it is handed to net.ParseIP.
const (
	// ipV6URINotationPrefix is the opening bracket of a bracketed IPv6 host.
	ipV6URINotationPrefix = "["
	// ipV6URINotationSuffix is the closing bracket of a bracketed IPv6 host.
	ipV6URINotationSuffix = "]"
)
|
|
|
|
// ErrEmptyURL is returned by Parse when the input string is empty after
// surrounding whitespace has been trimmed.
var ErrEmptyURL = errors.New("url to be parsed is empty")
|
|
|
|
// URL wraps a parsed *url.URL and adds the components of its host as they are
// derived by Parse and FromParsed.
type URL struct {
	// SubName is the part of the host in front of the registrable domain
	// (eTLD+1), without the separating dot. Empty when there is none.
	SubName string
	// Name is the first label of the registrable domain. Parse stores the
	// whole host here when the host is an IP literal.
	Name string
	// TLD is everything after the first dot of the registrable domain.
	// Parse stores "localhost" here for the bare host "localhost".
	TLD string
	// Port is the numeric port taken from the host, or "" when absent.
	Port string
	// IsDomain reports whether the ASCII form of the host passed IsDomainName.
	IsDomain bool
	// URL is the underlying parsed URL; its fields and methods are promoted.
	*url.URL
}
|
|
|
|
// String returns the string representation of the URL.
|
|
// It includes the scheme if `includeScheme` is true.
|
|
func (url URL) String(includeScheme bool) string {
|
|
s := url.URL.String()
|
|
if !includeScheme {
|
|
s = RemoveScheme(s)
|
|
}
|
|
return s
|
|
}
|
|
|
|
// Domain returns the domain name of the URL. If includeSub is true and there is a subdomain, it includes the subdomain
|
|
// in the returned string. Otherwise, it only includes the domain.
|
|
func (url URL) Domain(includeSub bool) string {
|
|
if includeSub && url.SubName != "" {
|
|
return fmt.Sprintf("%s.%s.%s", url.SubName, url.Name, url.TLD)
|
|
}
|
|
return fmt.Sprintf("%s.%s", url.Name, url.TLD)
|
|
}
|
|
|
|
// NoWWW returns the domain name without the "www" subdomain.
|
|
// If the subdomain is not "www" or is empty, it returns the domain name as is.
|
|
// The returned domain name is a string in the format "subname.name.tld".
|
|
func (url URL) NoWWW() string {
|
|
if url.SubName != "www" && url.SubName != "" {
|
|
return fmt.Sprintf("%s.%s.%s", url.SubName, url.Name, url.TLD)
|
|
}
|
|
return fmt.Sprintf("%s.%s", url.Name, url.TLD)
|
|
}
|
|
|
|
// WWW returns the domain name with the "www" subdomain.
|
|
// If the subdomain is not "www", it returns the domain name as is.
|
|
// The returned domain name is a string in the format "subname.name.tld".
|
|
func (url URL) WWW() string {
|
|
if url.SubName != "" {
|
|
return fmt.Sprintf("%s.%s.%s", url.SubName, url.Name, url.TLD)
|
|
}
|
|
return fmt.Sprintf("%s.%s.%s", "www", url.Name, url.TLD)
|
|
}
|
|
|
|
// HTTPS returns the URL with HTTPS Scheme but leaves the URL itself untouched.
|
|
func (url URL) HTTPS() string {
|
|
rememberScheme := url.Scheme
|
|
url.Scheme = "https"
|
|
httpsURL := url.String(true)
|
|
url.Scheme = rememberScheme
|
|
return httpsURL
|
|
}
|
|
|
|
// StripWWW returns the URL without "www" subdomain, but leaves the URL itself untouched.
|
|
// This function returns the whole URL with its path, in contrast to NoWWW().
|
|
func (url URL) StripWWW(includeScheme bool) string {
|
|
if url.SubName == "www" {
|
|
return strings.Replace(url.String(includeScheme), "www.", "", 1)
|
|
}
|
|
return url.String(includeScheme)
|
|
}
|
|
|
|
// StripQueryParams removes query parameters and fragments from the URL and returns
|
|
// the URL as a string. If includeScheme is true, it includes the scheme in the returned URL.
|
|
func (url URL) StripQueryParams(includeScheme bool) string {
|
|
// Remember the original values of query parameters and fragments
|
|
rememberRawQuery := url.RawQuery
|
|
rememberFragment := url.Fragment
|
|
rememberRawFragment := url.RawFragment
|
|
|
|
// Clear the query parameters and fragments
|
|
url.RawQuery = ""
|
|
url.RawFragment = ""
|
|
url.Fragment = ""
|
|
|
|
// Get the URL without query parameters
|
|
urlWithoutQuery := url.String(includeScheme)
|
|
|
|
// Restore the original values of query parameters and fragments
|
|
url.RawQuery = rememberRawQuery
|
|
url.RawFragment = rememberRawFragment
|
|
url.Fragment = rememberFragment
|
|
|
|
return urlWithoutQuery
|
|
}
|
|
|
|
// IsLocal checks if the URL is a local address.
|
|
// It returns true if the URL's top-level domain (TLD) is "localhost" or if the URL's
|
|
// hostname resolves to a loopback IP address.
|
|
func (url URL) IsLocal() bool {
|
|
ip := net.ParseIP(strings.TrimPrefix(strings.TrimSuffix(url.Name, ipV6URINotationSuffix), ipV6URINotationPrefix))
|
|
return url.TLD == "localhost" || (ip != nil && ip.IsLoopback())
|
|
}
|
|
|
|
// Parse parses a string representation of a URL and returns a *URL and error.
|
|
// It mirrors the net/url.Parse function but returns a tld.URL, which contains extra fields.
|
|
func Parse(urlString string) (*URL, error) {
|
|
urlString = strings.TrimSpace(urlString)
|
|
|
|
// if the url to be parsed is empty after trimming, we return an error
|
|
if len(urlString) == 0 {
|
|
return nil, ErrEmptyURL
|
|
}
|
|
|
|
urlString = AddDefaultScheme(urlString)
|
|
parsedURL, err := url.Parse(urlString)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("could not parse url: %w", err)
|
|
}
|
|
// always lowercase subdomain.domain.tld (host property)
|
|
parsedURL.Host = strings.ToLower(parsedURL.Host)
|
|
if parsedURL.Host == "" {
|
|
return &URL{URL: parsedURL}, nil
|
|
}
|
|
dom, port := domainPort(parsedURL.Host)
|
|
var domName, tld, sub string
|
|
ip := net.ParseIP(strings.TrimPrefix(strings.TrimSuffix(dom, ipV6URINotationSuffix), ipV6URINotationPrefix))
|
|
switch {
|
|
case ip != nil:
|
|
domName = dom
|
|
case dom == "localhost":
|
|
tld = dom
|
|
default:
|
|
etld1, err := publicsuffix.EffectiveTLDPlusOne(dom)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to extract eTLD+1: %w", err)
|
|
}
|
|
i := strings.Index(etld1, ".")
|
|
domName = etld1[0:i]
|
|
tld = etld1[i+1:]
|
|
sub = ""
|
|
if rest := strings.TrimSuffix(dom, "."+etld1); rest != dom {
|
|
sub = rest
|
|
}
|
|
}
|
|
urlString, err = idna.ToASCII(dom)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to convert domain to ASCII: %w", err)
|
|
}
|
|
return &URL{
|
|
SubName: sub,
|
|
Name: domName,
|
|
TLD: tld,
|
|
Port: port,
|
|
URL: parsedURL,
|
|
IsDomain: IsDomainName(urlString),
|
|
}, nil
|
|
}
|
|
|
|
// FromParsed mirrors the net/url.Parse function,
|
|
// but instead of returning a *url.URL, it returns a *URL,
|
|
// which is a struct that contains additional fields.
|
|
//
|
|
// The function first checks if the parsedUrl.Host field is empty.
|
|
// If it is empty, it returns a *URL with the URL field set to parsedUrl
|
|
// and all other fields set to their zero values.
|
|
//
|
|
// If the parsedUrl.Host field is not empty, it extracts the domain and port
|
|
// using the domainPort function.
|
|
//
|
|
// It then calculates the effective top-level domain plus one (etld+1)
|
|
// using the publicsuffix.EffectiveTLDPlusOne function.
|
|
//
|
|
// The etld+1 is then split into the domain name (domName) and the top-level domain (tld).
|
|
//
|
|
// It further determines the subdomain (sub) by checking if the domain is a subdomain of the etld+1.
|
|
//
|
|
// The domain name (domName) is then converted to ASCII using the idna.ToASCII function.
|
|
//
|
|
// Finally, it returns a *URL with the extracted values and the URL field set to parsedUrl.
|
|
// The IsDomain field is set to the result of the IsDomainName function called with the ASCII domain name.
|
|
// The SubName field is set to sub, the Name field is set to domName, and the T.
|
|
func FromParsed(parsedURL *url.URL) (*URL, error) {
|
|
if parsedURL.Host == "" {
|
|
return &URL{URL: parsedURL}, nil
|
|
}
|
|
dom, port := domainPort(parsedURL.Host)
|
|
// etld+1
|
|
etld1, err := publicsuffix.EffectiveTLDPlusOne(dom)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to extract eTLD+1: %w", err)
|
|
}
|
|
// convert to domain name, and tld
|
|
i := strings.Index(etld1, ".")
|
|
domName := etld1[0:i]
|
|
tld := etld1[i+1:]
|
|
// and subdomain
|
|
sub := ""
|
|
if rest := strings.TrimSuffix(dom, "."+etld1); rest != dom {
|
|
sub = rest
|
|
}
|
|
asciiDom, err := idna.ToASCII(dom)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to convert domain to ASCII: %w", err)
|
|
}
|
|
return &URL{
|
|
SubName: sub,
|
|
Name: domName,
|
|
TLD: tld,
|
|
Port: port,
|
|
URL: parsedURL,
|
|
IsDomain: IsDomainName(asciiDom),
|
|
}, nil
|
|
}
|
|
|
|
// domainPort splits the host part of a URL into domain and port.
// A port is recognised only as a (possibly empty) run of ASCII digits at the
// very end of host that is directly preceded by ':'. In that case the text
// before the colon and the digits after it are returned. In every other case,
// including a host made up solely of digits, host is returned unchanged with
// an empty port.
func domainPort(host string) (string, string) {
	// Count the digits at the end of host.
	digits := 0
	for digits < len(host) {
		c := host[len(host)-1-digits]
		if c < '0' || c > '9' {
			break
		}
		digits++
	}

	// The byte in front of the digit run decides whether it is a port.
	sep := len(host) - digits - 1
	if sep >= 0 && host[sep] == ':' {
		return host[:sep], host[sep+1:]
	}
	return host, ""
}
|
|
|
|
// IsDomainName checks if a string represents a valid domain name.
|
|
//
|
|
// It follows the rules specified in RFC 1035 and RFC 3696 for domain name validation.
|
|
//
|
|
// The input string is first processed with the RemoveScheme function to remove any scheme prefix.
|
|
// The domain name is then split into labels using the dot separator.
|
|
// The function checks that the number of labels is at least 2 and that the total length of the string is between 1 and
|
|
// 254 characters.
|
|
//
|
|
// The function iterates over the characters of the string and performs checks based on the character type.
|
|
// Valid characters include letters (a-zA-Z), digits (0-9), underscore (_), and hyphen (-).
|
|
// Each label can contain up to 63 characters and the last label cannot end with a hyphen.
|
|
// The function also checks that the byte before a dot or a hyphen is not a dot or a hyphen, respectively.
|
|
// Non-numeric characters are tracked to ensure the presence of at least one non-numeric character in the domain name.
|
|
//
|
|
// If any of the checks fail, the function returns false. Otherwise, it returns true.
|
|
//
|
|
// Example usage:
|
|
// s := "mail.google.com"
|
|
// isValid := IsDomainName(s).
|
|
func IsDomainName(name string) bool { //nolint:cyclop
|
|
name = RemoveScheme(name)
|
|
// See RFC 1035, RFC 3696.
|
|
// Presentation format has dots before every label except the first, and the
|
|
// terminal empty label is optional here because we assume fully-qualified
|
|
// (absolute) input. We must therefore reserve space for the first and last
|
|
// labels' length octets in wire format, where they are necessary and the
|
|
// maximum total length is 255.
|
|
// So our _effective_ maximum is 253, but 254 is not rejected if the last
|
|
// character is a dot.
|
|
split := strings.Split(name, ".")
|
|
|
|
// Need a TLD and a domain.
|
|
if len(split) < 2 { //nolint:gomnd
|
|
return false
|
|
}
|
|
l := len(name)
|
|
if l == 0 || l > 254 || l == 254 && name[l-1] != '.' {
|
|
return false
|
|
}
|
|
|
|
last := byte('.')
|
|
nonNumeric := false // true once we've seen a letter or hyphen
|
|
partlen := 0
|
|
for i := 0; i < len(name); i++ {
|
|
char := name[i]
|
|
switch {
|
|
default:
|
|
return false
|
|
case 'a' <= char && char <= 'z' || 'A' <= char && char <= 'Z' || char == '_':
|
|
nonNumeric = true
|
|
partlen++
|
|
case '0' <= char && char <= '9':
|
|
// fine
|
|
partlen++
|
|
case char == '-':
|
|
// Byte before dash cannot be dot.
|
|
if last == '.' {
|
|
return false
|
|
}
|
|
partlen++
|
|
nonNumeric = true
|
|
case char == '.':
|
|
// Byte before dot cannot be dot, dash.
|
|
if last == '.' || last == '-' {
|
|
return false
|
|
}
|
|
if partlen > 63 || partlen == 0 {
|
|
return false
|
|
}
|
|
partlen = 0
|
|
}
|
|
last = char
|
|
}
|
|
if last == '-' || partlen > 63 {
|
|
return false
|
|
}
|
|
|
|
return nonNumeric
|
|
}
|
|
|
|
// RemoveScheme removes the scheme from a URL string.
|
|
// If the URL string includes a scheme (e.g., "http://"), the scheme will be removed and the remaining string will be returned.
|
|
// If the URL string includes a default scheme (e.g., "//"), the default scheme will be removed and the remaining string will be returned.
|
|
// If the URL string does not include a scheme, the original string will be returned unchanged.
|
|
func RemoveScheme(s string) string {
|
|
if strings.Contains(s, "://") {
|
|
return removeScheme(s)
|
|
}
|
|
if strings.Contains(s, "//") {
|
|
return removeDefaultScheme(s)
|
|
}
|
|
return s
|
|
}
|
|
|
|
// add default scheme if string does not include a scheme.
|
|
func AddDefaultScheme(s string) string {
|
|
if !strings.Contains(s, "//") ||
|
|
(!strings.Contains(s, "//") && !strings.Contains(s, ":") && !strings.Contains(s, "@")) {
|
|
return addDefaultScheme(s)
|
|
}
|
|
return s
|
|
}
|
|
|
|
func AddScheme(s, scheme string) string {
|
|
if scheme == "" {
|
|
return AddDefaultScheme(s)
|
|
}
|
|
if strings.Index(s, "//") == -1 {
|
|
return fmt.Sprintf("%s://%s", scheme, s)
|
|
}
|
|
return s
|
|
}
|
|
|
|
// addDefaultScheme unconditionally prepends the scheme-relative marker to s,
// producing "//<s>". It does not check whether s already has a scheme.
func addDefaultScheme(s string) string {
	const defaultScheme = "//"
	return fmt.Sprint(defaultScheme, s)
}
|
|
|
|
// removeDefaultScheme returns the part of s that follows the first "//".
// If s does not contain "//" it is returned unchanged.
func removeDefaultScheme(s string) string {
	if i := index(s, "//"); i >= 0 {
		return s[i:]
	}
	return s
}

// removeScheme returns the part of s that follows the first "://".
// If s does not contain "://" it is returned unchanged.
func removeScheme(s string) string {
	if i := index(s, "://"); i >= 0 {
		return s[i:]
	}
	return s
}

// index returns the position in s just past the first occurrence of scheme,
// i.e. where the remainder of the string starts. It returns -1 when scheme
// does not occur in s, so callers can tell "not found" apart from a valid
// offset instead of slicing at a meaningless position.
func index(s, scheme string) int {
	i := strings.Index(s, scheme)
	if i < 0 {
		return -1
	}
	return i + len(scheme)
}
|