From 8ad87406cb82fb8e4b6c0508e026233eb5bb0bc2 Mon Sep 17 00:00:00 2001 From: dd dd Date: Sat, 27 Jul 2024 21:48:16 +0200 Subject: [PATCH] Add domain parsing for .nostr addresses Introduce a new `ParseDestinationDomain` function to handle .nostr domains. Integrate base32 decoding for subdomains and constructing full domain strings using relays. Added corresponding tests to verify domain parsing logic. --- exit/exit.go | 12 +- netstr/conn.go | 32 ++++ netstr/dial.go | 2 - protocol/domain.go | 380 ++++++++++++++++++++++++++++++++++++++++ protocol/domain_test.go | 45 +++++ 5 files changed, 467 insertions(+), 4 deletions(-) create mode 100644 protocol/domain.go create mode 100644 protocol/domain_test.go diff --git a/exit/exit.go b/exit/exit.go index 62d9b84..7d5feeb 100644 --- a/exit/exit.go +++ b/exit/exit.go @@ -2,6 +2,7 @@ package exit import ( "crypto/tls" + "encoding/base32" "fmt" "github.com/asmogo/nws/config" "github.com/asmogo/nws/netstr" @@ -14,6 +15,7 @@ import ( "golang.org/x/net/context" "log/slog" "net" + "strings" ) const ( @@ -92,6 +94,7 @@ func NewExit(ctx context.Context, exitNodeConfig *config.ExitConfig) *Exit { // set config exit.config = exitNodeConfig // add relays to the pool + var domain string for _, relayUrl := range exitNodeConfig.NostrRelays { relay, err := exit.pool.EnsureRelay(relayUrl) if err != nil { @@ -100,9 +103,14 @@ func NewExit(ctx context.Context, exitNodeConfig *config.ExitConfig) *Exit { } exit.relays = append(exit.relays, relay) fmt.Printf("added relay connection to %s\n", relayUrl) + if domain == "" { + domain = base32.HexEncoding.WithPadding(base32.NoPadding).EncodeToString([]byte(relayUrl)) + } else { + domain = fmt.Sprintf("%s.%s", domain, base32.HexEncoding.WithPadding(base32.NoPadding).EncodeToString([]byte(relayUrl))) + } } - - slog.Info("created exit node", "profile", profile) + domain = strings.ToLower(fmt.Sprintf("%s.%s.nostr", domain, fmt.Sprintf("%s.%s", exit.publicKey[:32], exit.publicKey[32:]))) + 
slog.Info("created exit node", "profile", profile, "domain", domain) // setup subscriptions err = exit.setSubscriptions(ctx) if err != nil { diff --git a/netstr/conn.go b/netstr/conn.go index 461995c..4889b8d 100644 --- a/netstr/conn.go +++ b/netstr/conn.go @@ -3,6 +3,7 @@ package netstr import ( "bytes" "context" + "encoding/base32" "encoding/base64" "fmt" "github.com/asmogo/nws/protocol" @@ -13,6 +14,7 @@ import ( "github.com/samber/lo" "log/slog" "net" + "strings" "time" ) @@ -216,6 +218,10 @@ func (nc *NostrConnection) handleNostrWrite(b []byte, err error) (int, error) { // If the prefix is "nprofile", the public key and relays are extracted. // Returns the public key, relays (if any), and any error encountered. func ParseDestination(destination string) (string, []string, error) { + // check if destination ends with .nostr + if strings.HasSuffix(destination, ".nostr") { + return ParseDestinationDomain(destination) + } // destination can be npub or nprofile prefix, pubKey, err := nip19.Decode(destination) @@ -237,6 +243,32 @@ func ParseDestination(destination string) (string, []string, error) { return publicKey, relays, nil } +func ParseDestinationDomain(destination string) (string, []string, error) { + url, err := protocol.Parse(destination) + if err != nil { + return "", nil, err + } + if !url.IsDomain { + // return "", nil, fmt.Errorf("destination is not a domain") + } + var subdomains []string + split := strings.Split(url.SubName, ".") + for i, subdomain := range split { + if i == len(split)-1 { + break + } + decodedSubDomain, err := base32.HexEncoding.WithPadding(base32.NoPadding).DecodeString(strings.ToUpper(subdomain)) + if err != nil { + continue + } + subdomains = append(subdomains, string(decodedSubDomain)) + } + + // base32 decode the subdomain + domain := split[len(split)-1] + url.Name + return domain, subdomains, nil +} + func (nc *NostrConnection) Close() error { nc.cancel() return nil diff --git a/netstr/dial.go b/netstr/dial.go index 
103b2c1..cb42838 100644 --- a/netstr/dial.go +++ b/netstr/dial.go @@ -8,7 +8,6 @@ import ( "github.com/nbd-wtf/go-nostr" "log/slog" "net" - "strings" ) type DialOptions struct { @@ -26,7 +25,6 @@ type DialOptions struct { // Finally, it returns the Connection and nil error. If there are any errors, nil connection and the error are returned. func DialSocks(options DialOptions) func(ctx context.Context, net_, addr string) (net.Conn, error) { return func(ctx context.Context, net_, addr string) (net.Conn, error) { - addr = strings.ReplaceAll(addr, ".", "") key := nostr.GeneratePrivateKey() connection := NewConnection(ctx, WithPrivateKey(key), diff --git a/protocol/domain.go b/protocol/domain.go new file mode 100644 index 0000000..0281be9 --- /dev/null +++ b/protocol/domain.go @@ -0,0 +1,380 @@ +package protocol + +import ( + "errors" + "fmt" + "net" + "net/url" + "strings" + + "golang.org/x/net/idna" + "golang.org/x/net/publicsuffix" +) + +const ( + ipV6URINotationPrefix = "[" + ipV6URINotationSuffix = "]" +) + +var ErrEmptyURL = errors.New("url to be parsed is empty") + +// URL represents a URL with additional fields and methods. +type URL struct { + SubName, Name, TLD, Port string + IsDomain bool + *url.URL +} + +// String returns the string representation of the URL. +// It includes the scheme if `includeScheme` is true. +func (url URL) String(includeScheme bool) string { + s := url.URL.String() + if !includeScheme { + s = RemoveScheme(s) + } + return s +} + +// Domain returns the domain name of the URL. If includeSub is true and there is a subdomain, it includes the subdomain +// in the returned string. Otherwise, it only includes the domain. +func (url URL) Domain(includeSub bool) string { + if includeSub && url.SubName != "" { + return fmt.Sprintf("%s.%s.%s", url.SubName, url.Name, url.TLD) + } + return fmt.Sprintf("%s.%s", url.Name, url.TLD) +} + +// NoWWW returns the domain name without the "www" subdomain. 
+// If the subdomain is not "www" or is empty, it returns the domain name as is. +// The returned domain name is a string in the format "subname.name.tld". +func (url URL) NoWWW() string { + if url.SubName != "www" && url.SubName != "" { + return fmt.Sprintf("%s.%s.%s", url.SubName, url.Name, url.TLD) + } + return fmt.Sprintf("%s.%s", url.Name, url.TLD) +} + +// WWW returns the domain name with the "www" subdomain. +// If the subdomain is not "www", it returns the domain name as is. +// The returned domain name is a string in the format "subname.name.tld". +func (url URL) WWW() string { + if url.SubName != "" { + return fmt.Sprintf("%s.%s.%s", url.SubName, url.Name, url.TLD) + } + return fmt.Sprintf("%s.%s.%s", "www", url.Name, url.TLD) +} + +// HTTPS returns the URL with HTTPS Scheme but leaves the URL itself untouched. +func (url URL) HTTPS() string { + rememberScheme := url.Scheme + url.Scheme = "https" + httpsURL := url.String(true) + url.Scheme = rememberScheme + return httpsURL +} + +// StripWWW returns the URL without "www" subdomain, but leaves the URL itself untouched. +// This function returns the whole URL with its path, in contrast to NoWWW(). +func (url URL) StripWWW(includeScheme bool) string { + if url.SubName == "www" { + return strings.Replace(url.String(includeScheme), "www.", "", 1) + } + return url.String(includeScheme) +} + +// StripQueryParams removes query parameters and fragments from the URL and returns +// the URL as a string. If includeScheme is true, it includes the scheme in the returned URL. 
+func (url URL) StripQueryParams(includeScheme bool) string { + // Remember the original values of query parameters and fragments + rememberRawQuery := url.RawQuery + rememberFragment := url.Fragment + rememberRawFragment := url.RawFragment + + // Clear the query parameters and fragments + url.RawQuery = "" + url.RawFragment = "" + url.Fragment = "" + + // Get the URL without query parameters + urlWithoutQuery := url.String(includeScheme) + + // Restore the original values of query parameters and fragments + url.RawQuery = rememberRawQuery + url.RawFragment = rememberRawFragment + url.Fragment = rememberFragment + + return urlWithoutQuery +} + +// IsLocal checks if the URL is a local address. +// It returns true if the URL's top-level domain (TLD) is "localhost" or if the URL's +// hostname resolves to a loopback IP address. +func (url URL) IsLocal() bool { + ip := net.ParseIP(strings.TrimPrefix(strings.TrimSuffix(url.Name, ipV6URINotationSuffix), ipV6URINotationPrefix)) + return url.TLD == "localhost" || (ip != nil && ip.IsLoopback()) +} + +// Parse parses a string representation of a URL and returns a *URL and error. +// It mirrors the net/url.Parse function but returns a tld.URL, which contains extra fields. 
+func Parse(urlString string) (*URL, error) { + urlString = strings.TrimSpace(urlString) + + // if the url to be parsed is empty after trimming, we return an error + if len(urlString) == 0 { + return nil, ErrEmptyURL + } + + urlString = AddDefaultScheme(urlString) + parsedURL, err := url.Parse(urlString) + if err != nil { + return nil, fmt.Errorf("could not parse url: %w", err) + } + // always lowercase subdomain.domain.tld (host property) + parsedURL.Host = strings.ToLower(parsedURL.Host) + if parsedURL.Host == "" { + return &URL{URL: parsedURL}, nil + } + dom, port := domainPort(parsedURL.Host) + var domName, tld, sub string + ip := net.ParseIP(strings.TrimPrefix(strings.TrimSuffix(dom, ipV6URINotationSuffix), ipV6URINotationPrefix)) + switch { + case ip != nil: + domName = dom + case dom == "localhost": + tld = dom + default: + etld1, err := publicsuffix.EffectiveTLDPlusOne(dom) + if err != nil { + return nil, fmt.Errorf("failed to extract eTLD+1: %w", err) + } + i := strings.Index(etld1, ".") + domName = etld1[0:i] + tld = etld1[i+1:] + sub = "" + if rest := strings.TrimSuffix(dom, "."+etld1); rest != dom { + sub = rest + } + } + urlString, err = idna.ToASCII(dom) + if err != nil { + return nil, fmt.Errorf("failed to convert domain to ASCII: %w", err) + } + return &URL{ + SubName: sub, + Name: domName, + TLD: tld, + Port: port, + URL: parsedURL, + IsDomain: IsDomainName(urlString), + }, nil +} + +// FromParsed mirrors the net/url.Parse function, +// but instead of returning a *url.URL, it returns a *URL, +// which is a struct that contains additional fields. +// +// The function first checks if the parsedUrl.Host field is empty. +// If it is empty, it returns a *URL with the URL field set to parsedUrl +// and all other fields set to their zero values. +// +// If the parsedUrl.Host field is not empty, it extracts the domain and port +// using the domainPort function. 
+// +// It then calculates the effective top-level domain plus one (etld+1) +// using the publicsuffix.EffectiveTLDPlusOne function. +// +// The etld+1 is then split into the domain name (domName) and the top-level domain (tld). +// +// It further determines the subdomain (sub) by checking if the domain is a subdomain of the etld+1. +// +// The domain name (domName) is then converted to ASCII using the idna.ToASCII function. +// +// Finally, it returns a *URL with the extracted values and the URL field set to parsedUrl. +// The IsDomain field is set to the result of the IsDomainName function called with the ASCII domain name. +// The SubName field is set to sub, the Name field is set to domName, and the T. +func FromParsed(parsedURL *url.URL) (*URL, error) { + if parsedURL.Host == "" { + return &URL{URL: parsedURL}, nil + } + dom, port := domainPort(parsedURL.Host) + // etld+1 + etld1, err := publicsuffix.EffectiveTLDPlusOne(dom) + if err != nil { + return nil, fmt.Errorf("failed to extract eTLD+1: %w", err) + } + // convert to domain name, and tld + i := strings.Index(etld1, ".") + domName := etld1[0:i] + tld := etld1[i+1:] + // and subdomain + sub := "" + if rest := strings.TrimSuffix(dom, "."+etld1); rest != dom { + sub = rest + } + asciiDom, err := idna.ToASCII(dom) + if err != nil { + return nil, fmt.Errorf("failed to convert domain to ASCII: %w", err) + } + return &URL{ + SubName: sub, + Name: domName, + TLD: tld, + Port: port, + URL: parsedURL, + IsDomain: IsDomainName(asciiDom), + }, nil +} + +// domainPort extracts the domain and port from the host part of a URL. +// If the host contains a port, it returns the domain without the port and the port as strings. +// If the host does not contain a port, it returns the domain and an empty string for the port. +// If the host is all numeric characters, it returns the host itself and an empty string for the port. +// Note that the net/url package should prevent the string from being all numeric characters. 
func domainPort(host string) (string, string) {
	// Walk backwards over trailing digits. A ':' right before them separates
	// host and port; any other byte means there is no port.
	for i := len(host) - 1; i >= 0; i-- {
		switch c := host[i]; {
		case c == ':':
			return host[:i], host[i+1:]
		case c < '0' || c > '9':
			return host, ""
		}
	}
	// Only reached for an empty or all-digit string; net/url should prevent
	// the latter from happening.
	return host, ""
}

// IsDomainName checks if a string represents a valid domain name.
//
// The checks follow RFC 1035 and RFC 3696. Any scheme prefix is removed with
// RemoveScheme first. The remaining name must:
//   - contain at least one dot (a domain and a TLD),
//   - be at most 253 bytes long, or 254 when the last byte is a dot,
//   - consist only of letters, digits, '_', '-' and '.',
//   - have no empty label and no label longer than 63 bytes,
//   - have no label that starts or ends with '-',
//   - contain at least one letter, '_' or '-' (so purely numeric names such
//     as IPv4 literals are rejected).
//
// A single trailing dot is accepted.
//
// Example usage:
//
//	s := "mail.google.com"
//	isValid := IsDomainName(s)
func IsDomainName(name string) bool { //nolint:cyclop
	name = RemoveScheme(name)

	// Need a TLD and a domain, i.e. at least one separator.
	if !strings.Contains(name, ".") {
		return false
	}
	// Wire format reserves length octets for the first and last label and caps
	// the total at 255, so the effective presentation maximum is 253; 254 is
	// tolerated only when the last byte is the optional root dot.
	if l := len(name); l > 254 || (l == 254 && name[l-1] != '.') {
		return false
	}

	last := byte('.')   // previous byte; starts as '.' so a leading '-' or '.' is rejected
	nonNumeric := false // true once a letter, underscore or hyphen was seen
	labelLen := 0
	for i := 0; i < len(name); i++ {
		c := name[i]
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', c == '_':
			nonNumeric = true
			labelLen++
		case '0' <= c && c <= '9':
			labelLen++
		case c == '-':
			// Byte before dash cannot be dot.
			if last == '.' {
				return false
			}
			labelLen++
			nonNumeric = true
		case c == '.':
			// Byte before dot cannot be dot or dash.
			if last == '.' || last == '-' {
				return false
			}
			if labelLen > 63 || labelLen == 0 {
				return false
			}
			labelLen = 0
		default:
			return false
		}
		last = c
	}
	if last == '-' || labelLen > 63 {
		return false
	}

	return nonNumeric
}

// RemoveScheme removes the scheme from a URL string.
// If the URL string includes a scheme (e.g., "http://"), everything up to and including "://" is removed.
// Otherwise, if it includes a default scheme ("//"), everything up to and including "//" is removed.
// If the URL string includes neither, the original string is returned unchanged.
func RemoveScheme(s string) string {
	switch {
	case strings.Contains(s, "://"):
		return removeScheme(s)
	case strings.Contains(s, "//"):
		return removeDefaultScheme(s)
	default:
		return s
	}
}

// AddDefaultScheme prefixes s with the default scheme "//" unless s already
// contains "//" somewhere.
func AddDefaultScheme(s string) string {
	if strings.Contains(s, "//") {
		return s
	}
	return addDefaultScheme(s)
}

// AddScheme prefixes s with "scheme://" unless s already contains "//".
// An empty scheme falls back to AddDefaultScheme.
func AddScheme(s, scheme string) string {
	switch {
	case scheme == "":
		return AddDefaultScheme(s)
	case strings.Contains(s, "//"):
		return s
	default:
		return scheme + "://" + s
	}
}

// addDefaultScheme returns a new string with a default scheme added.
// The default scheme format is "//".
func addDefaultScheme(s string) string {
	return "//" + s
}

// removeDefaultScheme returns what follows the first "//" in s, or s unchanged
// when "//" does not occur.
func removeDefaultScheme(s string) string {
	if _, rest, found := strings.Cut(s, "//"); found {
		return rest
	}
	return s
}

// removeScheme returns what follows the first "://" in s, or s unchanged when
// "://" does not occur.
func removeScheme(s string) string {
	if _, rest, found := strings.Cut(s, "://"); found {
		return rest
	}
	return s
}

// index returns the offset just past the first occurrence of scheme in s.
// It does not signal a missing scheme: strings.Index yields -1 in that case,
// so the result is len(scheme)-1 rather than -1, and callers must check for
// presence before slicing with it.
+func index(s, scheme string) int { + return strings.Index(s, scheme) + len(scheme) +} diff --git a/protocol/domain_test.go b/protocol/domain_test.go new file mode 100644 index 0000000..c552c65 --- /dev/null +++ b/protocol/domain_test.go @@ -0,0 +1,45 @@ +package protocol + +import ( + "net/url" + "reflect" + "testing" +) + +type args struct { + s string +} + +type parseTest struct { + name string + args args + want *URL + wantErr bool +} + +func TestParse(t *testing.T) { + t.Parallel() + for _, test := range createParseTests() { + testCopy := test + t.Run(testCopy.name, func(t *testing.T) { + t.Parallel() + got, err := Parse(testCopy.args.s) + if (err != nil) != testCopy.wantErr { + t.Errorf("Parse() error = %v, wantErr %v", err, testCopy.wantErr) + return + } + if !reflect.DeepEqual(got, testCopy.want) { + t.Errorf("Parse() got = %v, want %v", got, testCopy.want) + } + }) + } +} + +func createParseTests() []parseTest { + return []parseTest{ + {name: "1", args: args{s: "http://D1Q78S3J78NIURJFEDQ74BJQCLH6AP35CKN66R3FELI0.9B7NTQSU4PBM2JJQJ0CMGHUENQON4GB28RLGQCH3D3NK2AQVFE70.nostr"}, want: &URL{IsDomain: true, TLD: "nostr", Name: "9b7ntqsu4pbm2jjqj0cmghuenqon4gb28rlgqch3d3nk2aqvfe70", SubName: "d1q78s3j78niurjfedq74bjqclh6ap35ckn66r3feli0", URL: &url.URL{Host: "d1q78s3j78niurjfedq74bjqclh6ap35ckn66r3feli0.9b7ntqsu4pbm2jjqj0cmghuenqon4gb28rlgqch3d3nk2aqvfe70.nostr", Scheme: "http"}}, wantErr: false}, //nolint:lll + {name: "1", args: args{s: "http://d1q78s3j78niurjfedq74bjqclh6ap35ckn66r3feli0.9b7ntqsu4pbm2jjqj0cmghuenqon4gb28rlgqch3d3nk2aqvfe70.nostr"}, want: &URL{IsDomain: true, TLD: "nostr", Name: "9b7ntqsu4pbm2jjqj0cmghuenqon4gb28rlgqch3d3nk2aqvfe70", SubName: "d1q78s3j78niurjfedq74bjqclh6ap35ckn66r3feli0", URL: &url.URL{Host: "d1q78s3j78niurjfedq74bjqclh6ap35ckn66r3feli0.9b7ntqsu4pbm2jjqj0cmghuenqon4gb28rlgqch3d3nk2aqvfe70.nostr", Scheme: "http"}}, wantErr: false}, //nolint:lll + {name: "1", args: args{s: 
"https://d1q78s3j78niurjfedq74bjqclh6ap35ckn66r3feli0.9b7ntqsu4pbm2jjqj0cmghuenqon4gb28rlgqch3d3nk2aqvfe70.nostr"}, want: &URL{IsDomain: true, TLD: "nostr", Name: "9b7ntqsu4pbm2jjqj0cmghuenqon4gb28rlgqch3d3nk2aqvfe70", SubName: "d1q78s3j78niurjfedq74bjqclh6ap35ckn66r3feli0", URL: &url.URL{Host: "d1q78s3j78niurjfedq74bjqclh6ap35ckn66r3feli0.9b7ntqsu4pbm2jjqj0cmghuenqon4gb28rlgqch3d3nk2aqvfe70.nostr", Scheme: "https"}}, wantErr: false}, //nolint:lll + + } +}