From bd6d3251960f67bc30d1674115cd847c0799e67e Mon Sep 17 00:00:00 2001 From: fiatjaf Date: Wed, 10 May 2023 21:17:48 -0300 Subject: [PATCH 1/2] initial nson nip draft. --- 93.md | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 184 insertions(+) create mode 100644 93.md diff --git a/93.md b/93.md new file mode 100644 index 00000000..121ed5e2 --- /dev/null +++ b/93.md @@ -0,0 +1,184 @@ +NIP-93 +====== + +NSON +---- + +`draft` `optional` `author:fiatjaf` + +### Preamble + +Some [benchmarks](https://github.com/fiatjaf/nostr-json-benchmarks/tree/2f254fff91b3ad063ef9726bb4a3d25316cf12d8) made using all libraries available on Golang show that JSON decoding is very slow. And even when people do assembly-level optimizations things only improve up to a point (e.g. for decoding a Nostr event, the decoding time is 50% smaller). + +Meanwhile, doing a simple TLV encoding reduces the decoding time to 35% and a simpler static binary format for Nostr events reduces makes that number drop to 4%. However, it would be bad for Nostr if a binary encoding was introduced, as it would be likely to cause compatibility issues, centralize the protocol and/or increase the work for everybody, more about this at [this comment](https://github.com/nostr-protocol/nips/pull/512#issuecomment-1542368664). + +### The actual NIP + +NSON is a crazy idea that, according to the benchmarks above, reduces the decoding time to 14% of that of the standard library. It works by having the JSON sender encode the event _as JSON_, but in a specific, very strict, order of fields (taking advantage of the fact that Nostr events have static fields, static lengths for some fields, and an overall rigid structure) and include in the JSON object a new field called `"nson"` that contains metadata the JSON receiver can read to help in the decoding process. + +Here's an example of a NSON-encoded Nostr event: + +`{"id":"57ff66490a6a2af3992accc26ae95f3f60c6e5f84ed0ddf6f59c534d3920d3d2","pubkey":"79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798","sig":"504d142aed7fa7e0f6dab5bcd7eed63963b0277a8e11bbcb03b94531beb4b95a12f1438668b02746bd5362161bc782068e6b71494060975414e793f9e19f57ea","created_at":1683762317,"nson":"2801000b0203000100400005040001004000000014","kind":1,"content":"hello world","tags":[["e","b6de44a9dd47d1c000f795ea0453046914f44ba7d5e369608b04867a575ea83e","reply"],["p","c26f7b252cea77a5b94f42b1a4771021be07d4df766407e47738605f7e3ab774","","wss://relay.damus.io"]]}` + +The idea is that `"id"` comes first, so it can be accessed by reading a slice of the string from character `7` to character `71`, `pubkey` from character `83` to `147` and so on. `"content"`, `"kind"` and `"tags"` have dynamic sizes, so these are given by the values inside the `"nson"` field (which is also dynamic, its size by its first byte). + +### Anatomy of the `"nson"` field + +It is hex-encoded. Some fields are a single byte, others are two bytes (4 characters). + +Each explanation starts at the same line as the field it is referring to. + + number of tags (let's say it's two) + number of items on the first tag (let's say it's three) + number of chars on the first item + number of chars on the second item + number of chars on the third item + number of items on the second tag (let's say it's two) + number of chars on the first item + number of chars on the second item + "nson":"xxkkccccttnn111122223333nn11112222" + nson size + kind chars + content chars + +### Reference implementation + +Beware, all Rust maniacs, the following reference implementation is written in Go: + +```go +func decodeNson(data string) *Event { + evt := &Event{} + + // static fields + evt.ID = data[7 : 7+64] + evt.PubKey = data[83 : 83+64] + evt.Sig = data[156 : 156+128] + ts, _ := strconv.ParseInt(data[299:299+10], 10, 64) + evt.CreatedAt = Timestamp(ts) + + // nson values + nsonSizeBytes, _ := hex.DecodeString(data[318 : 318+2]) + nsonSize := int(nsonSizeBytes[0]) + nsonDescriptors, _ := hex.DecodeString(data[320 : 320+nsonSize]) + + // dynamic fields + // kind + kindChars := int(nsonDescriptors[0]) + kindStart := 320 + nsonSize + 9 // len(`","kind":`) + evt.Kind, _ = strconv.Atoi(data[kindStart : kindStart+kindChars]) + + // content + contentChars := int(binary.BigEndian.Uint16(nsonDescriptors[1:3])) + contentStart := kindStart + kindChars + 12 // len(`,"content":"`) + evt.Content, _ = strconv.Unquote(`"` + data[contentStart:contentStart+contentChars] + `"`) + + // tags + nTags := int(nsonDescriptors[3]) + evt.Tags = make(Tags, nTags) + tagsStart := contentStart + contentChars + 9 // len(`","tags":`) + + nsonIndex := 3 + tagsIndex := tagsStart + for t := 0; t < nTags; t++ { + nsonIndex++ + tagsIndex += 1 // len(`[`) or len(`,`) + nItems := int(nsonDescriptors[nsonIndex]) + tag := make(Tag, nItems) + for n := 0; n < nItems; n++ { + nsonIndex++ + itemStart := tagsIndex + 2 // len(`["`) or len(`,"`) + itemChars := int(binary.BigEndian.Uint16(nsonDescriptors[nsonIndex:])) + nsonIndex++ + tag[n], _ = strconv.Unquote(`"` + data[itemStart:itemStart+itemChars] + `"`) + tagsIndex = itemStart + itemChars + 1 // len(`"`) + } + tagsIndex += 1 // len(`]`) + evt.Tags[t] = tag + } + + return evt +} + +func encodeNson(evt *Event) string { + // start building the nson descriptors (without the first byte that represents the nson size) + nsonBuf := make([]byte, 256) + + // build the tags + nTags := len(evt.Tags) + nsonBuf[3] = uint8(nTags) + nsonIndex := 3 // start here + + tagBuilder := strings.Builder{} + tagBuilder.Grow(1000) // a guess + tagBuilder.WriteString(`[`) + for t, tag := range evt.Tags { + nItems := len(tag) + nsonIndex++ + nsonBuf[nsonIndex] = uint8(nItems) + + tagBuilder.WriteString(`[`) + for i, item := range tag { + v := strconv.Quote(item) + nsonIndex++ + binary.BigEndian.PutUint16(nsonBuf[nsonIndex:], uint16(len(v)-2)) + nsonIndex++ + tagBuilder.WriteString(v) + if nItems > i+1 { + tagBuilder.WriteString(`,`) + } + } + tagBuilder.WriteString(`]`) + if nTags > t+1 { + tagBuilder.WriteString(`,`) + } + } + tagBuilder.WriteString(`]}`) + nsonBuf = nsonBuf[0 : nsonIndex+1] + + kind := strconv.Itoa(evt.Kind) + kindChars := len(kind) + nsonBuf[0] = uint8(kindChars) + + content := strconv.Quote(evt.Content) + contentChars := len(content) - 2 + binary.BigEndian.PutUint16(nsonBuf[1:3], uint16(contentChars)) + + // actually build the json + base := strings.Builder{} + base.Grow(320 + // everything up to "nson": + 2 + len(nsonBuf)*2 + // nson + 9 + kindChars + // kind and its label + 12 + contentChars + // content and its label + 9 + tagBuilder.Len() + // tags and its label + 2, // the end + ) + base.WriteString(`{"id":"` + evt.ID + `","pubkey":"` + evt.PubKey + `","sig":"` + evt.Sig + `","created_at":` + strconv.FormatInt(int64(evt.CreatedAt), 10) + `,"nson":"`) + base.WriteString(hex.EncodeToString([]byte{uint8(len(nsonBuf) * 2)})) // nson size + base.WriteString(hex.EncodeToString(nsonBuf)) // nson descriptors + base.WriteString(`","kind":` + kind + `,"content":` + content + `,"tags":`) + base.WriteString(tagBuilder.String() /* includes the end */) + + return base.String() +} +``` + +### Other restrictions + +Besides the field ordering and the presence of the `"nson"` field, other restrictions must be applied: + +- the `"created_at"` field must have 10, characters, which gives us a range of dates from about 20 years ago up to 250 years in the future. +- to simplify decoding of `"content"` and `"tags"` strings, escape codes like `\uXXXX` are forbidden in NSON, UTF-8 must be used instead. Only `\n`, `\\` and `\"` are the only valid escaped sequences. + +### Backwards-compatibility + +Any reader who is not aware of the NSON-encoding can receive these events and decode them using whatever other JSON decoder they happen to have in hand. The `"nson"` field will just be ignored and life will continue as normal. + +Also, other event fields that may be present (for example, the NIP-03 `"ots"` field) can be added at the end, after `"tags"`, with no loss. + +### Other points worth mentioning + +- Relays can receive non-nsonified events from clients, then reformat and store them nsonified so they can serve future clients better by sending them NSON always. + +### Open questions to be edited out of the NIP + +- How to signal NSON support? I thought it would work to have an initial field `"n":1` (before `"id"`) on the JSON, which could be read very fast, but I don't know. From 44ea6d84583b64f6d7d994bfb480a4d786ba1699 Mon Sep 17 00:00:00 2001 From: fiatjaf Date: Thu, 11 May 2023 06:00:04 -0300 Subject: [PATCH 2/2] fix some smallities. --- 93.md | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/93.md b/93.md index 121ed5e2..e111c5f9 100644 --- a/93.md +++ b/93.md @@ -8,9 +8,9 @@ NSON ### Preamble -Some [benchmarks](https://github.com/fiatjaf/nostr-json-benchmarks/tree/2f254fff91b3ad063ef9726bb4a3d25316cf12d8) made using all libraries available on Golang show that JSON decoding is very slow. And even when people do assembly-level optimizations things only improve up to a point (e.g. for decoding a Nostr event, the decoding time is 50% smaller). +Some [benchmarks](https://github.com/fiatjaf/nostr-json-benchmarks/tree/2f254fff91b3ad063ef9726bb4a3d25316cf12d8) made using all libraries available on Golang show that JSON decoding is very slow. And even when people do assembly-level optimizations things only improve up to a point (e.g. for decoding a Nostr event, the "Sonic" library uses about 50% of that of the standard library). -Meanwhile, doing a simple TLV encoding reduces the decoding time to 35% and a simpler static binary format for Nostr events reduces makes that number drop to 4%. However, it would be bad for Nostr if a binary encoding was introduced, as it would be likely to cause compatibility issues, centralize the protocol and/or increase the work for everybody, more about this at [this comment](https://github.com/nostr-protocol/nips/pull/512#issuecomment-1542368664). +Meanwhile, doing a simple TLV encoding reduces the decoding time to 35% and a simpler static binary format for Nostr events makes that number drop to 4%. However, it would be bad for Nostr if a binary encoding was introduced, as it would be likely to cause compatibility issues, centralize the protocol and/or increase the work for everybody, more about this in [this comment](https://github.com/nostr-protocol/nips/pull/512#issuecomment-1542368664). ### The actual NIP @@ -20,31 +20,27 @@ Here's an example of a NSON-encoded Nostr event: `{"id":"57ff66490a6a2af3992accc26ae95f3f60c6e5f84ed0ddf6f59c534d3920d3d2","pubkey":"79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798","sig":"504d142aed7fa7e0f6dab5bcd7eed63963b0277a8e11bbcb03b94531beb4b95a12f1438668b02746bd5362161bc782068e6b71494060975414e793f9e19f57ea","created_at":1683762317,"nson":"2801000b0203000100400005040001004000000014","kind":1,"content":"hello world","tags":[["e","b6de44a9dd47d1c000f795ea0453046914f44ba7d5e369608b04867a575ea83e","reply"],["p","c26f7b252cea77a5b94f42b1a4771021be07d4df766407e47738605f7e3ab774","","wss://relay.damus.io"]]}` -The idea is that `"id"` comes first, so it can be accessed by reading a slice of the string from character `7` to character `71`, `pubkey` from character `83` to `147` and so on. `"content"`, `"kind"` and `"tags"` have dynamic sizes, so these are given by the values inside the `"nson"` field (which is also dynamic, its size by its first byte). +The idea is that `"id"` comes first, so it can be accessed by reading a slice of the string from character `7` to character `71`, `pubkey` from character `83` to `147` and so on. `"content"`, `"kind"` and `"tags"` have dynamic sizes, so their sizes are given by the values inside the `"nson"` field (which is also dynamic, its size given by its first byte). ### Anatomy of the `"nson"` field -It is hex-encoded. Some fields are a single byte, others are two bytes (4 characters). +It is hex-encoded. Some fields are a single byte, others are two bytes (4 characters), big-endian. -Each explanation starts at the same line as the field it is referring to. - - number of tags (let's say it's two) - number of items on the first tag (let's say it's three) - number of chars on the first item - number of chars on the second item - number of chars on the third item - number of items on the second tag (let's say it's two) - number of chars on the first item - number of chars on the second item + tt: number of tags (let's say it's two) + nn: number of items on the first tag (let's say it's 3) + 1111: number of chars on the first item + 2222: number of chars on the second item + 3333: number of chars on the third item + nn: number of items on the second tag (let's say it's 2) + 1111: number of chars on the first item + 2222: number of chars on the second item "nson":"xxkkccccttnn111122223333nn11112222" - nson size - kind chars - content chars + xx: nson size + kk: kind chars + cccc: content chars ### Reference implementation -Beware, all Rust maniacs, the following reference implementation is written in Go: - ```go func decodeNson(data string) *Event { evt := &Event{} @@ -167,13 +163,13 @@ func encodeNson(evt *Event) string { Besides the field ordering and the presence of the `"nson"` field, other restrictions must be applied: - the `"created_at"` field must have 10, characters, which gives us a range of dates from about 20 years ago up to 250 years in the future. -- to simplify decoding of `"content"` and `"tags"` strings, escape codes like `\uXXXX` are forbidden in NSON, UTF-8 must be used instead. Only `\n`, `\\` and `\"` are the only valid escaped sequences. +- to simplify decoding of `"content"` and `"tags"` strings, escape codes like `\uXXXX` are forbidden in NSON, UTF-8 must be used instead. `\n`, `\\` and `\"` are the only valid escaped sequences. ### Backwards-compatibility -Any reader who is not aware of the NSON-encoding can receive these events and decode them using whatever other JSON decoder they happen to have in hand. The `"nson"` field will just be ignored and life will continue as normal. +Any reader who is not aware of the NSON-encoding can receive these events and decode them using whatever means they want. The `"nson"` field will just be ignored and life will continue as normal. -Also, other event fields that may be present (for example, the NIP-03 `"ots"` field) can be added at the end, after `"tags"`, with no loss. +Also, other event fields that may be present (for example, the NIP-03 `"ots"` field) can be added at the end, after `"tags"`, with no loss to anyone. ### Other points worth mentioning