/**
 * Kind-aware spam heuristics for Nostr events.
 *
 * Events are routed by `kind`:
 *   1. kinds with a dedicated validator are checked structurally,
 *   2. other kinds with standardized content skip content checks entirely,
 *   3. everything else is compared against recent events for near-duplicates.
 */

/**
 * @typedef {Object} SpamVerdict
 * @property {boolean} isSpam - true when the event was rejected.
 * @property {?string} reason - human-readable rejection reason, null when accepted.
 */

/** Similarity ratio above which two contents count as near-duplicates. */
const DEFAULT_SIMILARITY_THRESHOLD = 0.8;

/** 32-byte public key, lowercase hex. */
const HEX_PUBKEY = /^[0-9a-f]{64}$/;

/** NIP-04 payload shape: base64 ciphertext, the literal "?iv=", base64 IV. */
const NIP04_PAYLOAD = /^[A-Za-z0-9+/]+={0,2}\?iv=[A-Za-z0-9+/]+={0,2}$/;

/**
 * Exactly one emoji: a flag (regional-indicator pair), a keycap sequence, or a
 * pictographic character with optional variation selector / skin-tone
 * modifier, optionally joined to further ones by ZWJ.
 * `\p{Emoji}` is deliberately avoided: it matches ASCII digits, '#' and '*'.
 */
const SINGLE_EMOJI =
  /^(?:\p{Regional_Indicator}{2}|[0-9#*]\uFE0F?\u20E3|\p{Extended_Pictographic}(?:\uFE0F|\p{Emoji_Modifier})?(?:\u200D\p{Extended_Pictographic}(?:\uFE0F|\p{Emoji_Modifier})?)*)$/u;

/** Reaction contents that are accepted without further inspection. */
const STANDARD_REACTIONS = new Set(['+', '-', '', '👍', '❤️', '🤙', '⚡']);

/** Kinds whose content is standardized and must bypass the similarity check. */
const STANDARDIZED_CONTENT_KINDS = Object.freeze([
  0, // Metadata
  3, // Follows
  4, // Encrypted Direct Messages
  5, // Event Deletion
  6, // Repost
  7, // Reaction
  8, // Badge Award
  9, // Chat Message
  13, // Seal
  14, // Direct Message
  16, // Generic Repost
  17, // Website Reaction
  40, // Channel Creation
  41, // Channel Metadata
  42, // Channel Message
  43, // Channel Hide Message
  44, // Channel Mute User
  1063, // File Metadata
  1984, // Reporting
  1985, // Label
  1986, // Relay reviews
  2003, // Torrent
  9734, // Zap Request
  9735, // Zap
  10000, // Mute list
  10001, // Pin list
  10002, // Relay List Metadata
  10003, // Bookmark list
  10004, // Communities list
  10005, // Public chats list
  10006, // Blocked relays list
  10007, // Search relays list
  10009, // User groups
  10013, // Draft relays
  10015, // Interests list
  10019, // Nutzap Mint Recommendation
  10030, // User emoji list
  10050, // Relay list for DMs
  10063, // User server list
  10096, // File storage server list
  13194, // Wallet Info
  22242, // Client Authentication
  23194, // Wallet Request
  23195, // Wallet Response
  24133, // Nostr Connect
  24242, // Blobs on mediaservers
  27235, // HTTP Auth
  30000, // Follow sets
  30001, // Generic lists
  30002, // Relay sets
  30003, // Bookmark sets
  30004, // Curation sets
  30005, // Video sets
  30007, // Kind mute sets
  30008, // Profile Badges
  30009, // Badge Definition
  30017, // Create/update stall
  30018, // Create/update product
  30019, // Marketplace UI/UX
  30023, // Long-form Content
  30024, // Draft Long-form Content
  30078, // Application-specific Data
  30311, // Live Event
  30315, // User Statuses
  30402, // Classified Listing
  31234, // Draft Event
  31922, // Date-Based Calendar Event
  31923, // Time-Based Calendar Event
  31924, // Calendar
  31925, // Calendar Event RSVP
  34235, // Video Event
  34550, // Community Definition
  37375, // Cashu Wallet Event
  38383, // Peer-to-peer Order events
]);

/** Builds an "accepted" verdict (fresh object so callers may mutate it). */
function accept() {
  return { isSpam: false, reason: null };
}

/** Builds a "rejected" verdict carrying the given reason. */
function reject(reason) {
  return { isSpam: true, reason };
}

/**
 * Returns the event's tags whose first element equals `name`.
 * A missing or malformed `tags` field is treated as "no tags" rather than a crash.
 */
function tagsNamed(event, name) {
  const tags = event && Array.isArray(event.tags) ? event.tags : [];
  return tags.filter((tag) => Array.isArray(tag) && tag[0] === name);
}

/** True when `value` parses as a ws:// or wss:// URL. */
function isRelayUrl(value) {
  if (typeof value !== 'string') {
    return false;
  }
  try {
    const { protocol } = new URL(value);
    return protocol === 'ws:' || protocol === 'wss:';
  } catch (err) {
    // Unparseable URL: expected for bad input, simply not a relay address.
    return false;
  }
}

class KindAwareContentChecker {
  /**
   * @param {Object} [options]
   * @param {number} [options.similarityThreshold=0.8] - similarity ratio (0..1)
   *   that must be exceeded for two contents to count as near-duplicates.
   */
  constructor({ similarityThreshold = DEFAULT_SIMILARITY_THRESHOLD } = {}) {
    this.similarityThreshold = similarityThreshold;

    // Kinds that bypass the content similarity check
    this.standardizedContentKinds = new Set(STANDARDIZED_CONTENT_KINDS);

    // Kinds with a dedicated structural validator
    this.specialContentRules = new Map([
      [0, this.validateMetadata.bind(this)], // Metadata
      [3, this.validateFollowList.bind(this)], // Follow Lists
      [4, this.validateEncryptedDM.bind(this)], // Encrypted DM
      [6, this.validateRepost.bind(this)], // Reposts
      [7, this.validateReaction.bind(this)], // Reactions
      [14, this.validateDirectMessage.bind(this)], // Direct Message
      [16, this.validateRepost.bind(this)], // Generic Reposts
      [30023, this.validateLongForm.bind(this)], // Long-form Content
      [10002, this.validateRelayList.bind(this)], // Relay Lists
    ]);
  }

  /**
   * Main entry point: classifies a single event.
   *
   * @param {Object} event - event with at least `kind`, `content`, `tags`, `pubkey`.
   * @param {Object[]} [recentEvents=[]] - recently seen events to compare against.
   * @returns {SpamVerdict}
   */
  checkContent(event, recentEvents = []) {
    // Validators must be consulted BEFORE the bypass set: every kind that has
    // a validator is also a standardized kind, so testing the bypass set first
    // would make all validators unreachable.
    const validator = this.specialContentRules.get(event.kind);
    if (validator) {
      return validator(event, recentEvents);
    }

    if (this.standardizedContentKinds.has(event.kind)) {
      return accept();
    }

    return this.checkContentSimilarity(event, recentEvents);
  }

  /**
   * Validator for follow lists (kind 3): needs at least one `p` tag and every
   * `p` tag must carry a 64-char lowercase hex pubkey.
   *
   * `content` is intentionally not inspected: NIP-02 says it is "not used"
   * (not "must be empty"), and clients have historically stored relay
   * preferences there.
   *
   * @param {Object} event
   * @returns {SpamVerdict}
   */
  validateFollowList(event) {
    const follows = tagsNamed(event, 'p');
    if (follows.length === 0) {
      return reject('Invalid follow list: missing p tags');
    }

    const hasInvalidKey = follows.some(
      (tag) => tag.length < 2 || !HEX_PUBKEY.test(tag[1]),
    );
    if (hasInvalidKey) {
      return reject('Invalid follow list: contains invalid pubkeys');
    }

    return accept();
  }

  /**
   * Validator for reactions (kind 7). Recognised content (standard marker or
   * a single / custom emoji) is accepted outright; any other content is only
   * accepted when the event references both an event and a pubkey.
   *
   * @param {Object} event
   * @returns {SpamVerdict}
   */
  validateReaction(event) {
    const isRecognised =
      STANDARD_REACTIONS.has(event.content) || this.isValidEmojiReaction(event);
    if (isRecognised || this.hasValidReactionTags(event)) {
      return accept();
    }
    return reject('Invalid reaction: missing required tags');
  }

  /**
   * True when the content is a NIP-30 custom emoji (`:shortcode:` backed by a
   * matching `emoji` tag) or exactly one Unicode emoji.
   *
   * @param {Object} event
   * @returns {boolean}
   */
  isValidEmojiReaction(event) {
    const { content } = event;
    if (typeof content !== 'string') {
      return false;
    }

    // length > 2 guarantees a non-empty shortcode between the colons
    if (content.length > 2 && content.startsWith(':') && content.endsWith(':')) {
      const shortcode = content.slice(1, -1);
      return tagsNamed(event, 'emoji').some((tag) => tag[1] === shortcode);
    }

    return SINGLE_EMOJI.test(content.trim());
  }

  /**
   * True when the event carries at least one `e` tag and one `p` tag.
   *
   * @param {Object} event
   * @returns {boolean}
   */
  hasValidReactionTags(event) {
    return tagsNamed(event, 'e').length > 0 && tagsNamed(event, 'p').length > 0;
  }

  /**
   * Validator for reposts (kinds 6 and 16): an `e` tag is required. For kind 6
   * the content must be empty or a JSON object (NIP-18 recommends embedding
   * the stringified reposted note); kind 16 content is not inspected.
   *
   * @param {Object} event
   * @returns {SpamVerdict}
   */
  validateRepost(event) {
    if (tagsNamed(event, 'e').length === 0) {
      return reject('Invalid repost: missing event reference');
    }

    if (event.kind === 6 && event.content !== '') {
      let embedded = null;
      try {
        embedded = JSON.parse(event.content);
      } catch (err) {
        // Not JSON at all: leave `embedded` null so it is rejected below.
        embedded = null;
      }
      const isObject =
        embedded !== null && typeof embedded === 'object' && !Array.isArray(embedded);
      if (!isObject) {
        return reject(
          'Invalid repost: kind 6 content must be empty or the reposted event as JSON',
        );
      }
    }

    return accept();
  }

  /**
   * Validator for metadata (kind 0): content must be a JSON object.
   *
   * @param {Object} event
   * @returns {SpamVerdict}
   */
  validateMetadata(event) {
    let metadata;
    try {
      metadata = JSON.parse(event.content);
    } catch (err) {
      return reject('Invalid metadata: malformed JSON');
    }

    // typeof [] === 'object', so arrays need an explicit rejection
    if (metadata === null || typeof metadata !== 'object' || Array.isArray(metadata)) {
      return reject('Invalid metadata: not an object');
    }

    return accept();
  }

  /**
   * Standard check for regular content: flags the event when a recent event
   * of the same kind from a DIFFERENT pubkey has near-identical content.
   *
   * @param {Object} event
   * @param {Object[]} [recentEvents=[]]
   * @returns {SpamVerdict}
   */
  checkContentSimilarity(event, recentEvents = []) {
    const candidates = Array.isArray(recentEvents) ? recentEvents : [];
    const threshold = this.similarityThreshold;

    const similarEvents = candidates.filter(
      (other) =>
        other.pubkey !== event.pubkey &&
        other.kind === event.kind &&
        this.calculateSimilarity(other.content, event.content) > threshold,
    );

    if (similarEvents.length > 0) {
      return reject(`Content too similar to ${similarEvents.length} recent events`);
    }

    return accept();
  }

  /**
   * Normalised similarity in [0, 1]: 1 minus the Levenshtein distance divided
   * by the longer length. Identical inputs (including two empty strings)
   * score 1. Non-string inputs that are not identical are treated as ''.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number}
   */
  calculateSimilarity(str1, str2) {
    if (str1 === str2) {
      return 1.0;
    }

    const left = typeof str1 === 'string' ? str1 : '';
    const right = typeof str2 === 'string' ? str2 : '';
    const maxLen = Math.max(left.length, right.length);
    if (maxLen === 0) {
      return 1.0;
    }

    return (maxLen - this.levenshteinDistance(left, right)) / maxLen;
  }

  /**
   * Levenshtein edit distance (insert / delete / substitute, unit cost) over
   * UTF-16 code units. Uses two rolling rows, so memory is proportional to the
   * shorter input instead of the product of both lengths.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number}
   */
  levenshteinDistance(str1, str2) {
    // The distance is symmetric; keep the shorter string on the row axis.
    const [longer, shorter] = str1.length >= str2.length ? [str1, str2] : [str2, str1];
    const width = shorter.length;

    let previous = Array.from({ length: width + 1 }, (_, j) => j);
    let current = new Array(width + 1).fill(0);

    for (let i = 1; i <= longer.length; i++) {
      current[0] = i;
      for (let j = 1; j <= width; j++) {
        const substitution = previous[j - 1] + (longer[i - 1] === shorter[j - 1] ? 0 : 1);
        current[j] = Math.min(substitution, previous[j] + 1, current[j - 1] + 1);
      }
      [previous, current] = [current, previous];
    }

    return previous[width];
  }

  /**
   * Validator for encrypted DMs (kind 4): needs a `p` tag and a payload shaped
   * like `<base64>?iv=<base64>`. A fixed minimum length is not used because a
   * short message legitimately encrypts to roughly 50 characters.
   *
   * @param {Object} event
   * @returns {SpamVerdict}
   */
  validateEncryptedDM(event) {
    if (tagsNamed(event, 'p').length === 0) {
      return reject('Invalid encrypted DM: missing pubkey tag');
    }

    if (typeof event.content !== 'string' || event.content === '') {
      return reject('Invalid encrypted DM: content too short');
    }

    if (!NIP04_PAYLOAD.test(event.content)) {
      return reject('Invalid encrypted DM: malformed ciphertext');
    }

    return accept();
  }

  /**
   * Validator for direct messages (kind 14): needs at least one `p` tag.
   *
   * @param {Object} event
   * @returns {SpamVerdict}
   */
  validateDirectMessage(event) {
    if (tagsNamed(event, 'p').length === 0) {
      return reject('Invalid DM: missing pubkey tag');
    }
    return accept();
  }

  /**
   * Validator for long-form content (kind 30023): requires `title` and
   * `published_at` tags. An event whose tags cannot be read is reported as
   * malformed.
   *
   * NOTE(review): NIP-23 appears to list both tags as optional metadata —
   * confirm this stricter policy is intended.
   *
   * @param {Object} event
   * @returns {SpamVerdict}
   */
  validateLongForm(event) {
    try {
      const hasTitle = event.tags.some((tag) => tag[0] === 'title');
      const hasPublishedAt = event.tags.some((tag) => tag[0] === 'published_at');

      if (!hasTitle || !hasPublishedAt) {
        return reject('Invalid long-form content: missing required tags');
      }

      return accept();
    } catch (err) {
      // Reached when `tags` is missing or contains non-array entries.
      return reject('Invalid long-form content: malformed');
    }
  }

  /**
   * Validator for relay lists (kind 10002): relays live in `r` tags (content
   * is unused per NIP-65), so at least one `r` tag is required and each must
   * hold a ws:// or wss:// URL.
   *
   * @param {Object} event
   * @returns {SpamVerdict}
   */
  validateRelayList(event) {
    const relayTags = tagsNamed(event, 'r');
    if (relayTags.length === 0) {
      return reject('Invalid relay list: missing r tags');
    }

    if (relayTags.some((tag) => !isRelayUrl(tag[1]))) {
      return reject('Invalid relay list: contains invalid relay URLs');
    }

    return accept();
  }
}

// CommonJS export; the guard keeps the file loadable as an ES module as well.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = KindAwareContentChecker;
}