stirfry-spam-filter/kind-aware-content-checker.js
2024-12-20 10:00:55 +00:00

370 lines
12 KiB
JavaScript

#!/usr/bin/env node
/**
 * Kind-aware spam heuristics for Nostr events.
 *
 * Each event is routed by its `kind`:
 *   1. kinds with a dedicated validator (`specialContentRules`) are checked
 *      structurally by that validator;
 *   2. other kinds listed in `standardizedContentKinds` are accepted without
 *      a content check;
 *   3. every remaining kind goes through the content-similarity check.
 *
 * All checks return a verdict object `{ isSpam: boolean, reason: string|null }`.
 */
class KindAwareContentChecker {
  constructor() {
    // Kinds that bypass the content-similarity check.
    this.standardizedContentKinds = new Set([
      0, // Metadata
      3, // Follows
      4, // Encrypted Direct Messages
      5, // Event Deletion
      6, // Repost
      7, // Reaction
      8, // Badge Award
      9, // Chat Message
      13, // Seal
      14, // Direct Message
      16, // Generic Repost
      17, // Website Reaction
      40, // Channel Creation
      41, // Channel Metadata
      42, // Channel Message
      43, // Channel Hide Message
      44, // Channel Mute User
      1063, // File Metadata
      1984, // Reporting
      1985, // Label
      1986, // Relay reviews
      2003, // Torrent
      9734, // Zap Request
      9735, // Zap
      10000, // Mute list
      10001, // Pin list
      10002, // Relay List Metadata
      10003, // Bookmark list
      10004, // Communities list
      10005, // Public chats list
      10006, // Blocked relays list
      10007, // Search relays list
      10009, // User groups
      10013, // Draft relays
      10015, // Interests list
      10019, // Nutzap Mint Recommendation
      10030, // User emoji list
      10050, // Relay list for DMs
      10063, // User server list
      10096, // File storage server list
      13194, // Wallet Info
      22242, // Client Authentication
      23194, // Wallet Request
      23195, // Wallet Response
      24133, // Nostr Connect
      24242, // Blobs on mediaservers
      27235, // HTTP Auth
      30000, // Follow sets
      30001, // Generic lists
      30002, // Relay sets
      30003, // Bookmark sets
      30004, // Curation sets
      30005, // Video sets
      30007, // Kind mute sets
      30008, // Profile Badges
      30009, // Badge Definition
      30017, // Create/update stall
      30018, // Create/update product
      30019, // Marketplace UI/UX
      30023, // Long-form Content
      30024, // Draft Long-form Content
      30078, // Application-specific Data
      30311, // Live Event
      30315, // User Statuses
      30402, // Classified Listing
      31234, // Draft Event
      31922, // Date-Based Calendar Event
      31923, // Time-Based Calendar Event
      31924, // Calendar
      31925, // Calendar Event RSVP
      34235, // Video Event
      34550, // Community Definition
      37375, // Cashu Wallet Event
      38383 // Peer-to-peer Order events
    ]);

    // Kinds that have a dedicated structural validator (kind -> bound method).
    this.specialContentRules = new Map([
      [0, this.validateMetadata.bind(this)], // Metadata
      [3, this.validateFollowList.bind(this)], // Follow Lists
      [4, this.validateEncryptedDM.bind(this)], // Encrypted DM
      [6, this.validateRepost.bind(this)], // Reposts
      [7, this.validateReaction.bind(this)], // Reactions
      [14, this.validateDirectMessage.bind(this)], // Direct Message
      [16, this.validateRepost.bind(this)], // Generic Reposts
      [30023, this.validateLongForm.bind(this)], // Long-form Content
      [10002, this.validateRelayList.bind(this)] // Relay Lists
    ]);
  }

  /**
   * Main entry point: classify a single event.
   *
   * @param {{kind: number, content: string, tags: Array<Array<string>>, pubkey: string}} event
   *   Event to classify.
   * @param {Array<object>} [recentEvents=[]] Recently seen events, used only
   *   by the similarity check.
   * @returns {{isSpam: boolean, reason: (string|null)}} Verdict.
   */
  checkContent(event, recentEvents = []) {
    // Dedicated validators must be consulted first: every kind registered in
    // specialContentRules is also a member of standardizedContentKinds, so
    // testing the bypass set first would make the validators unreachable.
    const validator = this.specialContentRules.get(event.kind);
    if (validator !== undefined) {
      return validator(event, recentEvents);
    }

    // Remaining standardized kinds skip content checks entirely.
    if (this.standardizedContentKinds.has(event.kind)) {
      return { isSpam: false, reason: null };
    }

    // Everything else gets the generic similarity check.
    return this.checkContentSimilarity(event, recentEvents);
  }

  /**
   * Validator for follow lists (kind 3): requires at least one `p` tag, empty
   * content, and a 64-character lowercase-hex pubkey in every `p` tag.
   *
   * @param {object} event
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  validateFollowList(event) {
    const pTags = event.tags.filter(tag => tag[0] === 'p');
    if (pTags.length === 0) {
      return {
        isSpam: true,
        reason: 'Invalid follow list: missing p tags'
      };
    }

    // Content should be empty per NIP-02
    if (event.content !== '') {
      return {
        isSpam: true,
        reason: 'Invalid follow list: content must be empty'
      };
    }

    const hasInvalidKey = pTags.some(tag =>
      tag.length < 2 ||
      !/^[0-9a-f]{64}$/.test(tag[1])
    );
    if (hasInvalidKey) {
      return {
        isSpam: true,
        reason: 'Invalid follow list: contains invalid pubkeys'
      };
    }

    return { isSpam: false, reason: null };
  }

  /**
   * Validator for reactions (kind 7).
   *
   * The reference tags are checked unconditionally. Previously a recognised
   * content value ('+', '-', an emoji, ...) returned early and skipped the
   * tag check, so a reaction pointing at nothing was accepted.
   *
   * Content is deliberately not restricted: `isValidEmojiReaction` only
   * recognises single-code-point emoji, so rejecting on it would flag
   * legitimate multi-code-point emoji reactions.
   *
   * @param {object} event
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  validateReaction(event) {
    if (!this.hasValidReactionTags(event)) {
      return {
        isSpam: true,
        reason: 'Invalid reaction: missing required tags'
      };
    }
    return { isSpam: false, reason: null };
  }

  /**
   * Helper: does the reaction content look like an emoji reaction?
   *
   * Accepts either a `:shortcode:` that has a matching `emoji` tag, or a
   * string that (after trimming) is a single code point with the Unicode
   * `Emoji` property.
   *
   * @param {object} event
   * @returns {boolean}
   */
  isValidEmojiReaction(event) {
    const content = event.content;
    if (typeof content !== 'string') {
      return false;
    }

    // Custom emoji format (NIP-30); needs at least one character between
    // the colons, so ':' and '::' are not treated as shortcodes.
    if (content.length > 2 && content.startsWith(':') && content.endsWith(':')) {
      const shortcode = content.slice(1, -1);
      return event.tags.some(tag =>
        tag[0] === 'emoji' &&
        tag[1] === shortcode
      );
    }

    // Single unicode emoji (one code point).
    const emojiRegex = /^\p{Emoji}$/u;
    return emojiRegex.test(content.trim());
  }

  /**
   * Helper: a reaction must reference both an event (`e`) and an author (`p`).
   *
   * @param {object} event
   * @returns {boolean}
   */
  hasValidReactionTags(event) {
    const hasEventTag = event.tags.some(tag => tag[0] === 'e');
    const hasPubkeyTag = event.tags.some(tag => tag[0] === 'p');
    return hasEventTag && hasPubkeyTag;
  }

  /**
   * Helper: true when `text` parses as a JSON object (not an array, not a
   * primitive, not null).
   *
   * @param {string} text
   * @returns {boolean}
   */
  isJsonObject(text) {
    try {
      const parsed = JSON.parse(text);
      return typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed);
    } catch (e) {
      return false;
    }
  }

  /**
   * Validator for reposts (kinds 6 and 16): requires an `e` tag.
   *
   * For kind 6 the content must be either empty or the stringified JSON of
   * the reposted event (NIP-18). The previous rule rejected every non-empty
   * content and therefore flagged the standard form of a repost.
   *
   * @param {object} event
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  validateRepost(event) {
    const hasEventTag = event.tags.some(tag => tag[0] === 'e');
    if (!hasEventTag) {
      return {
        isSpam: true,
        reason: 'Invalid repost: missing event reference'
      };
    }

    if (event.kind === 6 && event.content !== '' && !this.isJsonObject(event.content)) {
      return {
        isSpam: true,
        reason: 'Invalid repost: kind 6 content must be empty or the reposted event as JSON'
      };
    }

    return { isSpam: false, reason: null };
  }

  /**
   * Validator for metadata (kind 0): content must be a JSON object.
   *
   * @param {object} event
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  validateMetadata(event) {
    let metadata;
    try {
      metadata = JSON.parse(event.content);
    } catch (e) {
      return {
        isSpam: true,
        reason: 'Invalid metadata: malformed JSON'
      };
    }

    // typeof [] is 'object', so arrays have to be excluded explicitly.
    if (typeof metadata !== 'object' || metadata === null || Array.isArray(metadata)) {
      return {
        isSpam: true,
        reason: 'Invalid metadata: not an object'
      };
    }

    return { isSpam: false, reason: null };
  }

  /**
   * Generic check: flags the event when any recent event of the same kind,
   * from a different pubkey, has content more than 80% similar.
   *
   * @param {object} event
   * @param {Array<object>} [recentEvents=[]]
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  checkContentSimilarity(event, recentEvents = []) {
    const similarEvents = recentEvents.filter(e =>
      e.pubkey !== event.pubkey &&
      e.kind === event.kind &&
      this.calculateSimilarity(e.content, event.content) > 0.8
    );

    if (similarEvents.length > 0) {
      return {
        isSpam: true,
        reason: `Content too similar to ${similarEvents.length} recent events`
      };
    }

    return { isSpam: false, reason: null };
  }

  /**
   * Normalised similarity in [0, 1] derived from the Levenshtein distance:
   * 1 means identical, 0 means nothing in common.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number}
   */
  calculateSimilarity(str1, str2) {
    // Also covers two empty strings, so maxLen below is never 0.
    if (str1 === str2) return 1.0;

    const maxLen = Math.max(str1.length, str2.length);
    const distance = this.levenshteinDistance(str1, str2);
    return (maxLen - distance) / maxLen;
  }

  /**
   * Levenshtein edit distance between two strings, compared per UTF-16 code
   * unit. Keeps only two rows of the DP table instead of the full matrix.
   *
   * @param {string} str1
   * @param {string} str2
   * @returns {number}
   */
  levenshteinDistance(str1, str2) {
    const m = str1.length;
    const n = str2.length;
    if (m === 0) return n;
    if (n === 0) return m;

    // previous[j] = distance between str1[0..i-1) and str2[0..j)
    let previous = Array.from({ length: n + 1 }, (_, j) => j);
    let current = new Array(n + 1).fill(0);

    for (let i = 1; i <= m; i++) {
      current[0] = i;
      for (let j = 1; j <= n; j++) {
        if (str1[i - 1] === str2[j - 1]) {
          current[j] = previous[j - 1];
        } else {
          current[j] = Math.min(
            previous[j - 1], // substitution
            previous[j], // deletion
            current[j - 1] // insertion
          ) + 1;
        }
      }
      [previous, current] = [current, previous];
    }

    return previous[n];
  }

  /**
   * Validator for encrypted DMs (kind 4): requires a `p` tag and content of
   * the NIP-04 form `<base64 ciphertext>?iv=<base64 iv>`.
   *
   * The previous fixed 100-character minimum rejected short messages: one
   * AES block plus a 16-byte IV encodes to only 52 characters.
   *
   * @param {object} event
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  validateEncryptedDM(event) {
    const hasPubkeyTag = event.tags.some(tag => tag[0] === 'p');
    if (!hasPubkeyTag) {
      return {
        isSpam: true,
        reason: 'Invalid encrypted DM: missing pubkey tag'
      };
    }

    if (typeof event.content !== 'string' || event.content === '') {
      return {
        isSpam: true,
        reason: 'Invalid encrypted DM: content too short'
      };
    }

    // 16 bytes encode to 22 base64 characters plus optional '==' padding:
    // at least one cipher block, then exactly one 16-byte IV.
    const nip04Content = /^[A-Za-z0-9+\/]{22,}={0,2}\?iv=[A-Za-z0-9+\/]{22}={0,2}$/;
    if (!nip04Content.test(event.content)) {
      return {
        isSpam: true,
        reason: 'Invalid encrypted DM: malformed ciphertext'
      };
    }

    return { isSpam: false, reason: null };
  }

  /**
   * Validator for direct messages (kind 14): requires a `p` tag.
   *
   * @param {object} event
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  validateDirectMessage(event) {
    const hasPubkeyTag = event.tags.some(tag => tag[0] === 'p');
    if (!hasPubkeyTag) {
      return {
        isSpam: true,
        reason: 'Invalid DM: missing pubkey tag'
      };
    }
    return { isSpam: false, reason: null };
  }

  /**
   * Validator for long-form content (kind 30023): requires both a `title`
   * and a `published_at` tag. Any exception while inspecting the event is
   * reported as a malformed event.
   *
   * NOTE(review): NIP-23 appears to treat these tags as optional metadata —
   * confirm that requiring them is the intended policy.
   *
   * @param {object} event
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  validateLongForm(event) {
    try {
      const hasTitle = event.tags.some(tag => tag[0] === 'title');
      const hasPublishedAt = event.tags.some(tag => tag[0] === 'published_at');
      if (!hasTitle || !hasPublishedAt) {
        return {
          isSpam: true,
          reason: 'Invalid long-form content: missing required tags'
        };
      }
      return { isSpam: false, reason: null };
    } catch (e) {
      return {
        isSpam: true,
        reason: 'Invalid long-form content: malformed'
      };
    }
  }

  /**
   * Validator for relay lists (kind 10002).
   *
   * NIP-65 carries relays in `r` tags and leaves the content unused, so the
   * tags are validated rather than the content. The previous implementation
   * ran JSON.parse on the content, which throws on the empty string and so
   * rejected every well-formed relay list.
   *
   * @param {object} event
   * @returns {{isSpam: boolean, reason: (string|null)}}
   */
  validateRelayList(event) {
    const relayTags = event.tags.filter(tag => tag[0] === 'r');
    if (relayTags.length === 0) {
      return {
        isSpam: true,
        reason: 'Invalid relay list: missing r tags'
      };
    }

    const hasInvalidUrl = relayTags.some(tag =>
      typeof tag[1] !== 'string' ||
      !/^wss?:\/\/\S+$/i.test(tag[1])
    );
    if (hasInvalidUrl) {
      return {
        isSpam: true,
        reason: 'Invalid relay list: contains invalid relay URLs'
      };
    }

    return { isSpam: false, reason: null };
  }
}
// Expose the checker class as this CommonJS module's sole export.
module.exports = KindAwareContentChecker;