---
# Prometheus alerting rules for the Torrent Gateway service.
#
# Each rule fires once its `expr` has returned a result continuously for the
# rule's `for` duration. `severity` is attached as a label on the alert.
groups:
  - name: torrent-gateway-alerts
    rules:
      # Service availability alerts
      - alert: GatewayDown
        expr: up{job="torrent-gateway"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Torrent Gateway is down"
          description: "Torrent Gateway has been down for more than 1 minute"

      # Performance alerts
      - alert: HighRequestLatency
        # 95th percentile of request duration over the last 5 minutes,
        # compared against a 2 second threshold.
        expr: |-
          histogram_quantile(
            0.95,
            rate(gateway_request_duration_seconds_bucket[5m])
          ) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High request latency detected"
          description: "95th percentile request latency is {{ $value }}s"

      - alert: HighErrorRate
        # Fraction of requests answered with a 5xx status code.
        # Both sides are aggregated before dividing: without aggregation the
        # division matches each 5xx series only against itself (identical
        # label sets), so the ratio would be 1 whenever any 5xx occurred
        # instead of the real error fraction.
        expr: |-
          sum by (job, instance) (
            rate(gateway_requests_total{status_code=~"5.."}[5m])
          )
          /
          sum by (job, instance) (
            rate(gateway_requests_total[5m])
          )
          > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value | humanizePercentage }}"

      # Storage alerts
      - alert: HighStorageUsage
        # Threshold: 50 GiB (50 * 1024^3 bytes).
        expr: gateway_storage_used_bytes > 50 * 1024 * 1024 * 1024
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High storage usage"
          # humanize1024 renders the value with a binary prefix (Ki, Mi, Gi);
          # the trailing "B" completes the unit.
          description: "Storage usage is {{ $value | humanize1024 }}B"

      - alert: LowDiskSpace
        # Used fraction of each filesystem: (size - free) / size above 90%.
        expr: |-
          (node_filesystem_size_bytes - node_filesystem_free_bytes)
          / node_filesystem_size_bytes
          > 0.9
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space"
          description: "Disk usage is {{ $value | humanizePercentage }}"

      # Cache alerts
      - alert: LowCacheHitRate
        # hits / (hits + misses) over the last 5 minutes, below 50%.
        expr: |-
          rate(gateway_cache_hits_total[5m])
          / (
            rate(gateway_cache_hits_total[5m])
            + rate(gateway_cache_misses_total[5m])
          )
          < 0.5
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Low cache hit rate"
          description: "Cache hit rate is {{ $value | humanizePercentage }}"

      # Memory alerts
      - alert: HighMemoryUsage
        # Threshold: 2 GiB (2 * 1024^3 bytes).
        expr: gateway_memory_usage_bytes > 2 * 1024 * 1024 * 1024
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage"
          # humanize1024 renders the value with a binary prefix (Ki, Mi, Gi);
          # the trailing "B" completes the unit.
          description: "Memory usage is {{ $value | humanize1024 }}B"

      # Rate limiting alerts
      - alert: HighRateLimitBlocks
        # More than 10 rate-limit blocks per second, averaged over 5 minutes.
        expr: rate(gateway_rate_limit_blocks_total[5m]) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High rate limit blocks"
          description: "Rate limit blocks are {{ $value }}/sec"

      # Admin alerts
      - alert: SuspiciousAdminActivity
        # More than 5 admin actions per second, averaged over 5 minutes.
        expr: rate(gateway_admin_actions_total[5m]) > 5
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High admin activity detected"
          description: "Admin actions rate is {{ $value }}/sec"

      # Database alerts
      - alert: HighDatabaseErrors
        # More than 1 database error per second, averaged over 5 minutes.
        expr: rate(gateway_database_errors_total[5m]) > 1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Database errors detected"
          description: "Database error rate is {{ $value }}/sec"