# Some checks are pending
# CI Pipeline / Run Tests (push) Waiting to run
# CI Pipeline / Lint Code (push) Waiting to run
# CI Pipeline / Security Scan (push) Waiting to run
# CI Pipeline / Build Docker Images (push) Blocked by required conditions
# CI Pipeline / E2E Tests (push) Blocked by required conditions
# 100 lines
# 3.3 KiB
# YAML
# 100 lines
# 3.3 KiB
# YAML
---
# Prometheus alerting rules for the Torrent Gateway.
#
# Every rule has the same shape: `expr` must hold continuously for the `for`
# window before the alert fires, carrying the `severity` label and the
# `summary` / `description` annotations. `{{ $value }}` in an annotation is
# the value `expr` returned for the firing series.
groups:
  - name: torrent-gateway-alerts
    rules:
      # Service availability alerts
      # `up` is 0 when the last scrape of a "torrent-gateway" target failed.
      - alert: GatewayDown
        expr: up{job="torrent-gateway"} == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Torrent Gateway is down"
          description: "Torrent Gateway has been down for more than 1 minute"

      # Performance alerts
      # 95th-percentile request duration above 2 seconds.
      # NOTE(review): the bucket rates are not aggregated, so the quantile is
      # computed separately for every label combination the histogram
      # exposes -- confirm that per-series evaluation is what is wanted.
      - alert: HighRequestLatency
        expr: histogram_quantile(0.95, rate(gateway_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High request latency detected"
          description: "95th percentile request latency is {{ $value }}s"

      # Share of requests answered with a 5xx status code above 10%.
      # Both sides are wrapped in sum(): PromQL binary operators pair series
      # with identical label sets, so without aggregation each 5xx series
      # would be divided by itself and the ratio would always be 1.
      - alert: HighErrorRate
        expr: >-
          sum(rate(gateway_requests_total{status_code=~"5.."}[5m]))
          / sum(rate(gateway_requests_total[5m]))
          > 0.1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High error rate detected"
          description: "Error rate is {{ $value | humanizePercentage }}"

      # Storage alerts
      # `humanize1024` renders the value with 1024-based prefixes (Ki, Mi,
      # Gi, ...); the trailing "B" in the description supplies the unit.
      - alert: HighStorageUsage
        expr: gateway_storage_used_bytes > 50 * 1024 * 1024 * 1024  # 50 GiB
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High storage usage"
          description: "Storage usage is {{ $value | humanize1024 }}B"

      # Used fraction (size - free) / size above 90%.
      # NOTE(review): there are no label matchers, so this is evaluated for
      # every filesystem series present -- confirm that is intended.
      - alert: LowDiskSpace
        expr: (node_filesystem_size_bytes - node_filesystem_free_bytes) / node_filesystem_size_bytes > 0.9
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space"
          description: "Disk usage is {{ $value | humanizePercentage }}"

      # Cache alerts
      # hits / (hits + misses) below 50%, sustained for 10 minutes.
      - alert: LowCacheHitRate
        expr: >-
          rate(gateway_cache_hits_total[5m])
          / (rate(gateway_cache_hits_total[5m]) + rate(gateway_cache_misses_total[5m]))
          < 0.5
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "Low cache hit rate"
          description: "Cache hit rate is {{ $value | humanizePercentage }}"

      # Memory alerts
      # See HighStorageUsage above for the humanize1024 + "B" formatting.
      - alert: HighMemoryUsage
        expr: gateway_memory_usage_bytes > 2 * 1024 * 1024 * 1024  # 2 GiB
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High memory usage"
          description: "Memory usage is {{ $value | humanize1024 }}B"

      # Rate limiting alerts
      # More than 10 rate-limit blocks per second (5m average).
      - alert: HighRateLimitBlocks
        expr: rate(gateway_rate_limit_blocks_total[5m]) > 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High rate limit blocks"
          description: "Rate limit blocks are {{ $value }}/sec"

      # Admin alerts
      # More than 5 admin actions per second (5m average), held for 2 minutes.
      - alert: SuspiciousAdminActivity
        expr: rate(gateway_admin_actions_total[5m]) > 5
        for: 2m
        labels:
          severity: warning
        annotations:
          summary: "High admin activity detected"
          description: "Admin actions rate is {{ $value }}/sec"

      # Database alerts
      # More than 1 database error per second (5m average).
      - alert: HighDatabaseErrors
        expr: rate(gateway_database_errors_total[5m]) > 1
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Database errors detected"
          description: "Database error rate is {{ $value }}/sec"