Some checks are pending
CI Pipeline / Run Tests (push) Waiting to run
CI Pipeline / Lint Code (push) Waiting to run
CI Pipeline / Security Scan (push) Waiting to run
CI Pipeline / Build Docker Images (push) Blocked by required conditions
CI Pipeline / E2E Tests (push) Blocked by required conditions
519 lines
14 KiB
Go
519 lines
14 KiB
Go
package metrics
|
|
|
|
import (
	"fmt"
	"log"
	"net/http"
	"strconv"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
|
|
|
|
// Metrics holds all Prometheus metrics for the gateway
|
|
type Metrics struct {
|
|
// Request metrics
|
|
RequestsTotal *prometheus.CounterVec
|
|
RequestDuration *prometheus.HistogramVec
|
|
ActiveConnections prometheus.Gauge
|
|
|
|
// Upload metrics
|
|
UploadsTotal *prometheus.CounterVec
|
|
UploadSize *prometheus.HistogramVec
|
|
UploadDuration *prometheus.HistogramVec
|
|
|
|
// Download metrics
|
|
DownloadsTotal *prometheus.CounterVec
|
|
DownloadSize *prometheus.HistogramVec
|
|
DownloadDuration *prometheus.HistogramVec
|
|
|
|
// Stream metrics
|
|
StreamsActive prometheus.Gauge
|
|
StreamsTotal *prometheus.CounterVec
|
|
StreamDuration *prometheus.HistogramVec
|
|
|
|
// Storage metrics
|
|
StorageUsed prometheus.Gauge
|
|
FilesStored prometheus.Gauge
|
|
ChunksStored prometheus.Gauge
|
|
BlobsStored prometheus.Gauge
|
|
|
|
// Cache metrics
|
|
CacheHits *prometheus.CounterVec
|
|
CacheMisses *prometheus.CounterVec
|
|
CacheSize *prometheus.GaugeVec
|
|
CacheMemoryUsage *prometheus.GaugeVec
|
|
|
|
// Rate limiting metrics
|
|
RateLimitHits *prometheus.CounterVec
|
|
RateLimitBlocks *prometheus.CounterVec
|
|
|
|
// Admin metrics
|
|
AdminActions *prometheus.CounterVec
|
|
BannedUsers prometheus.Gauge
|
|
ContentReports *prometheus.CounterVec
|
|
|
|
// System metrics
|
|
DatabaseQueries *prometheus.CounterVec
|
|
DatabaseErrors *prometheus.CounterVec
|
|
GoroutineCount prometheus.Gauge
|
|
MemoryUsage prometheus.Gauge
|
|
|
|
// Blossom pool metrics
|
|
BlossomPoolServers *prometheus.GaugeVec
|
|
BlossomPoolRequests *prometheus.CounterVec
|
|
BlossomPoolErrors *prometheus.CounterVec
|
|
}
|
|
|
|
// NewMetrics creates and registers all Prometheus metrics
|
|
func NewMetrics() *Metrics {
|
|
m := &Metrics{
|
|
// Request metrics
|
|
RequestsTotal: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_requests_total",
|
|
Help: "Total number of HTTP requests",
|
|
},
|
|
[]string{"method", "endpoint", "status_code"},
|
|
),
|
|
RequestDuration: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "gateway_request_duration_seconds",
|
|
Help: "HTTP request duration in seconds",
|
|
Buckets: prometheus.DefBuckets,
|
|
},
|
|
[]string{"method", "endpoint"},
|
|
),
|
|
ActiveConnections: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_active_connections",
|
|
Help: "Number of active HTTP connections",
|
|
},
|
|
),
|
|
|
|
// Upload metrics
|
|
UploadsTotal: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_uploads_total",
|
|
Help: "Total number of file uploads",
|
|
},
|
|
[]string{"storage_type", "status"},
|
|
),
|
|
UploadSize: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "gateway_upload_size_bytes",
|
|
Help: "Upload file size in bytes",
|
|
Buckets: []float64{1024, 10240, 102400, 1048576, 10485760, 104857600, 1073741824}, // 1KB to 1GB
|
|
},
|
|
[]string{"storage_type"},
|
|
),
|
|
UploadDuration: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "gateway_upload_duration_seconds",
|
|
Help: "Upload duration in seconds",
|
|
Buckets: []float64{0.1, 0.5, 1, 5, 10, 30, 60, 300}, // 100ms to 5min
|
|
},
|
|
[]string{"storage_type"},
|
|
),
|
|
|
|
// Download metrics
|
|
DownloadsTotal: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_downloads_total",
|
|
Help: "Total number of file downloads",
|
|
},
|
|
[]string{"storage_type", "status"},
|
|
),
|
|
DownloadSize: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "gateway_download_size_bytes",
|
|
Help: "Download file size in bytes",
|
|
Buckets: []float64{1024, 10240, 102400, 1048576, 10485760, 104857600, 1073741824},
|
|
},
|
|
[]string{"storage_type"},
|
|
),
|
|
DownloadDuration: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "gateway_download_duration_seconds",
|
|
Help: "Download duration in seconds",
|
|
Buckets: []float64{0.1, 0.5, 1, 5, 10, 30, 60, 300},
|
|
},
|
|
[]string{"storage_type"},
|
|
),
|
|
|
|
// Stream metrics
|
|
StreamsActive: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_streams_active",
|
|
Help: "Number of active streams",
|
|
},
|
|
),
|
|
StreamsTotal: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_streams_total",
|
|
Help: "Total number of streams started",
|
|
},
|
|
[]string{"file_type", "status"},
|
|
),
|
|
StreamDuration: prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Name: "gateway_stream_duration_seconds",
|
|
Help: "Stream duration in seconds",
|
|
Buckets: []float64{1, 10, 60, 300, 1800, 3600}, // 1s to 1h
|
|
},
|
|
[]string{"file_type"},
|
|
),
|
|
|
|
// Storage metrics
|
|
StorageUsed: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_storage_used_bytes",
|
|
Help: "Total storage used in bytes",
|
|
},
|
|
),
|
|
FilesStored: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_files_stored_total",
|
|
Help: "Total number of files stored",
|
|
},
|
|
),
|
|
ChunksStored: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_chunks_stored_total",
|
|
Help: "Total number of chunks stored",
|
|
},
|
|
),
|
|
BlobsStored: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_blobs_stored_total",
|
|
Help: "Total number of blobs stored",
|
|
},
|
|
),
|
|
|
|
// Cache metrics
|
|
CacheHits: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_cache_hits_total",
|
|
Help: "Total number of cache hits",
|
|
},
|
|
[]string{"cache_type"},
|
|
),
|
|
CacheMisses: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_cache_misses_total",
|
|
Help: "Total number of cache misses",
|
|
},
|
|
[]string{"cache_type"},
|
|
),
|
|
CacheSize: prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_cache_size_items",
|
|
Help: "Number of items in cache",
|
|
},
|
|
[]string{"cache_type"},
|
|
),
|
|
CacheMemoryUsage: prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_cache_memory_bytes",
|
|
Help: "Memory usage of cache in bytes",
|
|
},
|
|
[]string{"cache_type"},
|
|
),
|
|
|
|
// Rate limiting metrics
|
|
RateLimitHits: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_rate_limit_hits_total",
|
|
Help: "Total number of rate limit hits",
|
|
},
|
|
[]string{"limit_type"},
|
|
),
|
|
RateLimitBlocks: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_rate_limit_blocks_total",
|
|
Help: "Total number of rate limit blocks",
|
|
},
|
|
[]string{"limit_type"},
|
|
),
|
|
|
|
// Admin metrics
|
|
AdminActions: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_admin_actions_total",
|
|
Help: "Total number of admin actions",
|
|
},
|
|
[]string{"action_type", "admin_pubkey"},
|
|
),
|
|
BannedUsers: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_banned_users_total",
|
|
Help: "Total number of banned users",
|
|
},
|
|
),
|
|
ContentReports: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_content_reports_total",
|
|
Help: "Total number of content reports",
|
|
},
|
|
[]string{"status"},
|
|
),
|
|
|
|
// System metrics
|
|
DatabaseQueries: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_database_queries_total",
|
|
Help: "Total number of database queries",
|
|
},
|
|
[]string{"operation", "table"},
|
|
),
|
|
DatabaseErrors: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_database_errors_total",
|
|
Help: "Total number of database errors",
|
|
},
|
|
[]string{"operation", "table"},
|
|
),
|
|
GoroutineCount: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_goroutines_active",
|
|
Help: "Number of active goroutines",
|
|
},
|
|
),
|
|
MemoryUsage: prometheus.NewGauge(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_memory_usage_bytes",
|
|
Help: "Memory usage in bytes",
|
|
},
|
|
),
|
|
|
|
// Blossom pool metrics
|
|
BlossomPoolServers: prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Name: "gateway_blossom_pool_servers",
|
|
Help: "Number of Blossom pool servers by status",
|
|
},
|
|
[]string{"status"}, // healthy, unhealthy
|
|
),
|
|
BlossomPoolRequests: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_blossom_pool_requests_total",
|
|
Help: "Total number of Blossom pool requests",
|
|
},
|
|
[]string{"server", "status"},
|
|
),
|
|
BlossomPoolErrors: prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Name: "gateway_blossom_pool_errors_total",
|
|
Help: "Total number of Blossom pool errors",
|
|
},
|
|
[]string{"server", "error_type"},
|
|
),
|
|
}
|
|
|
|
// Register all metrics
|
|
prometheus.MustRegister(
|
|
m.RequestsTotal,
|
|
m.RequestDuration,
|
|
m.ActiveConnections,
|
|
m.UploadsTotal,
|
|
m.UploadSize,
|
|
m.UploadDuration,
|
|
m.DownloadsTotal,
|
|
m.DownloadSize,
|
|
m.DownloadDuration,
|
|
m.StreamsActive,
|
|
m.StreamsTotal,
|
|
m.StreamDuration,
|
|
m.StorageUsed,
|
|
m.FilesStored,
|
|
m.ChunksStored,
|
|
m.BlobsStored,
|
|
m.CacheHits,
|
|
m.CacheMisses,
|
|
m.CacheSize,
|
|
m.CacheMemoryUsage,
|
|
m.RateLimitHits,
|
|
m.RateLimitBlocks,
|
|
m.AdminActions,
|
|
m.BannedUsers,
|
|
m.ContentReports,
|
|
m.DatabaseQueries,
|
|
m.DatabaseErrors,
|
|
m.GoroutineCount,
|
|
m.MemoryUsage,
|
|
m.BlossomPoolServers,
|
|
m.BlossomPoolRequests,
|
|
m.BlossomPoolErrors,
|
|
)
|
|
|
|
return m
|
|
}
|
|
|
|
// HTTPMiddleware wraps HTTP handlers to collect request metrics
|
|
func (m *Metrics) HTTPMiddleware(next http.Handler) http.Handler {
|
|
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
|
start := time.Now()
|
|
|
|
// Increment active connections
|
|
m.ActiveConnections.Inc()
|
|
defer m.ActiveConnections.Dec()
|
|
|
|
// Wrap response writer to capture status code
|
|
ww := &wrappedWriter{ResponseWriter: w, statusCode: 200}
|
|
|
|
// Call next handler
|
|
next.ServeHTTP(ww, r)
|
|
|
|
// Record metrics
|
|
duration := time.Since(start).Seconds()
|
|
endpoint := r.URL.Path
|
|
method := r.Method
|
|
statusCode := ww.statusCode
|
|
|
|
m.RequestsTotal.WithLabelValues(method, endpoint, string(rune(statusCode))).Inc()
|
|
m.RequestDuration.WithLabelValues(method, endpoint).Observe(duration)
|
|
})
|
|
}
|
|
|
|
// wrappedWriter wraps an http.ResponseWriter and remembers the status code
// passed to WriteHeader so it can be reported after the handler returns.
type wrappedWriter struct {
	http.ResponseWriter
	// statusCode is the recorded response status. Callers initialise it to
	// the status that applies when the handler never calls WriteHeader.
	statusCode int
	// wroteHeader is set once a final (non-informational) status has been
	// recorded; later WriteHeader calls no longer change statusCode.
	wroteHeader bool
}

// WriteHeader records statusCode and forwards the call to the wrapped writer.
//
// Only the first final status is recorded: net/http sends a single final
// status line and ignores superfluous WriteHeader calls, so letting a later
// call overwrite the recorded value would misreport the response. 1xx
// informational codes other than 101 Switching Protocols are forwarded but
// not recorded, because a final status still follows them.
func (w *wrappedWriter) WriteHeader(statusCode int) {
	informational := statusCode >= 100 && statusCode < 200 &&
		statusCode != http.StatusSwitchingProtocols
	if !w.wroteHeader && !informational {
		w.statusCode = statusCode
		w.wroteHeader = true
	}
	w.ResponseWriter.WriteHeader(statusCode)
}
|
|
|
|
// RecordUpload records upload metrics
|
|
func (m *Metrics) RecordUpload(storageType string, size int64, duration time.Duration, success bool) {
|
|
status := "success"
|
|
if !success {
|
|
status = "error"
|
|
}
|
|
|
|
m.UploadsTotal.WithLabelValues(storageType, status).Inc()
|
|
m.UploadSize.WithLabelValues(storageType).Observe(float64(size))
|
|
m.UploadDuration.WithLabelValues(storageType).Observe(duration.Seconds())
|
|
}
|
|
|
|
// RecordDownload records download metrics
|
|
func (m *Metrics) RecordDownload(storageType string, size int64, duration time.Duration, success bool) {
|
|
status := "success"
|
|
if !success {
|
|
status = "error"
|
|
}
|
|
|
|
m.DownloadsTotal.WithLabelValues(storageType, status).Inc()
|
|
m.DownloadSize.WithLabelValues(storageType).Observe(float64(size))
|
|
m.DownloadDuration.WithLabelValues(storageType).Observe(duration.Seconds())
|
|
}
|
|
|
|
// RecordStream records streaming metrics
|
|
func (m *Metrics) RecordStream(fileType string, duration time.Duration, success bool) {
|
|
status := "success"
|
|
if !success {
|
|
status = "error"
|
|
}
|
|
|
|
m.StreamsTotal.WithLabelValues(fileType, status).Inc()
|
|
m.StreamDuration.WithLabelValues(fileType).Observe(duration.Seconds())
|
|
}
|
|
|
|
// UpdateStorageMetrics updates storage-related metrics
|
|
func (m *Metrics) UpdateStorageMetrics(storageUsed int64, filesCount, chunksCount, blobsCount int) {
|
|
m.StorageUsed.Set(float64(storageUsed))
|
|
m.FilesStored.Set(float64(filesCount))
|
|
m.ChunksStored.Set(float64(chunksCount))
|
|
m.BlobsStored.Set(float64(blobsCount))
|
|
}
|
|
|
|
// RecordCacheOperation records cache hit/miss
|
|
func (m *Metrics) RecordCacheOperation(cacheType string, hit bool) {
|
|
if hit {
|
|
m.CacheHits.WithLabelValues(cacheType).Inc()
|
|
} else {
|
|
m.CacheMisses.WithLabelValues(cacheType).Inc()
|
|
}
|
|
}
|
|
|
|
// UpdateCacheMetrics updates cache size and memory usage
|
|
func (m *Metrics) UpdateCacheMetrics(cacheType string, size int, memoryUsage int64) {
|
|
m.CacheSize.WithLabelValues(cacheType).Set(float64(size))
|
|
m.CacheMemoryUsage.WithLabelValues(cacheType).Set(float64(memoryUsage))
|
|
}
|
|
|
|
// RecordRateLimit records rate limiting events
|
|
func (m *Metrics) RecordRateLimit(limitType string, blocked bool) {
|
|
if blocked {
|
|
m.RateLimitBlocks.WithLabelValues(limitType).Inc()
|
|
} else {
|
|
m.RateLimitHits.WithLabelValues(limitType).Inc()
|
|
}
|
|
}
|
|
|
|
// RecordAdminAction records admin actions
|
|
func (m *Metrics) RecordAdminAction(actionType, adminPubkey string) {
|
|
m.AdminActions.WithLabelValues(actionType, adminPubkey[:16]+"...").Inc()
|
|
}
|
|
|
|
// UpdateAdminMetrics updates admin-related metrics
|
|
func (m *Metrics) UpdateAdminMetrics(bannedUsersCount int) {
|
|
m.BannedUsers.Set(float64(bannedUsersCount))
|
|
}
|
|
|
|
// RecordContentReport records content reports
|
|
func (m *Metrics) RecordContentReport(status string) {
|
|
m.ContentReports.WithLabelValues(status).Inc()
|
|
}
|
|
|
|
// RecordDatabaseOperation records database queries and errors
|
|
func (m *Metrics) RecordDatabaseOperation(operation, table string, success bool) {
|
|
m.DatabaseQueries.WithLabelValues(operation, table).Inc()
|
|
if !success {
|
|
m.DatabaseErrors.WithLabelValues(operation, table).Inc()
|
|
}
|
|
}
|
|
|
|
// UpdateSystemMetrics updates system-level metrics
|
|
func (m *Metrics) UpdateSystemMetrics(goroutineCount int, memoryUsage int64) {
|
|
m.GoroutineCount.Set(float64(goroutineCount))
|
|
m.MemoryUsage.Set(float64(memoryUsage))
|
|
}
|
|
|
|
// RecordBlossomPoolOperation records Blossom pool metrics
|
|
func (m *Metrics) RecordBlossomPoolOperation(server, status string, success bool) {
|
|
m.BlossomPoolRequests.WithLabelValues(server, status).Inc()
|
|
if !success {
|
|
m.BlossomPoolErrors.WithLabelValues(server, "request_failed").Inc()
|
|
}
|
|
}
|
|
|
|
// UpdateBlossomPoolHealth updates Blossom pool server health metrics
|
|
func (m *Metrics) UpdateBlossomPoolHealth(healthyCount, unhealthyCount int) {
|
|
m.BlossomPoolServers.WithLabelValues("healthy").Set(float64(healthyCount))
|
|
m.BlossomPoolServers.WithLabelValues("unhealthy").Set(float64(unhealthyCount))
|
|
}
|
|
|
|
// Handler returns the Prometheus metrics HTTP handler
|
|
func (m *Metrics) Handler() http.Handler {
|
|
return promhttp.Handler()
|
|
}
|
|
|
|
// StartMetricsServer starts a dedicated metrics server
|
|
func (m *Metrics) StartMetricsServer(port int) {
|
|
mux := http.NewServeMux()
|
|
mux.Handle("/metrics", m.Handler())
|
|
|
|
server := &http.Server{
|
|
Addr: fmt.Sprintf(":%d", port),
|
|
Handler: mux,
|
|
}
|
|
|
|
go func() {
|
|
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
|
log.Printf("Metrics server error: %v", err)
|
|
}
|
|
}()
|
|
} |