mirror of
https://github.com/ctwj/urldb.git
synced 2025-11-25 03:15:04 +08:00
458 lines
12 KiB
Go
458 lines
12 KiB
Go
package monitor
|
||
|
||
import (
|
||
"fmt"
|
||
"net/http"
|
||
"runtime"
|
||
"sync"
|
||
"time"
|
||
|
||
"github.com/ctwj/urldb/utils"
|
||
"github.com/gin-gonic/gin"
|
||
"github.com/prometheus/client_golang/prometheus"
|
||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||
)
|
||
|
||
// Metrics 监控指标
|
||
type Metrics struct {
|
||
// HTTP请求指标
|
||
RequestsTotal *prometheus.CounterVec
|
||
RequestDuration *prometheus.HistogramVec
|
||
RequestSize *prometheus.SummaryVec
|
||
ResponseSize *prometheus.SummaryVec
|
||
|
||
// 数据库指标
|
||
DatabaseQueries *prometheus.CounterVec
|
||
DatabaseErrors *prometheus.CounterVec
|
||
DatabaseDuration *prometheus.HistogramVec
|
||
|
||
// 系统指标
|
||
MemoryUsage prometheus.Gauge
|
||
Goroutines prometheus.Gauge
|
||
GCStats *prometheus.CounterVec
|
||
|
||
// 业务指标
|
||
ResourcesCreated *prometheus.CounterVec
|
||
ResourcesViewed *prometheus.CounterVec
|
||
Searches *prometheus.CounterVec
|
||
Transfers *prometheus.CounterVec
|
||
|
||
// 错误指标
|
||
ErrorsTotal *prometheus.CounterVec
|
||
|
||
// 自定义指标
|
||
CustomCounters map[string]prometheus.Counter
|
||
CustomGauges map[string]prometheus.Gauge
|
||
mu sync.RWMutex
|
||
}
|
||
|
||
// MetricsConfig 监控配置
|
||
type MetricsConfig struct {
|
||
Enabled bool
|
||
ListenAddress string
|
||
MetricsPath string
|
||
Namespace string
|
||
Subsystem string
|
||
}
|
||
|
||
// DefaultMetricsConfig 默认监控配置
|
||
func DefaultMetricsConfig() *MetricsConfig {
|
||
return &MetricsConfig{
|
||
Enabled: true,
|
||
ListenAddress: ":9090",
|
||
MetricsPath: "/metrics",
|
||
Namespace: "urldb",
|
||
Subsystem: "api",
|
||
}
|
||
}
|
||
|
||
// GlobalMetrics 全局监控实例
|
||
var (
|
||
globalMetrics *Metrics
|
||
once sync.Once
|
||
)
|
||
|
||
// NewMetrics 创建新的监控指标
|
||
func NewMetrics(config *MetricsConfig) *Metrics {
|
||
if config == nil {
|
||
config = DefaultMetricsConfig()
|
||
}
|
||
|
||
namespace := config.Namespace
|
||
subsystem := config.Subsystem
|
||
|
||
m := &Metrics{
|
||
// HTTP请求指标
|
||
RequestsTotal: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: subsystem,
|
||
Name: "http_requests_total",
|
||
Help: "Total number of HTTP requests",
|
||
},
|
||
[]string{"method", "endpoint", "status"},
|
||
),
|
||
RequestDuration: promauto.NewHistogramVec(
|
||
prometheus.HistogramOpts{
|
||
Namespace: namespace,
|
||
Subsystem: subsystem,
|
||
Name: "http_request_duration_seconds",
|
||
Help: "HTTP request duration in seconds",
|
||
Buckets: prometheus.DefBuckets,
|
||
},
|
||
[]string{"method", "endpoint", "status"},
|
||
),
|
||
RequestSize: promauto.NewSummaryVec(
|
||
prometheus.SummaryOpts{
|
||
Namespace: namespace,
|
||
Subsystem: subsystem,
|
||
Name: "http_request_size_bytes",
|
||
Help: "HTTP request size in bytes",
|
||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
||
},
|
||
[]string{"method", "endpoint"},
|
||
),
|
||
ResponseSize: promauto.NewSummaryVec(
|
||
prometheus.SummaryOpts{
|
||
Namespace: namespace,
|
||
Subsystem: subsystem,
|
||
Name: "http_response_size_bytes",
|
||
Help: "HTTP response size in bytes",
|
||
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
|
||
},
|
||
[]string{"method", "endpoint"},
|
||
),
|
||
|
||
// 数据库指标
|
||
DatabaseQueries: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "database",
|
||
Name: "queries_total",
|
||
Help: "Total number of database queries",
|
||
},
|
||
[]string{"table", "operation"},
|
||
),
|
||
DatabaseErrors: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "database",
|
||
Name: "errors_total",
|
||
Help: "Total number of database errors",
|
||
},
|
||
[]string{"table", "operation", "error"},
|
||
),
|
||
DatabaseDuration: promauto.NewHistogramVec(
|
||
prometheus.HistogramOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "database",
|
||
Name: "query_duration_seconds",
|
||
Help: "Database query duration in seconds",
|
||
Buckets: prometheus.DefBuckets,
|
||
},
|
||
[]string{"table", "operation"},
|
||
),
|
||
|
||
// 系统指标
|
||
MemoryUsage: promauto.NewGauge(
|
||
prometheus.GaugeOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "system",
|
||
Name: "memory_usage_bytes",
|
||
Help: "Current memory usage in bytes",
|
||
},
|
||
),
|
||
Goroutines: promauto.NewGauge(
|
||
prometheus.GaugeOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "system",
|
||
Name: "goroutines",
|
||
Help: "Number of goroutines",
|
||
},
|
||
),
|
||
GCStats: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "system",
|
||
Name: "gc_stats_total",
|
||
Help: "Garbage collection statistics",
|
||
},
|
||
[]string{"type"},
|
||
),
|
||
|
||
// 业务指标
|
||
ResourcesCreated: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "business",
|
||
Name: "resources_created_total",
|
||
Help: "Total number of resources created",
|
||
},
|
||
[]string{"category", "platform"},
|
||
),
|
||
ResourcesViewed: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "business",
|
||
Name: "resources_viewed_total",
|
||
Help: "Total number of resources viewed",
|
||
},
|
||
[]string{"category"},
|
||
),
|
||
Searches: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "business",
|
||
Name: "searches_total",
|
||
Help: "Total number of searches",
|
||
},
|
||
[]string{"platform"},
|
||
),
|
||
Transfers: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "business",
|
||
Name: "transfers_total",
|
||
Help: "Total number of transfers",
|
||
},
|
||
[]string{"platform", "status"},
|
||
),
|
||
|
||
// 错误指标
|
||
ErrorsTotal: promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: namespace,
|
||
Subsystem: "errors",
|
||
Name: "total",
|
||
Help: "Total number of errors",
|
||
},
|
||
[]string{"type", "endpoint"},
|
||
),
|
||
|
||
// 自定义指标
|
||
CustomCounters: make(map[string]prometheus.Counter),
|
||
CustomGauges: make(map[string]prometheus.Gauge),
|
||
}
|
||
|
||
// 启动系统指标收集
|
||
go m.collectSystemMetrics()
|
||
|
||
return m
|
||
}
|
||
|
||
// GetGlobalMetrics 获取全局监控实例
|
||
func GetGlobalMetrics() *Metrics {
|
||
once.Do(func() {
|
||
globalMetrics = NewMetrics(DefaultMetricsConfig())
|
||
})
|
||
return globalMetrics
|
||
}
|
||
|
||
// SetGlobalMetrics 设置全局监控实例
|
||
func SetGlobalMetrics(metrics *Metrics) {
|
||
globalMetrics = metrics
|
||
}
|
||
|
||
// collectSystemMetrics 收集系统指标
|
||
func (m *Metrics) collectSystemMetrics() {
|
||
ticker := time.NewTicker(10 * time.Second)
|
||
defer ticker.Stop()
|
||
|
||
for range ticker.C {
|
||
// 收集内存使用情况
|
||
var ms runtime.MemStats
|
||
runtime.ReadMemStats(&ms)
|
||
m.MemoryUsage.Set(float64(ms.Alloc))
|
||
|
||
// 收集goroutine数量
|
||
m.Goroutines.Set(float64(runtime.NumGoroutine()))
|
||
|
||
// 收集GC统计
|
||
m.GCStats.WithLabelValues("alloc").Add(float64(ms.TotalAlloc))
|
||
m.GCStats.WithLabelValues("sys").Add(float64(ms.Sys))
|
||
m.GCStats.WithLabelValues("lookups").Add(float64(ms.Lookups))
|
||
m.GCStats.WithLabelValues("mallocs").Add(float64(ms.Mallocs))
|
||
m.GCStats.WithLabelValues("frees").Add(float64(ms.Frees))
|
||
}
|
||
}
|
||
|
||
// MetricsMiddleware 监控中间件
|
||
func (m *Metrics) MetricsMiddleware() gin.HandlerFunc {
|
||
return func(c *gin.Context) {
|
||
start := time.Now()
|
||
path := c.FullPath()
|
||
|
||
// 如果没有匹配的路由,使用请求路径
|
||
if path == "" {
|
||
path = c.Request.URL.Path
|
||
}
|
||
|
||
// 记录请求大小
|
||
requestSize := float64(c.Request.ContentLength)
|
||
m.RequestSize.WithLabelValues(c.Request.Method, path).Observe(requestSize)
|
||
|
||
c.Next()
|
||
|
||
// 记录响应信息
|
||
status := c.Writer.Status()
|
||
latency := time.Since(start).Seconds()
|
||
responseSize := float64(c.Writer.Size())
|
||
|
||
// 更新指标
|
||
m.RequestsTotal.WithLabelValues(c.Request.Method, path, fmt.Sprintf("%d", status)).Inc()
|
||
m.RequestDuration.WithLabelValues(c.Request.Method, path, fmt.Sprintf("%d", status)).Observe(latency)
|
||
m.ResponseSize.WithLabelValues(c.Request.Method, path).Observe(responseSize)
|
||
|
||
// 如果是错误状态码,记录错误
|
||
if status >= 400 {
|
||
m.ErrorsTotal.WithLabelValues("http", path).Inc()
|
||
}
|
||
}
|
||
}
|
||
|
||
// StartMetricsServer 启动监控服务器
|
||
func (m *Metrics) StartMetricsServer(config *MetricsConfig) {
|
||
if config == nil {
|
||
config = DefaultMetricsConfig()
|
||
}
|
||
|
||
if !config.Enabled {
|
||
utils.Info("监控服务器未启用")
|
||
return
|
||
}
|
||
|
||
// 创建新的Gin路由器
|
||
router := gin.New()
|
||
router.Use(gin.Recovery())
|
||
|
||
// 注册Prometheus指标端点
|
||
router.GET(config.MetricsPath, gin.WrapH(promhttp.Handler()))
|
||
|
||
// 启动HTTP服务器
|
||
go func() {
|
||
utils.Info("监控服务器启动在 %s", config.ListenAddress)
|
||
if err := router.Run(config.ListenAddress); err != nil {
|
||
utils.Error("监控服务器启动失败: %v", err)
|
||
}
|
||
}()
|
||
|
||
utils.Info("监控服务器已启动,指标路径: %s%s", config.ListenAddress, config.MetricsPath)
|
||
}
|
||
|
||
// IncrementDatabaseQuery 增加数据库查询计数
|
||
func (m *Metrics) IncrementDatabaseQuery(table, operation string) {
|
||
m.DatabaseQueries.WithLabelValues(table, operation).Inc()
|
||
}
|
||
|
||
// IncrementDatabaseError 增加数据库错误计数
|
||
func (m *Metrics) IncrementDatabaseError(table, operation, error string) {
|
||
m.DatabaseErrors.WithLabelValues(table, operation, error).Inc()
|
||
}
|
||
|
||
// ObserveDatabaseDuration 记录数据库查询耗时
|
||
func (m *Metrics) ObserveDatabaseDuration(table, operation string, duration float64) {
|
||
m.DatabaseDuration.WithLabelValues(table, operation).Observe(duration)
|
||
}
|
||
|
||
// IncrementResourceCreated 增加资源创建计数
|
||
func (m *Metrics) IncrementResourceCreated(category, platform string) {
|
||
m.ResourcesCreated.WithLabelValues(category, platform).Inc()
|
||
}
|
||
|
||
// IncrementResourceViewed 增加资源查看计数
|
||
func (m *Metrics) IncrementResourceViewed(category string) {
|
||
m.ResourcesViewed.WithLabelValues(category).Inc()
|
||
}
|
||
|
||
// IncrementSearch 增加搜索计数
|
||
func (m *Metrics) IncrementSearch(platform string) {
|
||
m.Searches.WithLabelValues(platform).Inc()
|
||
}
|
||
|
||
// IncrementTransfer 增加转存计数
|
||
func (m *Metrics) IncrementTransfer(platform, status string) {
|
||
m.Transfers.WithLabelValues(platform, status).Inc()
|
||
}
|
||
|
||
// IncrementError 增加错误计数
|
||
func (m *Metrics) IncrementError(errorType, endpoint string) {
|
||
m.ErrorsTotal.WithLabelValues(errorType, endpoint).Inc()
|
||
}
|
||
|
||
// AddCustomCounter 添加自定义计数器
|
||
func (m *Metrics) AddCustomCounter(name, help string, labels []string) prometheus.Counter {
|
||
m.mu.Lock()
|
||
defer m.mu.Unlock()
|
||
|
||
key := fmt.Sprintf("%s_%v", name, labels)
|
||
if counter, exists := m.CustomCounters[key]; exists {
|
||
return counter
|
||
}
|
||
|
||
counter := promauto.NewCounterVec(
|
||
prometheus.CounterOpts{
|
||
Namespace: "urldb",
|
||
Name: name,
|
||
Help: help,
|
||
},
|
||
labels,
|
||
).WithLabelValues() // 如果没有标签,返回默认实例
|
||
|
||
m.CustomCounters[key] = counter
|
||
return counter
|
||
}
|
||
|
||
// AddCustomGauge 添加自定义仪表盘
|
||
func (m *Metrics) AddCustomGauge(name, help string, labels []string) prometheus.Gauge {
|
||
m.mu.Lock()
|
||
defer m.mu.Unlock()
|
||
|
||
key := fmt.Sprintf("%s_%v", name, labels)
|
||
if gauge, exists := m.CustomGauges[key]; exists {
|
||
return gauge
|
||
}
|
||
|
||
gauge := promauto.NewGaugeVec(
|
||
prometheus.GaugeOpts{
|
||
Namespace: "urldb",
|
||
Name: name,
|
||
Help: help,
|
||
},
|
||
labels,
|
||
).WithLabelValues() // 如果没有标签,返回默认实例
|
||
|
||
m.CustomGauges[key] = gauge
|
||
return gauge
|
||
}
|
||
|
||
// GetMetricsSummary 获取指标摘要
|
||
func (m *Metrics) GetMetricsSummary() map[string]interface{} {
|
||
// 这里可以实现获取当前指标摘要的逻辑
|
||
// 由于Prometheus指标不能直接读取,我们只能返回一些基本的统计信息
|
||
return map[string]interface{}{
|
||
"timestamp": time.Now(),
|
||
"status": "running",
|
||
"info": "使用 /metrics 端点获取详细指标",
|
||
}
|
||
}
|
||
|
||
// HealthCheck 健康检查
|
||
func (m *Metrics) HealthCheck(c *gin.Context) {
|
||
c.JSON(http.StatusOK, gin.H{
|
||
"status": "healthy",
|
||
"timestamp": time.Now().Unix(),
|
||
"version": "1.0.0",
|
||
})
|
||
}
|
||
|
||
// SetupHealthCheck 设置健康检查端点
|
||
func (m *Metrics) SetupHealthCheck(router *gin.Engine) {
|
||
router.GET("/health", m.HealthCheck)
|
||
router.GET("/healthz", m.HealthCheck)
|
||
}
|
||
|
||
// MetricsHandler 指标处理器
|
||
func (m *Metrics) MetricsHandler() gin.HandlerFunc {
|
||
return gin.WrapH(promhttp.Handler())
|
||
} |