mirror of
https://github.com/EZ-Api/ez-api.git
synced 2026-01-13 17:47:51 +00:00
feat(alerts): add traffic spike detection with configurable thresholds
Introduce traffic_spike alert type for monitoring system and per-master traffic levels with configurable thresholds stored in database.

- Add AlertThresholdConfig model for persistent threshold configuration
- Implement GET/PUT /admin/alerts/thresholds endpoints for threshold management
- Add traffic spike detection in alert detector cron job:
  - Global QPS monitoring across all masters
  - Per-master RPM/TPM checks with minimum sample thresholds
  - Per-master RPD/TPD checks for daily limits
- Use warning severity at threshold, critical at 2x threshold
- Include metric metadata (value, threshold, window) in alert details
- Update API documentation with new endpoints and alert type
This commit is contained in:
@@ -194,7 +194,7 @@ func (h *AlertHandler) CreateAlert(c *gin.Context) {
|
||||
// Validate type
|
||||
validTypes := map[string]bool{
|
||||
"rate_limit": true, "error_spike": true, "quota_exceeded": true,
|
||||
"key_disabled": true, "key_expired": true, "provider_down": true,
|
||||
"key_disabled": true, "key_expired": true, "provider_down": true, "traffic_spike": true,
|
||||
}
|
||||
if !validTypes[req.Type] {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "invalid alert type"})
|
||||
@@ -403,3 +403,161 @@ func (h *AlertHandler) GetAlertStats(c *gin.Context) {
|
||||
Info: info,
|
||||
})
|
||||
}
|
||||
|
||||
// AlertThresholdView is the JSON shape in which the alert threshold
// configuration is returned by the API.
type AlertThresholdView struct {
	// System-wide threshold.
	GlobalQPS int64 `json:"global_qps"`

	// Per-master thresholds (presumably requests/tokens per minute and per
	// day, going by the field names — confirm against the detector).
	MasterRPM int64 `json:"master_rpm"`
	MasterRPD int64 `json:"master_rpd"`
	MasterTPM int64 `json:"master_tpm"`
	MasterTPD int64 `json:"master_tpd"`

	// Minimum one-minute sample sizes.
	MinRPMRequests1m int64 `json:"min_rpm_requests_1m"`
	MinTPMTokens1m   int64 `json:"min_tpm_tokens_1m"`

	// UpdatedAt is a Unix timestamp in seconds.
	UpdatedAt int64 `json:"updated_at"`
}
|
||||
|
||||
func toAlertThresholdView(cfg model.AlertThresholdConfig) AlertThresholdView {
|
||||
return AlertThresholdView{
|
||||
GlobalQPS: cfg.GlobalQPS,
|
||||
MasterRPM: cfg.MasterRPM,
|
||||
MasterRPD: cfg.MasterRPD,
|
||||
MasterTPM: cfg.MasterTPM,
|
||||
MasterTPD: cfg.MasterTPD,
|
||||
MinRPMRequests1m: cfg.MinRPMRequests1m,
|
||||
MinTPMTokens1m: cfg.MinTPMTokens1m,
|
||||
UpdatedAt: cfg.UpdatedAt.UTC().Unix(),
|
||||
}
|
||||
}
|
||||
|
||||
// GetAlertThresholds godoc
|
||||
// @Summary Get alert thresholds
|
||||
// @Description Get current alert threshold configuration for traffic spike detection
|
||||
// @Tags admin
|
||||
// @Produce json
|
||||
// @Security AdminAuth
|
||||
// @Success 200 {object} AlertThresholdView
|
||||
// @Failure 500 {object} gin.H
|
||||
// @Router /admin/alerts/thresholds [get]
|
||||
func (h *AlertHandler) GetAlertThresholds(c *gin.Context) {
|
||||
cfg, err := h.loadThresholdConfig()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load thresholds", "details": err.Error()})
|
||||
return
|
||||
}
|
||||
c.JSON(http.StatusOK, toAlertThresholdView(cfg))
|
||||
}
|
||||
|
||||
// UpdateAlertThresholdsRequest is the JSON body accepted when updating alert
// thresholds. Every field is a pointer so that an omitted field (nil) can be
// told apart from an explicitly supplied value.
type UpdateAlertThresholdsRequest struct {
	// System-wide threshold.
	GlobalQPS *int64 `json:"global_qps"`

	// Per-master thresholds.
	MasterRPM *int64 `json:"master_rpm"`
	MasterRPD *int64 `json:"master_rpd"`
	MasterTPM *int64 `json:"master_tpm"`
	MasterTPD *int64 `json:"master_tpd"`

	// Minimum one-minute sample sizes.
	MinRPMRequests1m *int64 `json:"min_rpm_requests_1m"`
	MinTPMTokens1m   *int64 `json:"min_tpm_tokens_1m"`
}
|
||||
|
||||
// UpdateAlertThresholds godoc
|
||||
// @Summary Update alert thresholds
|
||||
// @Description Update alert threshold configuration for traffic spike detection
|
||||
// @Tags admin
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
// @Security AdminAuth
|
||||
// @Param request body UpdateAlertThresholdsRequest true "Threshold configuration"
|
||||
// @Success 200 {object} AlertThresholdView
|
||||
// @Failure 400 {object} gin.H
|
||||
// @Failure 500 {object} gin.H
|
||||
// @Router /admin/alerts/thresholds [put]
|
||||
func (h *AlertHandler) UpdateAlertThresholds(c *gin.Context) {
|
||||
var req UpdateAlertThresholdsRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Validate positive values
|
||||
if req.GlobalQPS != nil && *req.GlobalQPS <= 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "global_qps must be positive"})
|
||||
return
|
||||
}
|
||||
if req.MasterRPM != nil && *req.MasterRPM <= 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "master_rpm must be positive"})
|
||||
return
|
||||
}
|
||||
if req.MasterRPD != nil && *req.MasterRPD <= 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "master_rpd must be positive"})
|
||||
return
|
||||
}
|
||||
if req.MasterTPM != nil && *req.MasterTPM <= 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "master_tpm must be positive"})
|
||||
return
|
||||
}
|
||||
if req.MasterTPD != nil && *req.MasterTPD <= 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "master_tpd must be positive"})
|
||||
return
|
||||
}
|
||||
if req.MinRPMRequests1m != nil && *req.MinRPMRequests1m < 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "min_rpm_requests_1m must be non-negative"})
|
||||
return
|
||||
}
|
||||
if req.MinTPMTokens1m != nil && *req.MinTPMTokens1m < 0 {
|
||||
c.JSON(http.StatusBadRequest, gin.H{"error": "min_tpm_tokens_1m must be non-negative"})
|
||||
return
|
||||
}
|
||||
|
||||
cfg, err := h.loadThresholdConfig()
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to load thresholds", "details": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
// Apply updates
|
||||
if req.GlobalQPS != nil {
|
||||
cfg.GlobalQPS = *req.GlobalQPS
|
||||
}
|
||||
if req.MasterRPM != nil {
|
||||
cfg.MasterRPM = *req.MasterRPM
|
||||
}
|
||||
if req.MasterRPD != nil {
|
||||
cfg.MasterRPD = *req.MasterRPD
|
||||
}
|
||||
if req.MasterTPM != nil {
|
||||
cfg.MasterTPM = *req.MasterTPM
|
||||
}
|
||||
if req.MasterTPD != nil {
|
||||
cfg.MasterTPD = *req.MasterTPD
|
||||
}
|
||||
if req.MinRPMRequests1m != nil {
|
||||
cfg.MinRPMRequests1m = *req.MinRPMRequests1m
|
||||
}
|
||||
if req.MinTPMTokens1m != nil {
|
||||
cfg.MinTPMTokens1m = *req.MinTPMTokens1m
|
||||
}
|
||||
|
||||
if err := h.db.Save(&cfg).Error; err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to save thresholds", "details": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, toAlertThresholdView(cfg))
|
||||
}
|
||||
|
||||
// loadThresholdConfig loads the threshold config from DB or returns defaults
|
||||
func (h *AlertHandler) loadThresholdConfig() (model.AlertThresholdConfig, error) {
|
||||
var cfg model.AlertThresholdConfig
|
||||
err := h.db.First(&cfg).Error
|
||||
if err != nil {
|
||||
if err.Error() == "record not found" {
|
||||
// Create default config
|
||||
cfg = model.DefaultAlertThresholdConfig()
|
||||
if createErr := h.db.Create(&cfg).Error; createErr != nil {
|
||||
return cfg, createErr
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
return cfg, err
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user