test(alerts): add comprehensive tests for alert handler and detector

Add unit tests for alert-related functionality:
- alert_handler_test.go: tests for threshold CRUD operations, alert creation with the traffic_spike type, filtering, and stats
- alert_detector_test.go: tests for threshold config loading, traffic spike severity calculation, deduplication logic, error rate severity, and nil-safety checks

Also fix format string issues:
- Use %d instead of %.2f for integer QPS in alert messages
- Wrap the error description with a format directive to avoid a linter warning
2026-01-13 17:47:51 +00:00 · 2025-12-31 16:09:02 +08:00
parent 0b9556ee7e
commit f714a314a9
4 changed files with 594 additions and 2 deletions
--- a/internal/cron/alert_detector_test.go
+++ b/internal/cron/alert_detector_test.go
@@ -0,0 +1,328 @@
+package cron
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/ez-api/ez-api/internal/model"
+	"gorm.io/driver/sqlite"
+	"gorm.io/gorm"
+)
+
+// setupTestDB opens an in-memory SQLite database named after the running test,
+// migrates the models these tests rely on, and returns the gorm handle.
+// Any failure aborts the calling test. The underlying connection pool is
+// closed automatically when the test finishes.
+func setupTestDB(t *testing.T) *gorm.DB {
+	t.Helper()
+
+	dsn := fmt.Sprintf("file:%s?mode=memory&cache=shared", t.Name())
+	db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{})
+	if err != nil {
+		t.Fatalf("open sqlite: %v", err)
+	}
+
+	// A shared-cache in-memory database is keyed by its name and lives for as
+	// long as any connection to it stays open. Closing the pool at the end of
+	// the test keeps rows from leaking into a later run of the same test in the
+	// same process (for example `go test -count=2`).
+	t.Cleanup(func() {
+		sqlDB, err := db.DB()
+		if err != nil {
+			t.Errorf("get sql.DB: %v", err)
+			return
+		}
+		if err := sqlDB.Close(); err != nil {
+			t.Errorf("close sqlite: %v", err)
+		}
+	})
+
+	if err := db.AutoMigrate(&model.Alert{}, &model.AlertThresholdConfig{}, &model.Master{}, &model.Key{}, &model.APIKey{}, &model.ProviderGroup{}, &model.LogRecord{}); err != nil {
+		t.Fatalf("migrate: %v", err)
+	}
+	return db
+}
+
+// TestDefaultAlertThresholdConfig checks the threshold values returned by
+// model.DefaultAlertThresholdConfig against the expected built-in defaults.
+func TestDefaultAlertThresholdConfig(t *testing.T) {
+	defaults := model.DefaultAlertThresholdConfig()
+
+	if got := defaults.GlobalQPS; got != 100 {
+		t.Errorf("expected GlobalQPS=100, got %d", got)
+	}
+	if got := defaults.MasterRPM; got != 20 {
+		t.Errorf("expected MasterRPM=20, got %d", got)
+	}
+	if got := defaults.MasterRPD; got != 1000 {
+		t.Errorf("expected MasterRPD=1000, got %d", got)
+	}
+	if got := defaults.MasterTPM; got != 10_000_000 {
+		t.Errorf("expected MasterTPM=10000000, got %d", got)
+	}
+	if got := defaults.MasterTPD; got != 100_000_000 {
+		t.Errorf("expected MasterTPD=100000000, got %d", got)
+	}
+	if got := defaults.MinRPMRequests1m; got != 10 {
+		t.Errorf("expected MinRPMRequests1m=10, got %d", got)
+	}
+	if got := defaults.MinTPMTokens1m; got != 1_000_000 {
+		t.Errorf("expected MinTPMTokens1m=1000000, got %d", got)
+	}
+}
+
+// TestAlertDetectorLoadThresholdConfigDefault checks that loadThresholdConfig
+// returns the default GlobalQPS and MasterRPM values when the freshly migrated
+// database contains no threshold row.
+func TestAlertDetectorLoadThresholdConfigDefault(t *testing.T) {
+	emptyDB := setupTestDB(t)
+
+	// Nothing has been inserted, so the defaults are expected.
+	loaded := (&AlertDetector{db: emptyDB}).loadThresholdConfig()
+
+	if got := loaded.GlobalQPS; got != 100 {
+		t.Errorf("expected default GlobalQPS=100, got %d", got)
+	}
+	if got := loaded.MasterRPM; got != 20 {
+		t.Errorf("expected default MasterRPM=20, got %d", got)
+	}
+}
+
+// TestAlertDetectorLoadThresholdConfigFromDB stores a custom threshold row and
+// checks that loadThresholdConfig returns the stored GlobalQPS, MasterRPM and
+// MasterRPD values.
+func TestAlertDetectorLoadThresholdConfigFromDB(t *testing.T) {
+	db := setupTestDB(t)
+
+	// Persist a custom threshold row for the detector to pick up.
+	stored := model.AlertThresholdConfig{
+		GlobalQPS:        500,
+		MasterRPM:        100,
+		MasterRPD:        5000,
+		MasterTPM:        50_000_000,
+		MasterTPD:        500_000_000,
+		MinRPMRequests1m: 50,
+		MinTPMTokens1m:   5_000_000,
+	}
+	err := db.Create(&stored).Error
+	if err != nil {
+		t.Fatalf("create config: %v", err)
+	}
+
+	loaded := (&AlertDetector{db: db}).loadThresholdConfig()
+
+	if got := loaded.GlobalQPS; got != 500 {
+		t.Errorf("expected GlobalQPS=500, got %d", got)
+	}
+	if got := loaded.MasterRPM; got != 100 {
+		t.Errorf("expected MasterRPM=100, got %d", got)
+	}
+	if got := loaded.MasterRPD; got != 5000 {
+		t.Errorf("expected MasterRPD=5000, got %d", got)
+	}
+}
+
+// TestTrafficSpikeSeverity is a table-driven check of trafficSpikeSeverity:
+// values below twice the threshold are expected to map to Warning, values at
+// or above twice the threshold to Critical.
+func TestTrafficSpikeSeverity(t *testing.T) {
+	cases := []struct {
+		value     int64
+		threshold int64
+		want      model.AlertSeverity
+	}{
+		{value: 50, threshold: 100, want: model.AlertSeverityWarning},   // below threshold, but this func is only called when >= threshold
+		{value: 100, threshold: 100, want: model.AlertSeverityWarning},  // exactly at threshold
+		{value: 150, threshold: 100, want: model.AlertSeverityWarning},  // 1.5x threshold
+		{value: 199, threshold: 100, want: model.AlertSeverityWarning},  // just below 2x
+		{value: 200, threshold: 100, want: model.AlertSeverityCritical}, // exactly 2x threshold
+		{value: 300, threshold: 100, want: model.AlertSeverityCritical}, // 3x threshold
+	}
+
+	for _, c := range cases {
+		if got := trafficSpikeSeverity(c.value, c.threshold); got != c.want {
+			t.Errorf("trafficSpikeSeverity(%d, %d) = %s, expected %s", c.value, c.threshold, got, c.want)
+		}
+	}
+}
+
+// TestTrafficSpikeMetadataJSON is a smoke test for trafficSpikeMetadata.JSON:
+// the encoded output must be non-empty and at least 10 bytes long.
+func TestTrafficSpikeMetadataJSON(t *testing.T) {
+	meta := trafficSpikeMetadata{
+		Metric:    "master_rpm",
+		Value:     150,
+		Threshold: 20,
+		Window:    "1m",
+	}
+
+	encoded := meta.JSON()
+
+	if encoded == "" {
+		t.Error("expected non-empty JSON")
+	}
+	if n := len(encoded); n < 10 {
+		t.Errorf("JSON too short: %s", encoded)
+	}
+}
+
+// TestAlertDetectorDeduplication exercises createAlertIfNew with a five-minute
+// deduplication cooldown: the first call must store one alert, a repeat call
+// for the same related entity must not add a row, and a call for a different
+// related entity must add a second row.
+func TestAlertDetectorDeduplication(t *testing.T) {
+	db := setupTestDB(t)
+
+	config := DefaultAlertDetectorConfig()
+	config.DeduplicationCooldown = 5 * time.Minute
+
+	detector := NewAlertDetector(db, db, nil, nil, config, nil)
+
+	// alertCount returns the current number of alert rows. It fails the test on
+	// a query error so that a failed count can never be mistaken for an
+	// unchanged one.
+	alertCount := func() int64 {
+		t.Helper()
+		var n int64
+		if err := db.Model(&model.Alert{}).Count(&n).Error; err != nil {
+			t.Fatalf("count alerts: %v", err)
+		}
+		return n
+	}
+
+	// Create first alert
+	detector.createAlertIfNew(
+		model.AlertTypeRateLimit,
+		model.AlertSeverityWarning,
+		"Test Alert",
+		"Test Message",
+		1,
+		"master",
+		"test-master",
+	)
+
+	if count := alertCount(); count != 1 {
+		t.Fatalf("expected 1 alert, got %d", count)
+	}
+
+	// Try to create duplicate (should be skipped)
+	detector.createAlertIfNew(
+		model.AlertTypeRateLimit,
+		model.AlertSeverityWarning,
+		"Test Alert Duplicate",
+		"Test Message Duplicate",
+		1,
+		"master",
+		"test-master",
+	)
+
+	if count := alertCount(); count != 1 {
+		t.Fatalf("expected still 1 alert after duplicate, got %d", count)
+	}
+
+	// Different fingerprint should create new alert
+	detector.createAlertIfNew(
+		model.AlertTypeRateLimit,
+		model.AlertSeverityWarning,
+		"Different Alert",
+		"Different Message",
+		2, // Different related_id
+		"master",
+		"test-master-2",
+	)
+
+	if count := alertCount(); count != 2 {
+		t.Fatalf("expected 2 alerts with different fingerprint, got %d", count)
+	}
+}
+
+// TestAlertDetectorTrafficSpikeDeduplication exercises createTrafficSpikeAlert
+// with a five-minute deduplication cooldown: the first call must store one
+// alert, a repeat call with the same metric and related entity must not add a
+// row, and a call with a different metric must add a second row.
+func TestAlertDetectorTrafficSpikeDeduplication(t *testing.T) {
+	db := setupTestDB(t)
+
+	config := DefaultAlertDetectorConfig()
+	config.DeduplicationCooldown = 5 * time.Minute
+
+	detector := NewAlertDetector(db, db, nil, nil, config, nil)
+
+	// alertCount returns the current number of alert rows. It fails the test on
+	// a query error so that a failed count can never be mistaken for an
+	// unchanged one.
+	alertCount := func() int64 {
+		t.Helper()
+		var n int64
+		if err := db.Model(&model.Alert{}).Count(&n).Error; err != nil {
+			t.Fatalf("count alerts: %v", err)
+		}
+		return n
+	}
+
+	meta := trafficSpikeMetadata{
+		Metric:    "master_rpm",
+		Value:     150,
+		Threshold: 20,
+		Window:    "1m",
+	}
+
+	// Create first traffic spike alert
+	detector.createTrafficSpikeAlert(
+		model.AlertSeverityWarning,
+		"RPM Exceeded",
+		"Master exceeded RPM",
+		1,
+		"master",
+		"test-master",
+		meta,
+	)
+
+	if count := alertCount(); count != 1 {
+		t.Fatalf("expected 1 alert, got %d", count)
+	}
+
+	// Try to create duplicate (same metric, same master)
+	detector.createTrafficSpikeAlert(
+		model.AlertSeverityWarning,
+		"RPM Exceeded Again",
+		"Master exceeded RPM again",
+		1,
+		"master",
+		"test-master",
+		meta,
+	)
+
+	if count := alertCount(); count != 1 {
+		t.Fatalf("expected still 1 alert after duplicate, got %d", count)
+	}
+
+	// Different metric should create new alert
+	meta2 := trafficSpikeMetadata{
+		Metric:    "master_tpm", // Different metric
+		Value:     15000000,
+		Threshold: 10000000,
+		Window:    "1m",
+	}
+	detector.createTrafficSpikeAlert(
+		model.AlertSeverityWarning,
+		"TPM Exceeded",
+		"Master exceeded TPM",
+		1,
+		"master",
+		"test-master",
+		meta2,
+	)
+
+	if count := alertCount(); count != 2 {
+		t.Fatalf("expected 2 alerts with different metric, got %d", count)
+	}
+}
+
+// TestAlertDetectorErrorRateSeverity is a table-driven check of
+// errorRateSeverity: rates below 0.25 are expected to map to Info, rates from
+// 0.25 up to but excluding 0.50 to Warning, and rates of 0.50 and above to
+// Critical.
+func TestAlertDetectorErrorRateSeverity(t *testing.T) {
+	cases := []struct {
+		rate float64
+		want model.AlertSeverity
+	}{
+		{rate: 0.05, want: model.AlertSeverityInfo},     // 5%
+		{rate: 0.10, want: model.AlertSeverityInfo},     // 10%
+		{rate: 0.24, want: model.AlertSeverityInfo},     // 24%
+		{rate: 0.25, want: model.AlertSeverityWarning},  // 25%
+		{rate: 0.40, want: model.AlertSeverityWarning},  // 40%
+		{rate: 0.49, want: model.AlertSeverityWarning},  // 49%
+		{rate: 0.50, want: model.AlertSeverityCritical}, // 50%
+		{rate: 0.75, want: model.AlertSeverityCritical}, // 75%
+		{rate: 1.00, want: model.AlertSeverityCritical}, // 100%
+	}
+
+	// A zero-value detector is enough for this call.
+	detector := new(AlertDetector)
+
+	for _, c := range cases {
+		if got := detector.errorRateSeverity(c.rate); got != c.want {
+			t.Errorf("errorRateSeverity(%.2f) = %s, expected %s", c.rate, got, c.want)
+		}
+	}
+}
+
+func TestAlertDetectorDetectOnceNilSafe(t *testing.T) {
+	// Test nil detector
+	var nilDetector *AlertDetector
+	nilDetector.detectOnce(context.Background())
+
+	// Test detector with nil db
+	detector := &AlertDetector{}
+	detector.detectOnce(context.Background())
+
+	// Should not panic
+}
+
+// TestAlertDetectorStartDisabled verifies that Start returns promptly on its own
+// when the detector configuration has Enabled set to false.
+func TestAlertDetectorStartDisabled(t *testing.T) {
+	db := setupTestDB(t)
+
+	config := DefaultAlertDetectorConfig()
+	config.Enabled = false
+
+	detector := NewAlertDetector(db, db, nil, nil, config, nil)
+
+	// The context deliberately has no deadline: if it expired before the wait
+	// below timed out, a Start that runs until cancellation would be
+	// indistinguishable from one that returns immediately. The deferred cancel
+	// is there to release the goroutine on the failure path (assuming Start
+	// honors context cancellation).
+	ctx, cancel := context.WithCancel(context.Background())
+	defer cancel()
+
+	// Should return immediately without blocking
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		detector.Start(ctx)
+	}()
+
+	select {
+	case <-done:
+		// Expected: Start returned immediately because Enabled=false
+	case <-time.After(200 * time.Millisecond):
+		t.Error("Start did not return immediately when disabled")
+	}
+}