test(alerts): add comprehensive tests for alert handler and detector

Add unit tests for alert-related functionality:

- alert_handler_test.go: tests for threshold CRUD operations,
  alert creation with traffic_spike type, filtering, and stats
- alert_detector_test.go: tests for threshold config loading,
  traffic spike severity calculation, deduplication logic,
  error rate severity, and nil-safety checks

Also fix format string issues:
- Use %d instead of %.2f for integer QPS in alert messages
- Wrap error description with format directive to avoid linter warning
This commit is contained in:
zenfun
2025-12-31 16:09:02 +08:00
parent 0b9556ee7e
commit f714a314a9
4 changed files with 594 additions and 2 deletions

View File

@@ -0,0 +1,328 @@
package cron
import (
"context"
"fmt"
"testing"
"time"
"github.com/ez-api/ez-api/internal/model"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
)
// setupTestDB opens an in-memory SQLite database whose name is derived from
// the running test, migrates the models the alert tests touch, and returns
// the gorm handle. Any failure aborts the calling test via t.Fatalf.
//
// The connection pool is closed automatically when the test finishes.
func setupTestDB(t *testing.T) *gorm.DB {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	// cache=shared lets every pooled connection see the same in-memory
	// database; the test name keeps separate tests from sharing one.
	dsn := fmt.Sprintf("file:%s?mode=memory&cache=shared", t.Name())
	db, err := gorm.Open(sqlite.Open(dsn), &gorm.Config{})
	if err != nil {
		t.Fatalf("open sqlite: %v", err)
	}

	// Release the underlying connections once the test (and its subtests)
	// are done. Close problems are only logged: by then the test outcome is
	// already decided.
	t.Cleanup(func() {
		sqlDB, err := db.DB()
		if err != nil {
			t.Logf("get sql.DB for cleanup: %v", err)
			return
		}
		if err := sqlDB.Close(); err != nil {
			t.Logf("close sqlite: %v", err)
		}
	})

	if err := db.AutoMigrate(&model.Alert{}, &model.AlertThresholdConfig{}, &model.Master{}, &model.Key{}, &model.APIKey{}, &model.ProviderGroup{}, &model.LogRecord{}); err != nil {
		t.Fatalf("migrate: %v", err)
	}
	return db
}
// TestDefaultAlertThresholdConfig pins each value returned by
// model.DefaultAlertThresholdConfig so an accidental change to a default is
// caught.
func TestDefaultAlertThresholdConfig(t *testing.T) {
	defaults := model.DefaultAlertThresholdConfig()

	// Global threshold.
	if got := defaults.GlobalQPS; got != 100 {
		t.Errorf("expected GlobalQPS=100, got %d", got)
	}

	// Master* thresholds.
	if got := defaults.MasterRPM; got != 20 {
		t.Errorf("expected MasterRPM=20, got %d", got)
	}
	if got := defaults.MasterRPD; got != 1000 {
		t.Errorf("expected MasterRPD=1000, got %d", got)
	}
	if got := defaults.MasterTPM; got != 10_000_000 {
		t.Errorf("expected MasterTPM=10000000, got %d", got)
	}
	if got := defaults.MasterTPD; got != 100_000_000 {
		t.Errorf("expected MasterTPD=100000000, got %d", got)
	}

	// Min* values.
	if got := defaults.MinRPMRequests1m; got != 10 {
		t.Errorf("expected MinRPMRequests1m=10, got %d", got)
	}
	if got := defaults.MinTPMTokens1m; got != 1_000_000 {
		t.Errorf("expected MinTPMTokens1m=1000000, got %d", got)
	}
}
func TestAlertDetectorLoadThresholdConfigDefault(t *testing.T) {
db := setupTestDB(t)
detector := &AlertDetector{db: db}
cfg := detector.loadThresholdConfig()
// Should return defaults when no config in DB
if cfg.GlobalQPS != 100 {
t.Errorf("expected default GlobalQPS=100, got %d", cfg.GlobalQPS)
}
if cfg.MasterRPM != 20 {
t.Errorf("expected default MasterRPM=20, got %d", cfg.MasterRPM)
}
}
// TestAlertDetectorLoadThresholdConfigFromDB checks that loadThresholdConfig
// returns values from a configuration row stored in the database.
func TestAlertDetectorLoadThresholdConfigFromDB(t *testing.T) {
	db := setupTestDB(t)

	// Persist a custom configuration row before the detector reads it.
	stored := model.AlertThresholdConfig{
		GlobalQPS:        500,
		MasterRPM:        100,
		MasterRPD:        5000,
		MasterTPM:        50_000_000,
		MasterTPD:        500_000_000,
		MinRPMRequests1m: 50,
		MinTPMTokens1m:   5_000_000,
	}
	err := db.Create(&stored).Error
	if err != nil {
		t.Fatalf("create config: %v", err)
	}

	detector := &AlertDetector{db: db}
	loaded := detector.loadThresholdConfig()

	// Only these three fields are asserted by this test.
	if got := loaded.GlobalQPS; got != 500 {
		t.Errorf("expected GlobalQPS=500, got %d", got)
	}
	if got := loaded.MasterRPM; got != 100 {
		t.Errorf("expected MasterRPM=100, got %d", got)
	}
	if got := loaded.MasterRPD; got != 5000 {
		t.Errorf("expected MasterRPD=5000, got %d", got)
	}
}
// TestTrafficSpikeSeverity checks the severity trafficSpikeSeverity returns
// for values around the threshold and around twice the threshold.
func TestTrafficSpikeSeverity(t *testing.T) {
	type severityCase struct {
		value     int64
		threshold int64
		expected  model.AlertSeverity
	}

	cases := []severityCase{
		// Below the threshold; the function is only called when value >= threshold.
		{value: 50, threshold: 100, expected: model.AlertSeverityWarning},
		// Exactly at the threshold.
		{value: 100, threshold: 100, expected: model.AlertSeverityWarning},
		// 1.5x the threshold.
		{value: 150, threshold: 100, expected: model.AlertSeverityWarning},
		// Just below 2x.
		{value: 199, threshold: 100, expected: model.AlertSeverityWarning},
		// Exactly 2x the threshold.
		{value: 200, threshold: 100, expected: model.AlertSeverityCritical},
		// 3x the threshold.
		{value: 300, threshold: 100, expected: model.AlertSeverityCritical},
	}

	for i := range cases {
		c := cases[i]
		got := trafficSpikeSeverity(c.value, c.threshold)
		if got == c.expected {
			continue
		}
		t.Errorf("trafficSpikeSeverity(%d, %d) = %s, expected %s", c.value, c.threshold, got, c.expected)
	}
}
// TestTrafficSpikeMetadataJSON is a smoke test for trafficSpikeMetadata.JSON:
// the result must be non-empty and at least 10 bytes long. The content of
// the encoded string is not inspected.
func TestTrafficSpikeMetadataJSON(t *testing.T) {
	encoded := trafficSpikeMetadata{
		Metric:    "master_rpm",
		Value:     150,
		Threshold: 20,
		Window:    "1m",
	}.JSON()

	// Both checks run independently, so an empty result reports both failures.
	if len(encoded) == 0 {
		t.Error("expected non-empty JSON")
	}
	if len(encoded) < 10 {
		t.Errorf("JSON too short: %s", encoded)
	}
}
// TestAlertDetectorDeduplication checks createAlertIfNew against a 5 minute
// deduplication cooldown: a repeated call with the same type, related ID,
// related type and related name must not add a row, while a call with a
// different related ID and name must.
func TestAlertDetectorDeduplication(t *testing.T) {
	db := setupTestDB(t)
	config := DefaultAlertDetectorConfig()
	config.DeduplicationCooldown = 5 * time.Minute
	detector := NewAlertDetector(db, db, nil, nil, config, nil)

	// countAlerts returns the number of stored alerts and aborts the test if
	// the query itself fails. Without the error check a failed query would
	// leave a stale count behind and could let an assertion pass by accident.
	countAlerts := func() int64 {
		t.Helper()
		var n int64
		if err := db.Model(&model.Alert{}).Count(&n).Error; err != nil {
			t.Fatalf("count alerts: %v", err)
		}
		return n
	}

	// Create first alert
	detector.createAlertIfNew(
		model.AlertTypeRateLimit,
		model.AlertSeverityWarning,
		"Test Alert",
		"Test Message",
		1,
		"master",
		"test-master",
	)
	if count := countAlerts(); count != 1 {
		t.Fatalf("expected 1 alert, got %d", count)
	}

	// Try to create duplicate (should be skipped): only the title and
	// message differ from the first call.
	detector.createAlertIfNew(
		model.AlertTypeRateLimit,
		model.AlertSeverityWarning,
		"Test Alert Duplicate",
		"Test Message Duplicate",
		1,
		"master",
		"test-master",
	)
	if count := countAlerts(); count != 1 {
		t.Fatalf("expected still 1 alert after duplicate, got %d", count)
	}

	// Different fingerprint should create new alert
	detector.createAlertIfNew(
		model.AlertTypeRateLimit,
		model.AlertSeverityWarning,
		"Different Alert",
		"Different Message",
		2, // Different related_id
		"master",
		"test-master-2",
	)
	if count := countAlerts(); count != 2 {
		t.Fatalf("expected 2 alerts with different fingerprint, got %d", count)
	}
}
// TestAlertDetectorTrafficSpikeDeduplication checks createTrafficSpikeAlert
// against a 5 minute deduplication cooldown: repeating a call with the same
// metadata and master must not add a row, while the same master with a
// different metric must.
func TestAlertDetectorTrafficSpikeDeduplication(t *testing.T) {
	db := setupTestDB(t)
	config := DefaultAlertDetectorConfig()
	config.DeduplicationCooldown = 5 * time.Minute
	detector := NewAlertDetector(db, db, nil, nil, config, nil)

	// countAlerts returns the number of stored alerts and aborts the test if
	// the query itself fails. Without the error check a failed query would
	// leave a stale count behind and could let an assertion pass by accident.
	countAlerts := func() int64 {
		t.Helper()
		var n int64
		if err := db.Model(&model.Alert{}).Count(&n).Error; err != nil {
			t.Fatalf("count alerts: %v", err)
		}
		return n
	}

	meta := trafficSpikeMetadata{
		Metric:    "master_rpm",
		Value:     150,
		Threshold: 20,
		Window:    "1m",
	}

	// Create first traffic spike alert
	detector.createTrafficSpikeAlert(
		model.AlertSeverityWarning,
		"RPM Exceeded",
		"Master exceeded RPM",
		1,
		"master",
		"test-master",
		meta,
	)
	if count := countAlerts(); count != 1 {
		t.Fatalf("expected 1 alert, got %d", count)
	}

	// Try to create duplicate (same metric, same master)
	detector.createTrafficSpikeAlert(
		model.AlertSeverityWarning,
		"RPM Exceeded Again",
		"Master exceeded RPM again",
		1,
		"master",
		"test-master",
		meta,
	)
	if count := countAlerts(); count != 1 {
		t.Fatalf("expected still 1 alert after duplicate, got %d", count)
	}

	// Different metric should create new alert
	meta2 := trafficSpikeMetadata{
		Metric:    "master_tpm", // Different metric
		Value:     15000000,
		Threshold: 10000000,
		Window:    "1m",
	}
	detector.createTrafficSpikeAlert(
		model.AlertSeverityWarning,
		"TPM Exceeded",
		"Master exceeded TPM",
		1,
		"master",
		"test-master",
		meta2,
	)
	if count := countAlerts(); count != 2 {
		t.Fatalf("expected 2 alerts with different metric, got %d", count)
	}
}
func TestAlertDetectorErrorRateSeverity(t *testing.T) {
detector := &AlertDetector{}
tests := []struct {
rate float64
expected model.AlertSeverity
}{
{0.05, model.AlertSeverityInfo}, // 5%
{0.10, model.AlertSeverityInfo}, // 10%
{0.24, model.AlertSeverityInfo}, // 24%
{0.25, model.AlertSeverityWarning}, // 25%
{0.40, model.AlertSeverityWarning}, // 40%
{0.49, model.AlertSeverityWarning}, // 49%
{0.50, model.AlertSeverityCritical}, // 50%
{0.75, model.AlertSeverityCritical}, // 75%
{1.00, model.AlertSeverityCritical}, // 100%
}
for _, tc := range tests {
result := detector.errorRateSeverity(tc.rate)
if result != tc.expected {
t.Errorf("errorRateSeverity(%.2f) = %s, expected %s", tc.rate, result, tc.expected)
}
}
}
// TestAlertDetectorDetectOnceNilSafe calls detectOnce on a nil detector and
// on a zero-value detector. It makes no assertions: the test passes as long
// as neither call panics.
func TestAlertDetectorDetectOnceNilSafe(t *testing.T) {
	ctx := context.Background()

	// Nil receiver.
	var missing *AlertDetector
	missing.detectOnce(ctx)

	// Zero-value detector, so its db field is unset.
	empty := new(AlertDetector)
	empty.detectOnce(ctx)
}
// TestAlertDetectorStartDisabled checks that Start returns on its own when
// the detector is configured with Enabled=false.
func TestAlertDetectorStartDisabled(t *testing.T) {
	db := setupTestDB(t)
	config := DefaultAlertDetectorConfig()
	config.Enabled = false
	detector := NewAlertDetector(db, db, nil, nil, config, nil)

	// The context is cancelled only when the test function returns. If it
	// carried a timeout shorter than the watchdog below, a Start that blocks
	// until cancellation would still finish in time and the test would pass
	// without proving anything about the Enabled flag.
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	// Run Start in the background so a blocking implementation cannot hang
	// the test; done is closed once Start returns.
	done := make(chan struct{})
	go func() {
		detector.Start(ctx)
		close(done)
	}()

	select {
	case <-done:
		// Expected: Start returned immediately because Enabled=false
	case <-time.After(200 * time.Millisecond):
		t.Error("Start did not return immediately when disabled")
	}
}