feat(cron): add automatic alert detector for anomaly monitoring

Implement the AlertDetector background task, which runs every minute to
detect the following anomalies and create alerts for them:

- Rate limit detection: monitors masters hitting rate limits
- Error spike detection: flags keys with >= 10% error rate
- Quota exceeded: warns when key quota usage is >= 90%
- Provider down: alerts when API keys have >= 50% failure rate

Includes fingerprint-based deduplication with a 5-minute cooldown to
prevent duplicate alerts for the same issue.
This commit is contained in:
zenfun
2025-12-31 14:49:51 +08:00
parent 6cab7e257a
commit 85d91cdd2e
3 changed files with 359 additions and 0 deletions

View File

@@ -207,6 +207,13 @@ func main() {
defer cancelToken()
go tokenRefresher.Start(tokenCtx)
// Alert Detector
alertDetectorConfig := cron.DefaultAlertDetectorConfig()
alertDetector := cron.NewAlertDetector(db, logDB, rdb, service.NewStatsService(rdb), alertDetectorConfig, logger)
alertDetectorCtx, cancelAlertDetector := context.WithCancel(context.Background())
defer cancelAlertDetector()
go alertDetector.Start(alertDetectorCtx)
adminService, err := service.NewAdminService()
if err != nil {
fatal(logger, "failed to create admin service", "err", err)