{
  "version": "1.0",
  "owner": "ai-platform-team",
  "alerts": [
    {
      "name": "error_rate_spike",
      "metric": "trace.error_rate",
      "window_minutes": 10,
      "threshold": 0.05,
      "severity": "high",
      "channels": ["email", "slack"]
    },
    {
      "name": "latency_p95_degradation",
      "metric": "trace.duration_ms.p95",
      "window_minutes": 15,
      "threshold": 12000,
      "severity": "medium",
      "channels": ["email"]
    },
    {
      "name": "daily_cost_spike",
      "metric": "cost.usd.daily",
      "window_minutes": 60,
      "threshold": 250,
      "severity": "high",
      "channels": ["email", "slack", "webhook"]
    }
  ],
  "notes": [
    "Tune thresholds using your baseline for each workflow.",
    "Set separate rules for high-volume and low-volume agents.",
    "Validate alert noise weekly and adjust windows."
  ]
}
