随着 AI Agent 从原型阶段迈入生产部署,如何建立可靠的 CI/CD 流水线和自动化部署体系成为团队面临的核心挑战。与传统的软件部署不同,AI Agent 的部署涉及模型推理配置、工具注册管理、Prompt 版本控制、安全策略同步等多个动态维度。
2026 年,领先的 Agent 工程团队已经建立起成熟的分级部署体系:从开发环境(Dev)到预发布环境(Staging)、灰度环境(Canary),再到生产环境(Production),每层都包含自动化的 Prompt 回归测试、工具调用准确性验证、延迟 SLA 检测和回滚机制。
本文将全面解析 AI Agent 的 CI/CD 流水线设计、自动化测试策略、容器化部署方案、蓝绿/灰度发布策略以及生产级监控与告警体系,并附完整 Python/YAML 实现代码。
| 维度 | 传统微服务部署 | AI Agent 部署 |
|---|---|---|
| 配置项 | 环境变量 + 代码 | 代码 + Prompt 模板 + 工具注册 + 模型参数 |
| 验证手段 | 单元/集成测试 | 代码测试 + LLM-as-Judge 回归测试 |
| 回滚策略 | 代码版本回退 | 代码 + Prompt + 模型配置三重回滚 |
| 灰度发布 | 流量比例切换 | Prompt 变体 + 模型路由 + 工具集 A/B |
| 性能基线 | API 延迟 ±5% | Token 消耗 + 推理延迟 + 工具调用成功率 |
AI Agent 的部署架构建议采用四环境分级模型:
Dev [Mock LLM + Mock Tools] → Staging [Mini LLM + Mock Tools] → Canary [Full LLM + Real Tools, 5%] → Production [Full LLM + Real Tools, 100%]
# config/environments.py
from dataclasses import dataclass, field
from typing import Optional
from enum import Enum
class DeploymentEnvironment(Enum):
    """Deployment tiers an agent release can target.

    Members are listed in promotion order: dev -> staging -> canary ->
    production. The string values are the identifiers used in configuration.
    """

    DEV = "dev"
    STAGING = "staging"
    CANARY = "canary"
    PRODUCTION = "production"
@dataclass
class AgentDeploymentConfig:
    """Runtime settings for one agent deployment in a given environment.

    The ``get_*_config`` class methods build the preset for each tier;
    ``for_environment`` selects the preset from a ``DeploymentEnvironment``.
    """

    environment: DeploymentEnvironment
    llm_model: str
    llm_temperature: float
    tools_enabled: list[str]
    prompt_version: str
    max_tokens: int
    timeout_seconds: int
    mock_llm: bool = False
    mock_tools: bool = False
    # Share of traffic routed to this deployment, as a fraction (1.0 == 100%).
    traffic_weight: float = 0.0
    log_level: str = "INFO"

    @classmethod
    def get_dev_config(cls) -> "AgentDeploymentConfig":
        """Return the dev preset: mocked LLM and tools, DEBUG logging."""
        return cls(
            environment=DeploymentEnvironment.DEV,
            llm_model="gpt-4o-mini",
            llm_temperature=0.3,
            tools_enabled=["search", "calculator"],
            prompt_version="dev-latest",
            max_tokens=1024,
            timeout_seconds=30,
            mock_llm=True,
            mock_tools=True,
            log_level="DEBUG",
        )

    @classmethod
    def get_staging_config(cls) -> "AgentDeploymentConfig":
        """Return the staging preset: real LLM, mocked tools."""
        return cls(
            environment=DeploymentEnvironment.STAGING,
            llm_model="gpt-4o",
            llm_temperature=0.1,
            tools_enabled=["search", "calculator", "code_executor"],
            prompt_version="staging-1.2.3",
            max_tokens=2048,
            timeout_seconds=45,
            mock_llm=False,
            mock_tools=True,
            log_level="INFO",
        )

    @classmethod
    def get_canary_config(cls) -> "AgentDeploymentConfig":
        """Return the canary preset.

        The canary runs the production settings unchanged except for the
        environment tag and a 5% traffic share, so that what is observed on
        the canary is what will be promoted.
        """
        config = cls.get_production_config()
        config.environment = DeploymentEnvironment.CANARY
        config.traffic_weight = 0.05
        return config

    @classmethod
    def get_production_config(cls) -> "AgentDeploymentConfig":
        """Return the production preset: real LLM and tools, full traffic."""
        return cls(
            environment=DeploymentEnvironment.PRODUCTION,
            llm_model="claude-opus-4",
            llm_temperature=0.0,
            tools_enabled=[
                "search",
                "calculator",
                "code_executor",
                "file_read",
                "database_query",
            ],
            prompt_version="prod-2.0.0",
            max_tokens=4096,
            timeout_seconds=60,
            mock_llm=False,
            mock_tools=False,
            traffic_weight=1.0,
            log_level="WARNING",
        )

    @classmethod
    def for_environment(
        cls, environment: DeploymentEnvironment
    ) -> "AgentDeploymentConfig":
        """Return the preset matching *environment*.

        Raises:
            KeyError: if *environment* is not a known ``DeploymentEnvironment``.
        """
        factories = {
            DeploymentEnvironment.DEV: cls.get_dev_config,
            DeploymentEnvironment.STAGING: cls.get_staging_config,
            DeploymentEnvironment.CANARY: cls.get_canary_config,
            DeploymentEnvironment.PRODUCTION: cls.get_production_config,
        }
        return factories[environment]()
[代码提交] → [静态分析] → [单元测试] → [Prompt回归测试] → [工具集成测试] → [安全扫描] → [构建镜像] → [部署预发布] → [端到端测试] → [审批门] → [灰度发布] → [全量发布]
# cicd/agent_pipeline.py
import json, os, subprocess, time
from dataclasses import dataclass, field
from datetime import datetime
from typing import Optional
from enum import Enum
class PipelineStage(Enum):
    """Stages of the agent deployment pipeline, in execution order."""

    STATIC_ANALYSIS = "static_analysis"
    UNIT_TEST = "unit_test"
    PROMPT_REGRESSION = "prompt_regression"
    TOOL_INTEGRATION = "tool_integration"
    SECURITY_SCAN = "security_scan"
    BUILD_IMAGE = "build_image"
    DEPLOY_STAGING = "deploy_staging"
    E2E_TEST = "e2e_test"
    APPROVAL_GATE = "approval_gate"
    CANARY_DEPLOY = "canary_deploy"
    PRODUCTION_DEPLOY = "production_deploy"
@dataclass
class PipelineResult:
    """Outcome of a single pipeline stage."""

    stage: PipelineStage
    status: str  # "passed" | "failed" | "skipped"
    duration_seconds: float
    # Stage-specific diagnostics (command output tail, issue lists, ...).
    details: dict = field(default_factory=dict)
    # Identifiers of anything the stage produced, e.g. an image tag.
    artifacts: list[str] = field(default_factory=list)
@dataclass
class AgentRelease:
    """Everything that identifies one releasable agent build."""

    version: str
    commit_sha: str
    # Forward reference: AgentDeploymentConfig is not imported in this
    # module's visible import block, so an unquoted annotation would raise
    # NameError when the class is created.
    config: "AgentDeploymentConfig"
    prompt_version: str
    tool_manifest: list[str]
    # Local wall-clock time at which the release object was created.
    created_at: datetime = field(default_factory=datetime.now)
class AgentCICDPipeline:
    """Orchestrate the CI/CD stages of an AI agent release.

    Every stage method appends one ``PipelineResult`` to ``self.results`` and
    returns ``True`` when the stage passed. ``run_full_pipeline`` executes the
    stages from static analysis through end-to-end tests and stops at the
    first failure; the approval, canary and production stages declared in
    ``PipelineStage`` are not executed by this class.
    """

    # The prompt-regression stage fails once this fraction of cases fails.
    MAX_PROMPT_FAILURE_RATIO = 0.1

    def __init__(self, repo_path: str, config_path: str):
        """Store the repository root and the security-scan config path."""
        self.repo_path = repo_path
        self.config_path = config_path
        self.results: list[PipelineResult] = []
        self.release: Optional[AgentRelease] = None

    def run_full_pipeline(self) -> bool:
        """Run all stages in order; return ``False`` at the first failure."""
        print(f"[Pipeline] 启动完整部署流水线 - {datetime.now()}")
        stages = (
            self._run_static_analysis,
            self._run_unit_tests,
            self._run_prompt_regression,
            self._run_tool_integration_tests,
            self._run_security_scan,
            self._build_docker_image,
            self._deploy_staging,
            self._run_e2e_tests,
        )
        # all() over a generator short-circuits, so later stages are skipped
        # as soon as one stage reports failure.
        return all(stage() for stage in stages)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _run(self, command: list[str]):
        """Run *command* inside the repository and capture its text output."""
        return subprocess.run(
            command, capture_output=True, text=True, cwd=self.repo_path
        )

    def _record(self, stage, passed: bool, start: float, details: dict,
                artifacts=None) -> bool:
        """Append the stage outcome to ``self.results`` and return *passed*."""
        self.results.append(PipelineResult(
            stage=stage,
            status="passed" if passed else "failed",
            duration_seconds=time.time() - start,
            details=details,
            artifacts=list(artifacts or []),
        ))
        return passed

    @staticmethod
    def _find_unclosed_template_vars(content: str) -> list[str]:
        """Return every ``{{ name`` opener that is not followed by ``}}``.

        The ``\\b`` after the name stops the regex engine from backtracking
        into the middle of the identifier; without it a correctly closed
        ``{{ name }}`` is reported as the unclosed fragment ``{{ nam``.
        """
        import re
        return re.findall(r'\{\{\s*\w+\b(?!\s*\}\})', content)

    @staticmethod
    def _parse_scan_issues(returncode: int, stdout: str, stderr: str) -> list:
        """Turn the security scanner's exit status and output into issues.

        A zero exit code means no issues. A non-zero exit code always yields
        at least one issue, even when stdout is valid JSON without an
        ``issues`` list, so a crashed scanner can never pass the gate.
        """
        if returncode == 0:
            return []
        try:
            report = json.loads(stdout)
        except json.JSONDecodeError:
            report = None
        issues = report.get("issues", []) if isinstance(report, dict) else []
        if not issues:
            issues = [stderr[:500] or f"安全扫描异常退出 (exit code {returncode})"]
        return issues

    # ------------------------------------------------------------------
    # Stages
    # ------------------------------------------------------------------
    def _run_static_analysis(self) -> bool:
        """Byte-compile all Python files and check prompt template syntax."""
        start = time.time()
        issues = []

        py_files = self._find_files("*.py")
        for path in py_files:
            proc = self._run(["python3", "-m", "py_compile", path])
            if proc.returncode != 0:
                issues.append(f"语法错误: {path}: {proc.stderr}")

        prompt_files = self._find_files("*prompt*.yaml", "*prompt*.json")
        for path in prompt_files:
            # _find_files returns repo-relative paths, so resolve them against
            # repo_path; the process CWD may be a different directory.
            full_path = os.path.join(self.repo_path, path)
            with open(full_path, "r", encoding="utf-8") as fh:
                content = fh.read()
            unmatched = self._find_unclosed_template_vars(content)
            if unmatched:
                issues.append(f"Prompt 模板变量未闭合: {path}: {unmatched}")

        return self._record(
            PipelineStage.STATIC_ANALYSIS, not issues, start,
            {"files_checked": len(py_files) + len(prompt_files), "issues": issues},
        )

    def _run_unit_tests(self) -> bool:
        """Run ``tests/unit/`` with pytest, stopping at the first failure."""
        start = time.time()
        proc = self._run(["pytest", "tests/unit/", "-x", "--tb=short", "-q"])
        return self._record(
            PipelineStage.UNIT_TEST, proc.returncode == 0, start,
            {"output": proc.stdout[-500:]},
        )

    def _run_prompt_regression(self) -> bool:
        """Evaluate every prompt test case; pass if under 10% of them fail.

        With no test cases the failure ratio is 0, so the stage passes.
        """
        start = time.time()
        test_cases = self._load_prompt_test_cases()
        failures = []
        for case in test_cases:
            result = self._evaluate_prompt(case)
            if not result["passed"]:
                failures.append({
                    "case_id": case["id"],
                    "expected": case["expected_behavior"],
                    "actual": result["output"][:200],
                    "score": result["score"],
                })

        # max(..., 1) avoids division by zero for an empty suite.
        failure_ratio = len(failures) / max(len(test_cases), 1)
        return self._record(
            PipelineStage.PROMPT_REGRESSION,
            failure_ratio < self.MAX_PROMPT_FAILURE_RATIO,
            start,
            {
                "total_cases": len(test_cases),
                "failures": failures[:10],
                "pass_rate": f"{(1 - failure_ratio) * 100:.1f}%",
            },
        )

    def _run_tool_integration_tests(self) -> bool:
        """Run ``tests/integration/`` with pytest."""
        start = time.time()
        proc = self._run(["pytest", "tests/integration/", "-x", "--tb=short", "-q"])
        return self._record(
            PipelineStage.TOOL_INTEGRATION, proc.returncode == 0, start,
            {"output": proc.stdout[-500:]},
        )

    def _run_security_scan(self) -> bool:
        """Run ``scripts/security_scan.py``; pass only if it reports no issues."""
        start = time.time()
        proc = self._run(
            ["python3", "scripts/security_scan.py", "--config", self.config_path]
        )
        issues = self._parse_scan_issues(proc.returncode, proc.stdout, proc.stderr)
        return self._record(
            PipelineStage.SECURITY_SCAN, not issues, start,
            {"issues_count": len(issues), "issues": issues[:5]},
        )

    def _build_docker_image(self) -> bool:
        """Build the image from ``deploy/Dockerfile`` with a timestamp tag."""
        start = time.time()
        tag = f"agent-system:{datetime.now().strftime('%Y%m%d%H%M%S')}"
        proc = self._run(
            ["docker", "build", "-t", tag, "-f", "deploy/Dockerfile", "."]
        )
        built = proc.returncode == 0
        # Only a successful build produces an artifact worth recording.
        return self._record(
            PipelineStage.BUILD_IMAGE, built, start,
            {"image_tag": tag}, artifacts=[tag] if built else [],
        )

    def _deploy_staging(self) -> bool:
        """Apply the manifests under ``deploy/staging/`` with kubectl."""
        start = time.time()
        proc = self._run(["kubectl", "apply", "-f", "deploy/staging/"])
        return self._record(
            PipelineStage.DEPLOY_STAGING, proc.returncode == 0, start,
            {"namespace": "agent-staging", "output": proc.stdout[-300:]},
        )

    def _run_e2e_tests(self) -> bool:
        """Run ``tests/e2e/`` with pytest."""
        start = time.time()
        proc = self._run(["pytest", "tests/e2e/", "--tb=long", "-q"])
        return self._record(
            PipelineStage.E2E_TEST, proc.returncode == 0, start,
            {"output": proc.stdout[-500:]},
        )

    # ------------------------------------------------------------------
    # File helpers
    # ------------------------------------------------------------------
    def _find_files(self, *patterns):
        """Return repo-relative paths of files whose name matches a pattern.

        Patterns are shell-style globs matched against the file name only.
        Paths are returned as ``./sub/dir/name``, sorted, without duplicates
        even when a file matches several patterns. The walk is done in
        process so it does not depend on an external ``find`` binary.
        """
        import fnmatch

        matches = []
        for dirpath, _dirnames, filenames in os.walk(self.repo_path):
            rel_dir = os.path.relpath(dirpath, self.repo_path)
            for filename in filenames:
                if not any(fnmatch.fnmatchcase(filename, p) for p in patterns):
                    continue
                if not os.path.isfile(os.path.join(dirpath, filename)):
                    continue
                if rel_dir == os.curdir:
                    matches.append(os.path.join(os.curdir, filename))
                else:
                    matches.append(os.path.join(os.curdir, rel_dir, filename))
        return sorted(matches)

    def _load_prompt_test_cases(self) -> list[dict]:
        """Load ``tests/prompt_test_cases.json``; return ``[]`` if absent."""
        test_file = os.path.join(self.repo_path, "tests", "prompt_test_cases.json")
        try:
            with open(test_file, "r", encoding="utf-8") as fh:
                return json.load(fh)
        except FileNotFoundError:
            return []

    def _evaluate_prompt(self, case: dict) -> dict:
        """Placeholder evaluator: reports every case as passed.

        TODO: replace with a real LLM call; as written the prompt-regression
        stage can never fail.
        """
        return {"passed": True, "output": "mock_output", "score": 0.95}

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------
    def generate_report(self) -> str:
        """Render ``self.results`` as a Markdown report with a stage table."""
        total_duration = sum(r.duration_seconds for r in self.results)
        passed = sum(1 for r in self.results if r.status == "passed")
        icons = {"passed": "✅", "skipped": "⏭️"}
        report = [
            "# AI Agent 部署流水线报告",
            "",
            f"**执行时间**: {datetime.now()}",
            f"**总耗时**: {total_duration:.1f}s",
            f"**通过率**: {passed}/{len(self.results)} 阶段",
            "",
            # Markdown only renders the rows below as a table when a header
            # row and a separator row precede them.
            "| 阶段 | 状态 | 耗时 |",
            "|---|---|---|",
        ]
        for r in self.results:
            emoji = icons.get(r.status, "❌")
            report.append(
                f"| {emoji} {r.stage.value} | {r.status} | {r.duration_seconds:.1f}s |"
            )
        return "\n".join(report)
# .github/workflows/agent-deploy.yml
# Test -> build -> staging -> canary -> production pipeline for the agent.
name: AI Agent Deploy Pipeline
on:
  push:
    branches: [main, 'release/*']
    paths: ['src/**', 'prompts/**', 'tools/**', 'config/**', 'deploy/**', 'tests/**']
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'
      - name: Install dependencies
        # pytest-timeout provides the --timeout flag used by the
        # integration tests below.
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt pytest pytest-cov pytest-timeout
      - name: Static Analysis
        run: |
          python -m py_compile src/agent/*.py
          python -c "import yaml; [yaml.safe_load(open(f)) for f in __import__('glob').glob('prompts/*.yaml')]"
      - name: Unit Tests
        run: pytest tests/unit/ -v --cov=src/ --cov-report=term-missing
      - name: Prompt Regression Tests
        run: python scripts/prompt_regression.py --config config/prompts.yaml
        # Block mapping: a ${{ }} expression is not valid inside a YAML
        # flow mapping ({ ... }).
        env:
          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
      - name: Security Scan
        run: python scripts/security_scan.py --config config/security.yaml
      - name: Integration Tests
        run: pytest tests/integration/ -v --timeout=60
  build:
    needs: [test]
    runs-on: ubuntu-latest
    # GITHUB_TOKEN needs packages:write to push the image to GHCR.
    permissions:
      contents: read
      packages: write
    steps:
      - uses: actions/checkout@v4
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Log in to Registry
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Build and Push
        uses: docker/build-push-action@v5
        with:
          context: .
          push: true
          tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }},${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:latest
          cache-from: type=gha
          cache-to: type=gha,mode=max
  deploy-staging:
    needs: [build]
    runs-on: ubuntu-latest
    environment: staging
    steps:
      - uses: actions/checkout@v4
      # NOTE(review): no step in the deploy jobs configures cluster
      # credentials for kubectl; presumably supplied by the runner - confirm.
      - name: Deploy to Staging
        run: |
          kubectl set image deployment/agent-agent agent=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} -n agent-staging
          kubectl rollout status deployment/agent-agent -n agent-staging --timeout=5m
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
          cache: 'pip'
      - name: Install E2E test dependencies
        # Each job starts on a fresh runner; pytest-base-url provides the
        # --base-url flag.
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt pytest pytest-base-url
      - name: Run E2E Tests
        run: pytest tests/e2e/ -v --base-url=https://staging.agent-system.com
  deploy-canary:
    needs: [deploy-staging]
    runs-on: ubuntu-latest
    environment: canary
    steps:
      - name: Deploy Canary (5% traffic)
        # NOTE(review): the canary is marked approved after a fixed 5-minute
        # soak without inspecting any metric.
        run: |
          kubectl set image deployment/agent-canary agent=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} -n agent-prod
          kubectl scale deployment/agent-canary --replicas=1 -n agent-prod
          kubectl rollout status deployment/agent-canary -n agent-prod --timeout=5m
          sleep 300
          kubectl annotate deployment/agent-canary agent.canary.approved=true -n agent-prod --overwrite
  deploy-production:
    needs: [deploy-canary]
    runs-on: ubuntu-latest
    environment:
      name: production
      url: https://agent-system.com
    steps:
      - name: Full Production Deploy
        run: |
          kubectl set image deployment/agent-prod agent=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.sha }} -n agent-prod
          kubectl rollout status deployment/agent-prod -n agent-prod --timeout=10m
# config/prompt_manifest.yaml
# Registry of prompt versions with their file location, content hash and
# evaluation metrics. The hash values shown are truncated placeholders.
prompts:
  system_prompt:
    versions:
      v2.0.0:
        path: prompts/system/v2.0.0.yaml
        hash: sha256:a1b2c3d4...
        description: "生产级 V2 版本,添加了工具安全约束"
        metrics:
          task_success_rate: 0.94
          tool_accuracy: 0.97
          # Score out of 5.0; kept numeric like the other metrics
          # ("4.2/5.0" would be parsed as a string).
          user_satisfaction: 4.2
        changes:
          - "添加工具调用安全边界检查"
          - "优化多轮对话上下文保持"
          - "增加错误处理策略描述"
      v1.5.0:
        path: prompts/system/v1.5.0.yaml
        hash: sha256:e5f6g7h8...
        description: "添加 RAG 检索能力"
        metrics:
          task_success_rate: 0.91
          tool_accuracy: 0.95
          # Score out of 5.0.
          user_satisfaction: 4.0
        changes: ["集成上下文检索指令", "优化工具选择优先级"]
# tests/prompt_regression_suite.py
from dataclasses import dataclass
from typing import Callable
@dataclass
class PromptTestCase:
    """One prompt regression scenario and the expectations placed on it."""

    id: str
    name: str
    system_prompt_version: str
    user_input: str
    # Natural-language description of the desired behavior, given to the judge.
    expected_behavior: str
    expected_tools: list[str]
    expected_response_keywords: list[str]
    forbidden_keywords: list[str]
    max_tokens_limit: int
@dataclass
class PromptTestResult:
    """Outcome of evaluating a single prompt test case."""

    case_id: str
    passed: bool
    tools_called: list[str]
    # Leading slice of the agent response, kept short for reports.
    response_preview: str
    token_count: int
    # Human-readable description of each failed rule check.
    violations: list[str]
    # Quality score returned by the judge model.
    score: float
class PromptRegressionSuite:
    """Run prompt test cases against an agent and grade them with a judge LLM.

    ``llm_client`` is called with ``system_prompt_version`` and ``user_input``
    keyword arguments and must return a dict; the keys ``tools_called``,
    ``response`` and ``token_count`` are read from it. ``judge_llm`` is called
    with ``prompt`` and ``temperature`` and must return a dict whose ``score``
    key is read.
    """

    # Minimum judge score for a case to pass.
    PASSING_SCORE = 0.7

    def __init__(self, llm_client: Callable, judge_llm: Callable):
        self.llm_client = llm_client
        self.judge_llm = judge_llm
        self.test_cases: list[PromptTestCase] = []

    def load_cases(self, path: str):
        """Append the test cases stored as a JSON list of objects at *path*."""
        import json

        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        self.test_cases.extend(PromptTestCase(**item) for item in data)

    def run_all(self) -> dict:
        """Evaluate every loaded case and return an aggregate summary."""
        results = [self._evaluate_single(c) for c in self.test_cases]
        passed = sum(1 for r in results if r.passed)
        return {
            "total": len(results),
            "passed": passed,
            "failed": len(results) - passed,
            # max(..., 1) avoids division by zero for an empty suite.
            "pass_rate": passed / max(len(results), 1),
            "details": [r.__dict__ for r in results],
        }

    @staticmethod
    def _collect_violations(case, response: dict) -> list[str]:
        """Check *response* against the rule-based expectations of *case*.

        Covers expected tools, forbidden keywords, expected response keywords
        and the token budget; returns one message per violated rule.
        """
        violations = []
        tools_called = response.get("tools_called", [])
        response_text = response.get("response", "")

        violations.extend(
            f"缺少预期工具: {tool}"
            for tool in case.expected_tools if tool not in tools_called
        )
        violations.extend(
            f"包含禁用词: {keyword}"
            for keyword in case.forbidden_keywords if keyword in response_text
        )
        violations.extend(
            f"缺少预期关键词: {keyword}"
            for keyword in case.expected_response_keywords
            if keyword not in response_text
        )
        token_count = response.get("token_count", 0)
        if token_count > case.max_tokens_limit:
            violations.append(
                f"Token 超限: {token_count} > {case.max_tokens_limit}"
            )
        return violations

    def _evaluate_single(self, case: "PromptTestCase") -> "PromptTestResult":
        """Run one case: call the agent, apply rule checks, then the judge.

        A case passes only when there are no rule violations and the judge
        score reaches ``PASSING_SCORE``. A missing judge score defaults to
        0.5, which is below the threshold.
        """
        response = self.llm_client(
            system_prompt_version=case.system_prompt_version,
            user_input=case.user_input,
        )
        tools_called = response.get("tools_called", [])
        response_text = response.get("response", "")
        violations = self._collect_violations(case, response)

        judge_result = self.judge_llm(
            prompt=f"评估Agent响应质量。输入:{case.user_input}。预期:{case.expected_behavior}。响应:{response_text[:500]}。评分0-1:",
            temperature=0.0,
        )
        score = judge_result.get("score", 0.5)

        return PromptTestResult(
            case_id=case.id,
            passed=not violations and score >= self.PASSING_SCORE,
            tools_called=tools_called,
            response_preview=response_text[:200],
            token_count=response.get("token_count", 0),
            violations=violations,
            score=score,
        )
# deploy/Dockerfile
# ---- Build stage: compile/install Python dependencies -------------------
FROM python:3.12-slim AS builder
WORKDIR /app
RUN apt-get update && apt-get install -y --no-install-recommends gcc git \
    && rm -rf /var/lib/apt/lists/*
COPY requirements.txt .
# Install into a standalone prefix rather than root's home directory: the
# runtime stage runs as a non-root user, which cannot read /root/.local.
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt

# ---- Runtime stage: slim image without the build toolchain ---------------
FROM python:3.12-slim AS runtime
WORKDIR /app
# curl is required by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*
# Merge the installed packages and console scripts into the interpreter's
# own prefix so they are importable by every user.
COPY --from=builder /install /usr/local
COPY src/ ./src/
COPY prompts/ ./prompts/
COPY tools/ ./tools/
COPY config/ ./config/
RUN useradd -m -u 1000 agentuser && chown -R agentuser:agentuser /app
USER agentuser
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1
EXPOSE 8080
CMD ["python3", "-m", "src.agent_server", "--config", "config/production.yaml"]
# deploy/k8s/agent-deployment.yaml
# Production Deployment, Service and HorizontalPodAutoscaler for the agent.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: agent-prod
  namespace: agent-prod
  labels: { app: agent-system, tier: production }
spec:
  # NOTE(review): the HPA below also manages the replica count of this
  # Deployment; re-applying this manifest resets it to 3.
  replicas: 3
  strategy:
    type: RollingUpdate
    # Start one new pod before removing an old one, so capacity never drops.
    rollingUpdate: { maxSurge: 1, maxUnavailable: 0 }
  selector: { matchLabels: { app: agent-system, tier: production } }
  template:
    metadata:
      labels: { app: agent-system, tier: production }
      annotations: { prometheus.io/scrape: "true", prometheus.io/port: "9090" }
    spec:
      containers:
        - name: agent
          image: ghcr.io/org/agent-system:latest
          ports:
            - containerPort: 8080
              name: http
            - containerPort: 9090
              name: metrics
          env:
            - name: DEPLOYMENT_ENV
              value: "production"
            - name: LLM_API_KEY
              valueFrom: { secretKeyRef: { name: agent-secrets, key: llm-api-key } }
            - name: PROMPT_VERSION
              value: "prod-2.0.0"
          resources:
            requests: { memory: "2Gi", cpu: "1" }
            limits: { memory: "4Gi", cpu: "2" }
          livenessProbe:
            httpGet: { path: /health, port: 8080 }
            initialDelaySeconds: 30
            periodSeconds: 15
          readinessProbe:
            httpGet: { path: /ready, port: 8080 }
            initialDelaySeconds: 5
            periodSeconds: 10
          volumeMounts:
            - name: config
              mountPath: /app/config
              readOnly: true
            - name: prompts
              mountPath: /app/prompts
              readOnly: true
      volumes:
        - name: config
          configMap: { name: agent-config }
        - name: prompts
          configMap: { name: agent-prompts }
---
apiVersion: v1
kind: Service
metadata: { name: agent-service, namespace: agent-prod }
spec:
  selector: { app: agent-system, tier: production }
  ports:
    - port: 8080
      targetPort: 8080
      name: http
    - port: 9090
      targetPort: 9090
      name: metrics
  type: ClusterIP
---
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata: { name: agent-hpa, namespace: agent-prod }
spec:
  scaleTargetRef: { apiVersion: apps/v1, kind: Deployment, name: agent-prod }
  minReplicas: 2
  maxReplicas: 10
  metrics:
    - type: Resource
      resource: { name: cpu, target: { type: Utilization, averageUtilization: 70 } }
    - type: Resource
      resource: { name: memory, target: { type: Utilization, averageUtilization: 80 } }
    - type: Pods
      pods:
        metric: { name: agent_request_latency_p99 }
        # Kubernetes quantities have no time suffix, so "2s" is rejected.
        # NOTE(review): assumes the metric is reported in seconds - confirm.
        target: { type: AverageValue, averageValue: "2" }
# deploy/blue_green_deploy.py
from dataclasses import dataclass
import json, subprocess, time
@dataclass
class BlueGreenState:
    """Snapshot of which environment is live and what each one runs."""

    # Name of the environment currently receiving traffic: "blue" or "green".
    active: str
    blue_version: str
    green_version: str
    blue_replicas: int
    green_replicas: int
class BlueGreenDeployer:
    """Blue/green rollout driver that shells out to ``kubectl``.

    The new version is rolled out to the idle environment, smoke-tested, and
    only then is the Service selector switched over. ``self.state`` tracks
    which environment is live so that consecutive deployments alternate.
    """

    # Replica counts for the environment serving traffic and the idle one.
    ACTIVE_REPLICAS = 3
    STANDBY_REPLICAS = 1

    def __init__(self, namespace: str, service_name: str):
        self.namespace = namespace
        self.service_name = service_name
        self.state = BlueGreenState(active="blue", blue_version="2.0.0",
                                    green_version="1.9.0", blue_replicas=3,
                                    green_replicas=1)

    def _inactive_env(self) -> str:
        """Return the name of the environment not currently serving traffic."""
        return "green" if self.state.active == "blue" else "blue"

    def _record_switch(self, new_active: str, new_version: str) -> None:
        """Update ``self.state`` after traffic has moved to *new_active*."""
        self.state.active = new_active
        setattr(self.state, f"{new_active}_version", new_version)
        setattr(self.state, f"{new_active}_replicas", self.ACTIVE_REPLICAS)

    def deploy_new_version(self, new_version: str, image_tag: str) -> bool:
        """Deploy *image_tag* to the idle environment and switch traffic to it.

        Returns:
            ``True`` once traffic has been switched and the old environment
            scaled down; ``False`` if the rollout or the smoke tests fail
            (the idle environment is scaled back down in that case).

        Raises:
            subprocess.CalledProcessError: if setting the image, scaling, or
                patching the Service fails.
        """
        inactive = self._inactive_env()
        inactive_deploy = f"agent-{inactive}"
        print(f"[BlueGreen] 部署 {new_version} 到 {inactive} 环境")

        # Update the image on the idle deployment.
        subprocess.run(["kubectl", "set", "image", f"deployment/{inactive_deploy}",
                        f"agent={image_tag}", "-n", self.namespace], check=True)
        # Scale it up to full serving capacity.
        subprocess.run(["kubectl", "scale", f"deployment/{inactive_deploy}",
                        f"--replicas={self.ACTIVE_REPLICAS}", "-n", self.namespace],
                       check=True)
        # Wait for the rollout to become ready.
        result = subprocess.run(
            ["kubectl", "rollout", "status", f"deployment/{inactive_deploy}",
             "-n", self.namespace, "--timeout=5m"],
            capture_output=True, text=True)
        if result.returncode != 0:
            print(f"部署失败: {result.stderr}")
            # Do not leave a failed rollout running at full scale.
            self._rollback(inactive)
            return False

        # Smoke tests run before any traffic is moved.
        smoke = subprocess.run(["pytest", "tests/smoke/", "-q", "--timeout=30"],
                               capture_output=True, text=True)
        if smoke.returncode != 0:
            print("Smoke Tests 失败,回滚")
            self._rollback(inactive)
            return False

        # Switch traffic by repointing the Service selector.
        previous = self.state.active
        subprocess.run(["kubectl", "patch", "service", self.service_name,
                        "-p", json.dumps({"spec": {"selector": {"agent-env": inactive}}}),
                        "-n", self.namespace], check=True)
        # Traffic has moved: record it immediately so the next deployment
        # targets the other environment even if the scale-down below fails.
        self._record_switch(inactive, new_version)

        # Scale the previous environment down to standby.
        subprocess.run(["kubectl", "scale", f"deployment/agent-{previous}",
                        f"--replicas={self.STANDBY_REPLICAS}", "-n", self.namespace],
                       check=True)
        setattr(self.state, f"{previous}_replicas", self.STANDBY_REPLICAS)

        print(f"✅ 部署完成,活跃环境: {inactive}")
        return True

    def _rollback(self, failed_env: str):
        """Scale *failed_env* back to standby size; its image is left as is."""
        subprocess.run(["kubectl", "scale", f"deployment/agent-{failed_env}",
                        f"--replicas={self.STANDBY_REPLICAS}", "-n", self.namespace])
        print(f"🔄 已回滚 {failed_env} 环境")
# deploy/k8s/agent-canary.yaml
# NGINX canary Ingress: routes 5% of /v1/chat traffic to the canary Service.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: agent-canary-ingress
  namespace: agent-prod
  annotations:
    nginx.ingress.kubernetes.io/canary: "true"
    # Percentage of requests sent to the canary backend.
    nginx.ingress.kubernetes.io/canary-weight: "5"
spec:
  ingressClassName: nginx
  rules:
    - host: api.agent-system.com
      http:
        paths:
          - path: /v1/chat
            pathType: Prefix
            # NOTE(review): a Service named agent-canary is not defined in
            # the manifests shown here - confirm it exists.
            backend: { service: { name: agent-canary, port: { number: 8080 } } }
# monitoring/deployment_metrics.py
from dataclasses import dataclass
from datetime import datetime
@dataclass
class DeploymentMetrics:
    """Deployment health indicators, exportable in Prometheus text format."""

    deploy_success_rate: float = 0.0
    rollback_rate: float = 0.0
    mean_time_to_recover: float = 0.0
    mean_time_to_detect: float = 0.0
    service_uptime_7d: float = 0.0
    pod_restart_count: int = 0
    readiness_check_failures: int = 0
    p50_latency_ms: float = 0.0
    p99_latency_ms: float = 0.0
    token_throughput_per_sec: float = 0.0
    prompt_regression_pass_rate: float = 0.0
    e2e_test_pass_rate: float = 0.0

    def to_prometheus_metrics(self) -> str:
        """Render every field as an ``agent_<field>`` gauge sample.

        Each sample carries the current time as its timestamp. The text
        exposition format expects that timestamp in milliseconds since the
        epoch, not seconds.
        """
        from dataclasses import fields

        ts_ms = int(datetime.now().timestamp() * 1000)
        help_text = {"deploy_success_rate": "部署成功率"}

        lines = []
        for f in fields(self):
            metric = f"agent_{f.name}"
            if f.name in help_text:
                lines.append(f"# HELP {metric} {help_text[f.name]}")
            lines.append(f"# TYPE {metric} gauge")
            lines.append(f"{metric} {getattr(self, f.name)} {ts_ms}")
        return "\n".join(lines)
# deploy/prometheus/alert-rules.yaml
# Prometheus alerting rules for agent deployments.
groups:
  - name: agent-deployment-alerts
    rules:
      - alert: AgentDeployFailed
        expr: agent_deploy_success_rate < 0.95
        for: 5m
        labels: { severity: critical }
        annotations:
          summary: "Agent 部署成功率低于 95%"
      - alert: AgentHighLatency
        expr: agent_p99_latency_ms > 5000
        for: 2m
        labels: { severity: warning }
        annotations:
          summary: "Agent P99 延迟超过 5s"
      - alert: AgentFrequentRollback
        # increase() yields the number of rollbacks within the window;
        # rate() would yield a per-second average, which never exceeds 2.
        expr: increase(agent_rollback_count[1h]) > 2
        for: 10m
        labels: { severity: critical }
        annotations:
          summary: "Agent 频繁回滚"
      - alert: AgentPodRestart
        expr: increase(kube_pod_container_status_restarts_total{pod=~"agent-.*"}[5m]) > 0
        for: 1m
        labels: { severity: warning }
        annotations:
          summary: "Agent Pod 发生重启"
      - alert: AgentRegressionFail
        expr: agent_prompt_regression_pass_rate < 0.9
        for: 5m
        labels: { severity: critical }
        annotations:
          summary: "Agent Prompt 回归测试通过率低于 90%"
本文全面解析了 AI Agent CI/CD 与部署自动化的完整技术栈。生产级 Agent 部署不仅仅是 Docker + Kubernetes 的堆叠,更需要 Prompt 版本管理、回归测试、安全扫描、灰度发布和自动回滚等 Agent 特有的工程实践。随着 Agent 系统在生产环境中日益普及,建立完善的 CI/CD 体系将成为 Agent 工程的标配能力。🚀🤖