chore: initialize medical consultation agent demo

This commit is contained in:
刘金宝
2026-06-01 09:25:26 +08:00
commit a7733243b2
139 changed files with 15764 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
"""Agent 编排层:Patient、Scoring、Report 和 LLM Adapter。"""
+158
View File
@@ -0,0 +1,158 @@
import json
from pathlib import Path
from typing import Any
from app.agents.llm_adapter import DeepSeekClient
from app.core.config import settings
from app.models.source_case import CaseBase
from app.models.training import SessionOrder, TrainingSession
class HintAgent:
"""新手提示 Agent:基于病例、对话和检查结果调用快速模型生成结构化提示。"""
def __init__(self, llm: DeepSeekClient | None = None) -> None:
self.llm = llm or DeepSeekClient()
self.template_path = Path(__file__).resolve().parents[1] / "prompts" / "hint" / "novice_case_hint.md"
async def generate(
self,
session: TrainingSession,
case: CaseBase,
memory_messages: list[dict],
orders: list[SessionOrder],
last_user_message: str | None = None,
) -> dict:
"""LLM 提示:标准化输入病例上下文并要求模型返回固定 JSON 结构。"""
payload = self._build_input(session, case, memory_messages, orders, last_user_message)
messages = [
{"role": "system", "content": self._load_template()},
{"role": "user", "content": json.dumps(payload, ensure_ascii=False)},
]
try:
response = await self.llm.chat(
messages,
settings.llm_fast_model,
thinking_enabled=settings.llm_fast_thinking_enabled,
response_format={"type": "json_object"},
max_tokens=settings.llm_hint_max_tokens,
)
data = json.loads(response.content)
return self._normalize_output(data, payload)
except Exception:
return self._fallback_output(payload)
def _build_input(
self,
session: TrainingSession,
case: CaseBase,
memory_messages: list[dict],
orders: list[SessionOrder],
last_user_message: str | None,
) -> dict:
"""输入构造:把病例、会话、对话摘要和已申请检查整理为稳定 JSON。"""
return {
"case": {
"case_id": case.id,
"department": case.department.name if getattr(case, "department", None) else "",
"title": case.title,
"chief_complaint": case.chief_complaint,
"key_symptoms": case.key_symptoms or [],
"key_exams": case.key_exams or [],
"key_points": case.key_points or [],
},
"session": {
"mode": session.mode,
"status": session.status,
},
"conversation_summary": [
{"role": item.get("role"), "content": str(item.get("content", ""))[:240]}
for item in memory_messages[-12:]
if item.get("content")
],
"ordered_results": [
{
"item_code": order.item_code,
"item_name": order.item_name,
"item_type": order.item_type,
"result_text": order.result_text,
"is_key": order.is_key,
"is_abnormal": order.is_abnormal,
}
for order in orders
],
"last_user_message": last_user_message or "",
}
def _load_template(self) -> str:
"""提示词读取:加载新手模式病例提示模板。"""
if self.template_path.exists():
return self.template_path.read_text(encoding="utf-8")
return "你是医疗问诊训练提示 Agent,只输出合法 JSON。"
def _normalize_output(self, data: Any, payload: dict) -> dict:
"""输出校验:确保 LLM 返回结构稳定,不把原始文本透传给前端。"""
if not isinstance(data, dict):
return self._fallback_output(payload)
normalized = {
"hints": self._clean_str_list(data.get("hints"))[:4],
"missing_dimensions": self._clean_str_list(data.get("missing_dimensions"))[:6],
"next_questions": self._clean_str_list(data.get("next_questions"))[:5],
"recommended_orders": self._clean_orders(data.get("recommended_orders"))[:5],
}
if not any(normalized.values()):
return self._fallback_output(payload)
return normalized
def _fallback_output(self, payload: dict) -> dict:
"""提示兜底:LLM 异常或 JSON 不合法时按病例关键点生成稳定提示。"""
case = payload.get("case", {})
ordered_codes = {item.get("item_code") for item in payload.get("ordered_results", [])}
key_exams = case.get("key_exams") or []
recommended_orders = []
if "blood_routine" not in ordered_codes:
recommended_orders.append({"item_code": "blood_routine", "reason": "用于初步判断感染及炎症反应"})
if "crp" not in ordered_codes:
recommended_orders.append({"item_code": "crp", "reason": "用于辅助判断炎症程度"})
if "chest_xray" not in ordered_codes:
recommended_orders.append({"item_code": "chest_xray", "reason": "用于判断肺部感染影像学证据"})
if "spo2" not in ordered_codes:
recommended_orders.append({"item_code": "spo2", "reason": "用于判断氧合和病情严重程度"})
return {
"hints": [
f"本病例主诉为{case.get('chief_complaint') or '当前症状'},问诊要围绕起病时间、症状演变和严重程度展开。",
"当前提示来自病例关键症状、关键检查和已完成对话的结构化兜底分析。",
"获得检查结果后,需要把异常结果用于诊断依据和病情严重程度判断。",
],
"missing_dimensions": ["既往史", "严重程度评估", "人文沟通"],
"next_questions": [
"孩子最高体温多少?退烧药后能不能降下来?",
"有没有喘息、气促、口唇发绀或呼吸困难?",
"以前有没有喘息、哮喘、湿疹或药物过敏史?",
"精神、食欲、饮水和尿量怎么样?",
"家属现在最担心什么?",
],
"recommended_orders": recommended_orders or [
{"item_code": str(item), "reason": "病例关键检查,需要结合结果完善诊断依据"} for item in key_exams[:3]
],
}
def _clean_str_list(self, value: Any) -> list[str]:
"""字段清洗:把模型返回的数组字段压成字符串列表。"""
if not isinstance(value, list):
return []
return [str(item).strip() for item in value if str(item).strip()]
def _clean_orders(self, value: Any) -> list[dict]:
"""推荐检查清洗:只保留 item_code 和 reason 两个前端需要的字段。"""
if not isinstance(value, list):
return []
cleaned = []
for item in value:
if not isinstance(item, dict):
continue
code = str(item.get("item_code", "")).strip()
reason = str(item.get("reason", "")).strip()
if code:
cleaned.append({"item_code": code, "reason": reason})
return cleaned
+280
View File
@@ -0,0 +1,280 @@
import asyncio
import json
import time
from collections.abc import AsyncIterator
from dataclasses import dataclass
import httpx
from app.core.config import settings
from app.core.exceptions import AppError
@dataclass
class LLMResponse:
"""LLM 响应:封装非流式模型输出和耗时指标。"""
content: str
model: str
latency_ms: int
token_usage: dict | None = None
@dataclass
class LLMStreamChunk:
"""LLM 流式片段:封装 SSE 增量内容和完成状态。"""
delta: str
done: bool = False
first_token_ms: int | None = None
total_latency_ms: int | None = None
model: str | None = None
fallback_used: bool = False
class OpenAICompatibleLLMClient:
"""LLM Adapter:统一封装 OpenAI-compatible 模型的可替换调用。"""
def __init__(self) -> None:
self.base_url = settings.llm_base_url.rstrip("/")
self.api_key = settings.llm_api_key
self.timeout = settings.llm_timeout_seconds
self.chat_completions_url = self._build_chat_completions_url()
@property
def is_mock_mode(self) -> bool:
"""模型模式:没有 API Key 或开启 mock 时使用本地模拟响应。"""
return settings.llm_mock_enabled or not self.api_key
async def chat(
self,
messages: list[dict],
model: str,
*,
thinking_enabled: bool | None = None,
reasoning_effort: str | None = None,
response_format: dict | None = None,
max_tokens: int | None = None,
) -> LLMResponse:
"""非流式调用:向 OpenAI-compatible 接口发送 messages 并返回完整文本。"""
start = time.perf_counter()
if self.is_mock_mode:
content = self._mock_response(messages)
return LLMResponse(content=content, model=f"mock-{model}", latency_ms=int((time.perf_counter() - start) * 1000))
try:
async with httpx.AsyncClient(timeout=self._http_timeout()) as client:
resp = await client.post(
self.chat_completions_url,
headers={"Authorization": f"Bearer {self.api_key}"},
json=self._build_payload(
model=model,
messages=messages,
stream=False,
thinking_enabled=thinking_enabled,
reasoning_effort=reasoning_effort,
response_format=response_format,
max_tokens=max_tokens,
),
)
resp.raise_for_status()
data = resp.json()
content = (data["choices"][0]["message"].get("content") or "").strip()
if not content:
raise KeyError("empty llm content")
return LLMResponse(
content=content,
model=model,
latency_ms=int((time.perf_counter() - start) * 1000),
token_usage=data.get("usage"),
)
except (httpx.TimeoutException, httpx.HTTPError, KeyError, IndexError, json.JSONDecodeError) as exc:
if settings.llm_fallback_to_mock:
content = self._mock_response(messages)
return LLMResponse(
content=content,
model=f"mock-fallback-{model}",
latency_ms=int((time.perf_counter() - start) * 1000),
token_usage={"fallback_reason": exc.__class__.__name__},
)
raise AppError("LLM_CALL_FAILED", "llm service call failed", 502) from exc
async def stream_chat(
self,
messages: list[dict],
model: str,
*,
thinking_enabled: bool | None = None,
reasoning_effort: str | None = None,
max_tokens: int | None = None,
) -> AsyncIterator[LLMStreamChunk]:
"""流式调用:以统一 chunk 结构输出 OpenAI-compatible SSE 增量。"""
start = time.perf_counter()
first_token_ms: int | None = None
if self.is_mock_mode:
async for chunk in self._mock_stream(messages, model, start, model_label=f"mock-{model}"):
yield chunk
return
try:
async with httpx.AsyncClient(timeout=self._http_timeout()) as client:
async with client.stream(
"POST",
self.chat_completions_url,
headers={"Authorization": f"Bearer {self.api_key}"},
json=self._build_payload(
model=model,
messages=messages,
stream=True,
thinking_enabled=thinking_enabled,
reasoning_effort=reasoning_effort,
max_tokens=max_tokens,
),
) as resp:
resp.raise_for_status()
async for line in resp.aiter_lines():
if not line.startswith("data:"):
continue
payload = line.removeprefix("data:").strip()
if payload == "[DONE]":
break
data = json.loads(payload)
delta_obj = data["choices"][0].get("delta", {})
content_delta = delta_obj.get("content") or ""
reasoning_delta = delta_obj.get("reasoning_content") or ""
if (content_delta or reasoning_delta) and first_token_ms is None:
first_token_ms = int((time.perf_counter() - start) * 1000)
if content_delta:
yield LLMStreamChunk(delta=content_delta, first_token_ms=first_token_ms)
except (httpx.TimeoutException, httpx.HTTPError, KeyError, IndexError, json.JSONDecodeError) as exc:
if settings.llm_fallback_to_mock:
async for chunk in self._mock_stream(
messages,
model,
start,
model_label=f"mock-fallback-{model}",
fallback_used=True,
):
yield chunk
return
raise AppError("LLM_STREAM_FAILED", "llm stream call failed", 502) from exc
yield LLMStreamChunk(
delta="",
done=True,
first_token_ms=first_token_ms,
total_latency_ms=int((time.perf_counter() - start) * 1000),
model=model,
)
async def _mock_stream(
self,
messages: list[dict],
model: str,
start: float,
model_label: str,
fallback_used: bool = False,
) -> AsyncIterator[LLMStreamChunk]:
"""Mock 流式输出:在模型不可用时保持 Demo 流程可验证。"""
first_token_ms: int | None = None
content = self._mock_response(messages)
for piece in self._split_mock_content(content):
await asyncio.sleep(0.02)
if first_token_ms is None:
first_token_ms = int((time.perf_counter() - start) * 1000)
yield LLMStreamChunk(delta=piece, first_token_ms=first_token_ms)
yield LLMStreamChunk(
delta="",
done=True,
first_token_ms=first_token_ms,
total_latency_ms=int((time.perf_counter() - start) * 1000),
model=model_label,
fallback_used=fallback_used,
)
def _mock_response(self, messages: list[dict]) -> str:
"""Mock 输出:在没有 DeepSeek Key 时保证 Demo 闭环可运行。"""
latest = next((m.get("content", "") for m in reversed(messages) if m.get("role") == "user"), "")
prompt_head = " ".join(m.get("content", "").lower() for m in messages[:2])
if "score_type" in prompt_head and "dimension_scores" in prompt_head:
return json.dumps(
{
"score_type": "percentage",
"total_score": 82,
"dimension_scores": [
{"dimension": "信息获取", "score": 20, "max_score": 25, "comment": "覆盖了发热、咳嗽和喘息,儿科特异性病史仍需加强。"},
{"dimension": "分析推理", "score": 21, "max_score": 25, "comment": "能够识别肺炎方向,鉴别诊断完整性中等。"},
{"dimension": "处置决策", "score": 17, "max_score": 20, "comment": "治疗原则基本合理,风险预案需要更具体。"},
{"dimension": "沟通人文", "score": 12, "max_score": 15, "comment": "有告知意识,家属安抚和健康教育可更系统。"},
{"dimension": "临床整合", "score": 12, "max_score": 15, "comment": "诊疗流程完整,时间分配和整体组织较清晰。"},
],
"errors": [{"title": "儿科特异性病史不足", "description": "疫苗接种、过敏史、既往喘息史追问不足。"}],
"improvement_plan": ["补充儿科问诊框架:出生史、接种史、过敏史、既往喘息史。"],
"evidence_summary": ["用户完成了核心症状追问、检查申请、诊断和治疗提交。"],
"guideline_refs": [],
"overall_comment": "本次训练完成主要诊疗流程,诊断方向正确,治疗方案具备基本可执行性。",
},
ensure_ascii=False,
)
if "体温" in latest or "发热" in latest:
return "最高烧到39度多,已经反复四天了,退烧后会好一点,但很快又起来。"
if "" in latest or "呼吸" in latest:
return "昨天开始喘得明显,活动后更明显,晚上咳嗽也更重。"
if "精神" in latest or "" in latest:
return "精神比平时差一些,吃饭少了,但还能喝水,小便比平时略少。"
if "既往" in latest or "过敏" in latest:
return "以前没有明确哮喘诊断,也没有药物过敏史,小时候感冒时偶尔会咳得久。"
return "家长:孩子主要是发热、咳嗽,昨天开始喘,您可以继续问我具体情况。"
def _split_mock_content(self, content: str) -> list[str]:
"""Mock 分片:把本地模拟文本拆成流式输出片段。"""
return [content[i : i + 8] for i in range(0, len(content), 8)]
def _build_chat_completions_url(self) -> str:
"""接口地址:兼容 base_url 和完整 chat/completions URL 两种写法。"""
if self.base_url.endswith("/chat/completions"):
return self.base_url
return f"{self.base_url}/chat/completions"
def _http_timeout(self) -> httpx.Timeout:
"""超时策略:限制连接、写入和读取等待,避免前端长时间卡在生成中。"""
return httpx.Timeout(
timeout=self.timeout,
connect=min(8, self.timeout),
read=self.timeout,
write=min(15, self.timeout),
pool=min(8, self.timeout),
)
def _build_payload(
self,
*,
model: str,
messages: list[dict],
stream: bool,
thinking_enabled: bool | None = None,
reasoning_effort: str | None = None,
response_format: dict | None = None,
max_tokens: int | None = None,
) -> dict:
"""请求构造:兼容 DeepSeek V4 thinking、reasoning_effort 和 JSON 输出。"""
payload: dict = {"model": model, "messages": messages, "stream": stream}
supports_reasoning_options = self._supports_reasoning_options(model)
if thinking_enabled is not None and supports_reasoning_options:
payload["thinking"] = {"type": "enabled" if thinking_enabled else "disabled"}
if reasoning_effort and supports_reasoning_options and thinking_enabled is not False:
payload["reasoning_effort"] = reasoning_effort
if response_format:
payload["response_format"] = response_format
if max_tokens:
payload["max_tokens"] = max_tokens
return payload
def _supports_reasoning_options(self, model: str) -> bool:
"""厂商兼容:只向 DeepSeek 发送 thinking/reasoning_effort 等专有参数。"""
base = self.base_url.lower()
model_name = model.lower()
return "deepseek" in base or model_name.startswith("deepseek")
DeepSeekClient = OpenAICompatibleLLMClient
+69
View File
@@ -0,0 +1,69 @@
from collections.abc import AsyncIterator
from app.agents.llm_adapter import LLMResponse, LLMStreamChunk
from app.agents.hint_agent import HintAgent
from app.agents.patient_agent import PatientAgent
from app.agents.report_agent import ReportAgent
from app.agents.scoring_agent import ScoringAgent
from app.models.source_case import CaseBase
from app.models.training import SessionOrder, SessionSubmission, TrainingSession
class MedicalConsultationOrchestrator:
"""主编排器:统一调度 Patient、Scoring、Report 等子 Agent。"""
def __init__(self) -> None:
self.patient_agent = PatientAgent()
self.hint_agent = HintAgent()
self.scoring_agent = ScoringAgent()
self.report_agent = ReportAgent()
async def patient_reply(self, session: TrainingSession, case: CaseBase, memory_messages: list[dict], message: str) -> LLMResponse:
"""问诊编排:调用 Patient Agent 生成 AI 病人回复。"""
return await self.patient_agent.reply(case, memory_messages, message, session.mode)
async def patient_stream_reply(
self,
session: TrainingSession,
case: CaseBase,
memory_messages: list[dict],
message: str,
) -> AsyncIterator[LLMStreamChunk]:
"""流式问诊编排:调用 Patient Agent 并返回流式片段。"""
async for chunk in self.patient_agent.stream_reply(case, memory_messages, message, session.mode):
yield chunk
async def evaluate(
self,
session: TrainingSession,
case: CaseBase,
memory_messages: list[dict],
orders: list[SessionOrder],
submission: SessionSubmission,
rubric: object | None,
guideline_refs: list[dict],
scoring_rules: list | None = None,
) -> dict:
"""评价编排:调用 Scoring Agent 后交给 Report Agent 整理报告。"""
scoring_result = await self.scoring_agent.score(
session=session,
case=case,
memory_messages=memory_messages,
orders=orders,
submission=submission,
rubric=rubric,
guideline_refs=guideline_refs,
scoring_rules=scoring_rules or [],
)
return self.report_agent.build_report(scoring_result)
async def generate_hints(
self,
session: TrainingSession,
case: CaseBase,
memory_messages: list[dict],
orders: list[SessionOrder],
last_user_message: str | None = None,
) -> dict:
"""新手提示编排:基于当前会话上下文生成轻量训练提醒。"""
return await self.hint_agent.generate(session, case, memory_messages, orders, last_user_message)
+76
View File
@@ -0,0 +1,76 @@
from collections.abc import AsyncIterator
from app.agents.llm_adapter import DeepSeekClient, LLMResponse, LLMStreamChunk
from app.core.config import settings
from app.models.source_case import CaseBase
class PatientAgent:
"""AI 病人:根据病例资料、隐藏信息和短期 memory 回复医生问诊。"""
def __init__(self, llm: DeepSeekClient | None = None) -> None:
self.llm = llm or DeepSeekClient()
async def reply(self, case: CaseBase, memory_messages: list[dict], user_message: str, mode: str) -> LLMResponse:
"""问诊回复:拼接病例上下文、短期记忆和用户输入后调用 Patient Agent。"""
messages = self._build_messages(case, memory_messages, user_message, mode)
return await self.llm.chat(
messages,
settings.llm_fast_model,
thinking_enabled=settings.llm_fast_thinking_enabled,
max_tokens=settings.llm_fast_max_tokens,
)
async def stream_reply(
self,
case: CaseBase,
memory_messages: list[dict],
user_message: str,
mode: str,
) -> AsyncIterator[LLMStreamChunk]:
"""流式问诊:以 SSE 方式返回 AI 病人增量回复。"""
messages = self._build_messages(case, memory_messages, user_message, mode)
async for chunk in self.llm.stream_chat(
messages,
settings.llm_fast_model,
thinking_enabled=settings.llm_fast_thinking_enabled,
max_tokens=settings.llm_fast_max_tokens,
):
yield chunk
def _build_messages(self, case: CaseBase, memory_messages: list[dict], user_message: str, mode: str) -> list[dict]:
"""提示词拼接:构造 AI 病人的系统提示词和对话历史。"""
profile = case.ai_patient_profile or {}
hidden_info = case.hidden_patient_info or {}
mode_rule = {
"novice": "新手模式:回答清楚,必要时可提示医生继续追问症状、既往史或检查。",
"practice": "练习模式:只回答被问到的信息,不主动给诊断建议。",
"teaching": "教学模式:保持患者身份,允许在回答后补充简短学习提示。",
}.get(mode, "只回答被问到的信息。")
system = f"""
你是一名标准化 AI 病人或患儿家属,只能基于病例资料回答。
病例主诉:{case.chief_complaint}
患者人设:{profile}
隐藏信息:{hidden_info}
回答规则:
1. 不主动透露未被问到的隐藏信息。
2. 不替医生做诊断,不提供治疗方案。
3. 不编造病例外检查检验结果。
4. 每次回答控制在1到3句话,使用患儿家属口吻,不输出分析过程。
5. 只输出给医生看的家属回答纯文本,不输出 JSON、Markdown、标题、解释或思考过程。
6. 如果医生一次问多个问题,按问题顺序简短回答,不扩展病例外信息。
7. {mode_rule}
"""
messages = [{"role": "system", "content": system.strip()}]
messages.extend(self._to_llm_history(memory_messages[-12:]))
messages.append({"role": "user", "content": user_message})
return messages
def _to_llm_history(self, memory_messages: list[dict]) -> list[dict]:
"""历史转换:把业务角色 doctor/patient 转换为 LLM role。"""
role_map = {"doctor": "user", "patient": "assistant", "system": "system", "tool": "assistant"}
return [
{"role": role_map.get(item.get("role"), "user"), "content": item.get("content", "")}
for item in memory_messages
if item.get("content")
]
+48
View File
@@ -0,0 +1,48 @@
class ReportAgent:
"""报告 Agent:整理评分结果为接口和 PDF 可复用的报告结构。"""
def build_report(self, scoring_result: dict) -> dict:
"""报告整理:校验评分结果字段并补齐展示默认值,不重新评分。"""
dimension_scores = self._normalize_dimension_scores(scoring_result.get("dimension_scores", []))
total_score = self._safe_float(scoring_result.get("total_score"), 0)
return {
"score_type": scoring_result.get("score_type", "percentage"),
"total_score": total_score,
"dimension_scores": dimension_scores,
"errors": self._ensure_list(scoring_result.get("errors")),
"improvement_plan": self._ensure_list(scoring_result.get("improvement_plan")),
"evidence_summary": self._ensure_list(scoring_result.get("evidence_summary")),
"guideline_refs": self._ensure_list(scoring_result.get("guideline_refs")),
"overall_comment": scoring_result.get("overall_comment", ""),
"_llm_model": scoring_result.get("_llm_model"),
"_latency_metrics": scoring_result.get("_latency_metrics", {}),
}
def _normalize_dimension_scores(self, raw_scores: object) -> list[dict]:
"""维度校验:把模型输出归一为前端和数据库可保存的评分列表。"""
if not isinstance(raw_scores, list):
return []
normalized: list[dict] = []
for item in raw_scores:
if not isinstance(item, dict):
continue
normalized.append(
{
"dimension": str(item.get("dimension", "未命名维度")),
"score": self._safe_float(item.get("score"), 0),
"max_score": self._safe_float(item.get("max_score"), 0),
"comment": str(item.get("comment", "")),
}
)
return normalized
def _ensure_list(self, value: object) -> list:
"""列表校验:保证报告中的数组字段稳定返回 list。"""
return value if isinstance(value, list) else []
def _safe_float(self, value: object, default: float) -> float:
"""数值校验:把模型输出中的分数安全转换为 float。"""
try:
return float(value)
except (TypeError, ValueError):
return default
+369
View File
@@ -0,0 +1,369 @@
import json
import logging
import re
import time
from typing import Any
from app.agents.llm_adapter import DeepSeekClient
from app.core.config import settings
from app.core.exceptions import AppError
from app.models.source_case import CaseBase
from app.models.training import SessionOrder, SessionSubmission, TrainingSession
logger = logging.getLogger(__name__)
class ScoringAgent:
"""评分 Agent:结合病例、问诊过程、检查结果、提交内容和 scoring_rule 生成结构化评价。"""
def __init__(self, llm: DeepSeekClient | None = None) -> None:
self.llm = llm or DeepSeekClient()
async def score(
self,
session: TrainingSession,
case: CaseBase,
memory_messages: list[dict],
orders: list[SessionOrder],
submission: SessionSubmission,
rubric: object | None,
guideline_refs: list[dict],
scoring_rules: list | None = None,
) -> dict:
"""评价生成:优先调用快速模型输出 JSON,失败时返回稳定兜底评分。"""
start = time.perf_counter()
messages = self._build_messages(session, case, memory_messages, orders, submission, guideline_refs, scoring_rules or [])
try:
response = await self.llm.chat(
messages,
settings.llm_fast_model,
thinking_enabled=settings.llm_fast_thinking_enabled,
reasoning_effort=None,
response_format={"type": "json_object"} if settings.llm_scoring_json_response else None,
max_tokens=min(settings.llm_scoring_max_tokens, 1800),
)
data = json.loads(response.content)
data = self._normalize_score_payload(data, session.score_type, guideline_refs)
data["_llm_model"] = response.model
data["_latency_metrics"] = {"scoring_latency_ms": response.latency_ms, "fallback_used": False}
return data
except (AppError, json.JSONDecodeError, KeyError, TypeError, ValueError) as exc:
logger.warning("scoring_agent.fallback session_id=%s error=%s", session.id, exc.__class__.__name__)
data = self._fallback_score(session.score_type, guideline_refs)
data["_llm_model"] = f"local-fallback-{settings.llm_fast_model}"
data["_latency_metrics"] = {
"scoring_latency_ms": int((time.perf_counter() - start) * 1000),
"fallback_used": True,
"fallback_reason": exc.__class__.__name__,
}
return data
def _build_messages(
self,
session: TrainingSession,
case: CaseBase,
memory_messages: list[dict],
orders: list[SessionOrder],
submission: SessionSubmission,
guideline_refs: list[dict],
scoring_rules: list,
) -> list[dict]:
"""评分提示词:只传评价必需字段,避免旧表冗余字段和长病历全文拖慢生成。"""
transcript_summary = [
{"role": item.get("role"), "content": self._truncate(item.get("content", ""), 220)}
for item in memory_messages[-14:]
if item.get("content")
]
ordered_payload = [
{
"item_code": order.item_code,
"item_name": order.item_name,
"result_text": self._truncate(order.result_text, 180),
"is_key": order.is_key,
"is_abnormal": order.is_abnormal,
}
for order in orders
]
payload = {
"score_type": session.score_type,
"case": {
"title": case.title,
"chief_complaint": case.chief_complaint,
"standard_diagnosis": case.diagnosis_primary,
"diagnosis_basis": self._truncate(case.diagnosis_basis, 260),
"key_symptoms": case.key_symptoms or [],
"key_exams": case.key_exams or [],
"key_points": case.key_points or [],
},
"conversation": transcript_summary,
"orders": ordered_payload,
"submission": {
"primary_diagnosis": submission.primary_diagnosis,
"differential_diagnoses": submission.differential_diagnoses or [],
"diagnosis_basis": self._truncate(submission.diagnosis_basis, 320),
"treatment_principle": self._truncate(submission.treatment_principle, 220),
"treatment_measures": self._truncate(submission.treatment_measures, 320),
"risk_plan": self._truncate(submission.risk_plan, 220),
"communication": self._truncate(submission.communication, 220),
"follow_up": self._truncate(submission.follow_up, 180),
},
"scoring_rules": self._compact_scoring_rules(scoring_rules),
"guidelines": self._compact_guidelines(guideline_refs),
}
system = (
"你是医学教学问诊评分专家,只输出合法 JSON。"
"请结合病例、问诊过程、检查申请、诊断和治疗提交进行教学评价。"
"输出字段固定为 score_type,total_score,dimension_scores,errors,improvement_plan,"
"evidence_summary,guideline_refs,overall_comment。"
"dimension_scores 为 5-6 项,每项包含 dimension,score,max_score,comment,evidence,deductions,improvement。"
"evidence、deductions、improvement_plan、evidence_summary 必须是数组,每个元素一句话。"
"errors 每项包含 title,description,severity,related_dimension。"
"评价必须具体指出用户问了什么、申请了什么检查、诊断治疗哪里充分或不足。"
"报告仅用于教学训练,不替代真实临床诊疗。"
)
return [{"role": "system", "content": system}, {"role": "user", "content": json.dumps(payload, ensure_ascii=False)}]
def _compact_scoring_rules(self, scoring_rules: list) -> list[dict]:
"""源库评分规则压缩:把 scoring_rule 转为评分 Agent 可直接使用的细则。"""
compact = []
for item in scoring_rules[:12]:
compact.append(
{
"dimension": getattr(item, "dimension", ""),
"competency_dimension": getattr(item, "competency_dimension", ""),
"score_weight": float(getattr(item, "score_weight", 0) or 0),
"ai_auto_score": bool(getattr(item, "ai_auto_score", True)),
"osce_dimension": bool(getattr(item, "osce_dimension", False)),
"scoring_standard": self._truncate(getattr(item, "scoring_standard", ""), 240),
"rubric_json": getattr(item, "rubric_json", {}) or {},
}
)
return compact
def _compact_guidelines(self, guideline_refs: list[dict]) -> list[dict]:
"""参考指南压缩:保留少量命中来源,供模型引用。"""
compact = []
for item in (guideline_refs or [])[:3]:
if isinstance(item, dict):
compact.append(
{
"title": item.get("title") or item.get("source"),
"content": self._truncate(item.get("content") or item.get("chunk"), 180),
}
)
return compact
def _normalize_score_payload(self, data: dict, score_type: str, guideline_refs: list[dict]) -> dict:
"""评分结构校验:补齐缺失字段,并拆分模型返回的长编号列表。"""
if not isinstance(data, dict):
data = self._fallback_score(score_type, guideline_refs)
data.setdefault("score_type", "percentage")
data.setdefault("total_score", 0)
data.setdefault("dimension_scores", [])
data.setdefault("errors", [])
data.setdefault("improvement_plan", [])
data.setdefault("evidence_summary", [])
data.setdefault("guideline_refs", guideline_refs)
data.setdefault("overall_comment", "")
normalized_dimensions = []
for item in data.get("dimension_scores") or []:
if not isinstance(item, dict):
continue
normalized_dimensions.append(
{
"dimension": str(item.get("dimension") or "未命名维度"),
"score": float(item.get("score") or 0),
"max_score": float(item.get("max_score") or (100 if data.get("score_type") == "percentage" else 5)),
"comment": self._truncate(item.get("comment") or "", 180),
"evidence": self._ensure_list(item.get("evidence")),
"deductions": self._ensure_list(item.get("deductions")),
"improvement": self._truncate(item.get("improvement") or "", 180),
}
)
data["dimension_scores"] = normalized_dimensions or self._fallback_score("percentage", guideline_refs)["dimension_scores"]
data["errors"] = self._normalize_errors(data.get("errors"))
data["improvement_plan"] = self._ensure_list(data.get("improvement_plan"))
data["evidence_summary"] = self._ensure_list(data.get("evidence_summary"))
data["guideline_refs"] = self._normalize_guideline_refs(data.get("guideline_refs"), guideline_refs)
data["overall_comment"] = self._truncate(data.get("overall_comment") or "", 260)
if score_type == "five_point" and data.get("score_type") != "five_point":
return self._convert_to_five_point(data)
if score_type == "percentage" and data.get("score_type") != "percentage":
data["score_type"] = "percentage"
return data
def _normalize_errors(self, errors: object) -> list[dict]:
"""错误项归一化:转为报告可渲染的扣分项。"""
normalized = []
for index, item in enumerate(self._ensure_list(errors), start=1):
if isinstance(item, dict):
normalized.append(
{
"title": self._truncate(item.get("title") or f"问题 {index}", 60),
"description": self._truncate(item.get("description") or item.get("comment") or "", 180),
"severity": item.get("severity") or "medium",
"related_dimension": item.get("related_dimension") or item.get("dimension") or "综合表现",
}
)
else:
normalized.append(
{
"title": f"问题 {index}",
"description": self._truncate(str(item), 180),
"severity": "medium",
"related_dimension": "综合表现",
}
)
return normalized[:6]
def _normalize_guideline_refs(self, value: object, fallback_refs: list[dict]) -> list[dict]:
"""指南引用归一化:保证字符串、字典或空值都能转成字典数组。"""
raw_items = value if isinstance(value, list) else ([value] if value else fallback_refs)
normalized = []
for index, item in enumerate(raw_items or [], start=1):
if isinstance(item, dict):
normalized.append(
{
"title": self._truncate(item.get("title") or item.get("source") or f"参考依据 {index}", 80),
"content": self._truncate(
item.get("content") or item.get("text") or item.get("chunk") or item.get("summary") or "",
180,
),
"source": self._truncate(item.get("source") or item.get("source_type") or "knowledge_base", 80),
}
)
elif item:
normalized.append({"title": f"参考依据 {index}", "content": self._truncate(str(item), 180), "source": "llm_output"})
return normalized[:6]
def _ensure_list(self, value: object) -> list:
"""列表规整:拆分模型常见的 1/2/3 编号长文本。"""
if value is None:
return []
raw_items = value if isinstance(value, list) else [value]
items: list = []
for raw in raw_items:
if raw is None:
continue
if isinstance(raw, dict):
items.append(raw)
continue
text = str(raw).strip()
if not text:
continue
parts = re.split(r"(?:^|\s)(?:\d+[\.\、)]|[;]\d+[;])\s*", text)
parts = [part.strip(";、\n\t ") for part in parts if part and part.strip(";、\n\t ")]
items.extend(parts if len(parts) > 1 else [text])
return [self._truncate(item, 180) if not isinstance(item, dict) else item for item in items[:8]]
def _fallback_score(self, score_type: str, guideline_refs: list[dict]) -> dict:
"""评分兜底:LLM 失败时生成稳定结构化评分,保证报告流程可展示。"""
data = {
"score_type": "percentage",
"total_score": 80,
"dimension_scores": [
{
"dimension": "信息获取",
"score": 20,
"max_score": 25,
"comment": "完成主要症状追问,但儿科专科病史仍需补充。",
"evidence": ["围绕发热、咳嗽、喘息等核心症状展开问诊。"],
"deductions": ["既往喘息史、过敏史、疫苗接种史、家属照护能力等信息不够完整。"],
"improvement": "下一次按主诉、现病史、既往史、个人史、家族史和家属顾虑逐项补全。",
},
{
"dimension": "分析推理",
"score": 16,
"max_score": 20,
"comment": "诊断方向基本正确,但严重程度分层需要更清晰。",
"evidence": ["主要诊断指向支气管肺炎。"],
"deductions": ["鉴别诊断和严重程度判断未充分引用血氧、胸片和炎症指标。"],
"improvement": "把症状、体征、影像、血氧和炎症指标逐条对应到诊断依据和病情分层。",
},
{
"dimension": "检查利用",
"score": 12,
"max_score": 15,
"comment": "关键检查申请较完整,但检查结果解释仍可细化。",
"evidence": ["胸片、血氧或炎症指标可支持肺炎诊断和严重程度判断。"],
"deductions": ["对 SpO2、胸片异常和炎症指标的临床意义解释不够具体。"],
"improvement": "在诊断依据中写明每项异常检查如何支持诊断、鉴别诊断和治疗决策。",
},
{
"dimension": "处置决策",
"score": 16,
"max_score": 20,
"comment": "治疗原则基本完整,仍需补充监测和升级处理条件。",
"evidence": ["治疗原则覆盖抗感染、止咳平喘、改善氧合和观察病情。"],
"deductions": ["药物选择、门诊/住院判断、风险预案和随访节点不够具体。"],
"improvement": "补充治疗适应证、监测指标、病情加重预案和复诊/住院指征。",
},
{
"dimension": "沟通人文",
"score": 8,
"max_score": 10,
"comment": "有基本沟通意识,但家属顾虑回应不够结构化。",
"evidence": ["治疗方案中包含向家属说明病情和复诊指征。"],
"deductions": ["知情同意、家属担心、用药注意事项和家庭护理教育仍需补充。"],
"improvement": "增加对家属顾虑的回应、危险信号说明和家庭护理指导。",
},
{
"dimension": "临床整合",
"score": 8,
"max_score": 10,
"comment": "完成训练闭环,证据衔接仍需强化。",
"evidence": ["完成问诊、检查、诊断、治疗和评价流程。"],
"deductions": ["各阶段证据之间的逻辑衔接和 SOAP 化表达不够清晰。"],
"improvement": "用 SOAP 结构归纳病情,把证据、判断和计划串联起来。",
},
],
"errors": [
{
"title": "信息采集不够系统",
"description": "儿科特异性病史追问不足,影响对喘息诱因、感染风险和肺炎严重程度的判断。",
"severity": "medium",
"related_dimension": "信息获取",
}
],
"improvement_plan": [
"补充疫苗接种史、过敏史、既往喘息史、近期接触史和家庭照护能力评估。",
"诊断依据中逐条引用发热、咳嗽、喘息、肺部体征、胸片、血氧和炎症指标。",
"治疗方案中补充监测指标、病情加重预案、复诊/住院指征和家属健康教育。",
"沟通环节增加家属担心回应、知情说明和家庭护理要点。",
],
"evidence_summary": [
"问诊:已完成核心症状追问,但儿科特异性病史仍需补全。",
"检查:需重点评价是否申请并利用胸片、血氧和炎症指标。",
"诊断:主要诊断方向基本正确,鉴别诊断和严重程度分层需要更完整。",
"治疗:治疗原则基本覆盖抗感染、平喘、氧合和观察。",
"沟通:已有基本告知意识,仍需加强家属顾虑回应和健康教育。",
],
"guideline_refs": guideline_refs,
"overall_comment": "本次训练完成核心闭环,诊断和处理方向基本正确;后续需要强化问诊结构化、检查结果利用和治疗细化。",
}
return self._convert_to_five_point(data) if score_type == "five_point" else data
def _convert_to_five_point(self, data: dict) -> dict:
"""分数转换:将百分制评价转换为五分制,同时保留证据、扣分和改进细则。"""
converted = dict(data)
converted["score_type"] = "five_point"
converted["total_score"] = round(float(data.get("total_score", 0)) / 20, 1)
converted["dimension_scores"] = [
{
**item,
"score": round(float(item.get("score", 0)) / float(item.get("max_score") or 100) * 5, 1),
"max_score": 5,
}
for item in data.get("dimension_scores", [])
]
return converted
def _truncate(self, value: Any, limit: int) -> str:
"""文本截断:限制模型输入和报告字段长度,降低评分耗时并稳定排版。"""
if value is None:
return ""
text = str(value).strip()
return text if len(text) <= limit else f"{text[:limit]}..."