Files

54 lines
2.0 KiB
Python
Raw Permalink Normal View History

import asyncio
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from app.agents.llm_adapter import OpenAICompatibleLLMClient
from app.agents.patient_agent import PatientAgent
from app.core.config import settings
from app.db.session import SessionLocal
from app.repositories.case_repository import CaseRepository
async def main() -> None:
    """Local debugging: call the Patient Agent streaming reply directly, bypassing the frontend and FastAPI.

    Prints the LLM client mode and the relevant ``settings`` values, streams
    one reply for the first active case returned by ``CaseRepository``, and
    reports timing and text statistics once a chunk with ``done`` set arrives.

    Raises:
        SystemExit: If no active case is available, or if the stream ends
            without ever yielding a ``done`` chunk (exit status 1 in both
            cases).
    """
    client = OpenAICompatibleLLMClient()
    print(f"mock_mode={client.is_mock_mode}")
    print(f"fast_model={settings.llm_fast_model}")
    print(f"fast_thinking={settings.llm_fast_thinking_enabled}")
    print(f"stream_first_token_timeout={settings.llm_stream_first_token_timeout_seconds}")
    print(f"stream_total_timeout={settings.llm_stream_total_timeout_seconds}")

    db = SessionLocal()
    try:
        active_cases = CaseRepository(db).list_active_cases()
        try:
            case = active_cases[0]
        except IndexError:
            # An empty result would otherwise surface as a bare IndexError
            # traceback; exit with an explicit message (status 1) instead.
            raise SystemExit(
                "no active cases available; cannot run the patient agent stream check"
            ) from None

        pieces: list[str] = []
        first_token_ms = None
        final_chunk = None
        async for chunk in PatientAgent().stream_reply(
            case, [], "孩子发热几天了?最高体温多少?", "novice"
        ):
            # Keep only the first non-None first-token latency that is reported.
            if first_token_ms is None and chunk.first_token_ms is not None:
                first_token_ms = chunk.first_token_ms
            if chunk.done:
                # The delta of the terminating chunk is intentionally not
                # appended, matching the original accumulation order.
                final_chunk = chunk
                break
            pieces.append(chunk.delta)

        text = "".join(pieces)

        if final_chunk is None:
            # The stream was exhausted without a terminating chunk: report
            # what was received and signal failure to the caller's shell.
            print("done_seen=False")
            print(f"text_len={len(text)}")
            print(f"text_preview={text[:30]}")
            raise SystemExit(1)

        print("done_seen=True")
        print(f"first_token_ms={first_token_ms}")
        print(f"total_latency_ms={final_chunk.total_latency_ms}")
        print(f"model={final_chunk.model}")
        print(f"fallback_used={final_chunk.fallback_used}")
        print(f"text_len={len(text)}")
        print(f"text_preview={text[:30]}")
    finally:
        # Always release the DB session, including on the SystemExit paths.
        db.close()
if __name__ == "__main__":
    # Script entry point: drive the async debug routine to completion on a
    # fresh event loop when this file is executed directly.
    debug_run = main()
    asyncio.run(debug_run)