finalize medical consultation agent backend

2026-06-03 15:51:46 +08:00
parent 93d9e1c6a5
commit eb43573a44
33 changed files with 1063 additions and 281 deletions
@@ -0,0 +1,130 @@
+from __future__ import annotations
+
+import json
+import sys
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy import text
+from sqlalchemy.exc import SQLAlchemyError
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+
+from app.core.config import settings
+from app.db.session import SessionLocal
+from scripts.check_final_schema import build_schema_report
+
+
+# Row-count queries executed by _collect_counts(); each key becomes an entry in the
+# report's "database.counts" section. The statements are static SQL with no interpolated input.
+COUNT_SQL = {
+    "departments": "SELECT COUNT(*) FROM department",
+    "active_cases": "SELECT COUNT(*) FROM case_base WHERE status = 1 AND publish_status = 1",
+    "traditional_cases": "SELECT COUNT(*) FROM traditional_case",
+    "teaching_cases": "SELECT COUNT(*) FROM teaching_case",
+    "exam_items": "SELECT COUNT(*) FROM case_exam_item",
+    "scoring_rules": "SELECT COUNT(*) FROM scoring_rule",
+    "active_prompt_templates": "SELECT COUNT(*) FROM prompt_templates WHERE is_active = 1",
+    "knowledge_sources": "SELECT COUNT(*) FROM knowledge_sources",
+    "knowledge_documents": "SELECT COUNT(*) FROM knowledge_documents",
+    "knowledge_chunks": "SELECT COUNT(*) FROM knowledge_chunks",
+}
+
+
+def main() -> None:
+    """Run the final readiness check and print the result as JSON.
+
+    Exits with status 1 when the report is not ready and with status 2 when a
+    database error prevented the report from being built; returns normally otherwise.
+
+    Raises:
+        SystemExit: With code 1 or 2 as described above.
+    """
+    try:
+        report = build_readiness_report()
+    except SQLAlchemyError as exc:
+        # str(exc) can be empty, in which case splitlines() returns [] and indexing [0]
+        # would raise IndexError inside this handler; fall back to the exception class name.
+        detail_lines = str(exc).splitlines()
+        print(
+            json.dumps(
+                {
+                    "ready": False,
+                    "error": "database operation failed",
+                    "detail": detail_lines[0] if detail_lines else type(exc).__name__,
+                },
+                ensure_ascii=False,
+                indent=2,
+            )
+        )
+        raise SystemExit(2) from exc
+
+    print(json.dumps(report, ensure_ascii=False, indent=2))
+    if not report["summary"]["ready"]:
+        raise SystemExit(1)
+
+
+def build_readiness_report() -> dict[str, Any]:
+    """Assemble the readiness report from schema, seed-data, prompt-file and configuration checks.
+
+    Returns:
+        A dict with four sections: "summary" (overall ``ready`` flag, the individual
+        critical checks and non-blocking warnings), "database", "prompts" and "config".
+    """
+    schema_report = build_schema_report()
+    counts = _collect_counts()
+    prompt_files = _prompt_files()
+    config = _public_config()
+    schema_summary = schema_report["summary"]
+
+    # Every entry here must be truthy for the report to be "ready".
+    critical_checks = {
+        "schema_complete": schema_summary["can_run_demo"],
+        "has_active_cases": counts["active_cases"] > 0,
+        "has_department": counts["departments"] > 0,
+        "has_case_detail": counts["traditional_cases"] + counts["teaching_cases"] > 0,
+        "has_exam_items": counts["exam_items"] > 0,
+        "has_scoring_rules": counts["scoring_rules"] > 0,
+        "has_prompt_templates": counts["active_prompt_templates"] > 0,
+        "has_prompt_files": len(prompt_files) > 0,
+        "auth_user_center_configured": bool(settings.auth_user_me_url),
+    }
+
+    # Warnings are reported but never affect the "ready" flag.
+    warning_rules = (
+        (
+            not settings.llm_api_key,
+            "LLM_API_KEY is not configured; real LLM calls will not work unless mock is enabled.",
+        ),
+        (
+            counts["knowledge_chunks"] == 0,
+            "knowledge_chunks is empty; scoring can run but guideline reference retrieval has no data.",
+        ),
+        (
+            bool(schema_summary.get("missing_indexes")),
+            "Some recommended indexes are missing; functions can run, but high-concurrency query performance may be affected.",
+        ),
+    )
+    warnings = [message for triggered, message in warning_rules if triggered]
+
+    summary_section = {
+        "ready": all(critical_checks.values()),
+        "critical_checks": critical_checks,
+        "warnings": warnings,
+    }
+    database_section = {
+        "dialect": schema_report["database_dialect"],
+        "counts": counts,
+        "schema_summary": schema_summary,
+    }
+    prompts_section = {
+        "markdown_count": len(prompt_files),
+        "files": prompt_files,
+    }
+    return {
+        "summary": summary_section,
+        "database": database_section,
+        "prompts": prompts_section,
+        "config": config,
+    }
+
+
+def _collect_counts() -> dict[str, int]:
+    """Run every query in COUNT_SQL and return the row counts keyed by the same names."""
+    totals: dict[str, int] = {}
+    with SessionLocal() as db:
+        for label, statement in COUNT_SQL.items():
+            row_count = db.execute(text(statement)).scalar()
+            # A NULL/None scalar is reported as 0.
+            totals[label] = int(row_count or 0)
+    return totals
+
+
+def _prompt_files() -> list[str]:
+    """List the Markdown files under app/prompts as sorted, forward-slash relative paths."""
+    prompts_dir = Path(__file__).resolve().parents[1] / "app" / "prompts"
+    relative_names = []
+    for markdown_path in prompts_dir.rglob("*.md"):
+        # Normalise Windows separators so the report looks the same on every platform.
+        relative_names.append(str(markdown_path.relative_to(prompts_dir)).replace("\\", "/"))
+    relative_names.sort()
+    return relative_names
+
+
+def _public_config() -> dict[str, Any]:
+    """Summarise configuration state; URLs and the API key are reduced to configured/not-configured flags."""
+    auth_section = {
+        "auth_validate_enabled": settings.auth_validate_enabled,
+        "auth_user_me_url_configured": bool(settings.auth_user_me_url),
+    }
+    llm_section = {
+        "llm_base_url_configured": bool(settings.llm_base_url),
+        "llm_model": settings.llm_model,
+        "llm_fast_model": settings.llm_fast_model,
+        "llm_reason_model": settings.llm_reason_model,
+        "llm_api_key_configured": bool(settings.llm_api_key),
+        "llm_mock_enabled": settings.llm_mock_enabled,
+    }
+    runtime_section = {
+        "runtime_memory_backend": settings.runtime_memory_backend,
+        "redis_url_configured": bool(settings.redis_url),
+    }
+    # Merged in this order so the emitted key order matches the grouping above.
+    return {**auth_section, **llm_section, **runtime_section}
+
+
+# Run the readiness check only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,240 @@
+from __future__ import annotations
+
+import json
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from sqlalchemy import inspect
+from sqlalchemy.exc import SQLAlchemyError
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+
+from app.db.session import engine
+
+
+@dataclass(frozen=True)
+class TableSpec:
+    """Required structure of one table: the columns that must exist and the columns that should be indexed."""
+
+    # Column names that must be present on the table.
+    columns: tuple[str, ...]
+    # Column names expected to be covered by an index, a unique constraint or the primary key.
+    indexed_columns: tuple[str, ...] = ()
+
+
+# Tables required by the agent backend, keyed by table name. build_schema_report() treats a
+# missing table or column as blocking and a missing index as a warning only.
+REQUIRED_SCHEMA: dict[str, TableSpec] = {
+    "user": TableSpec(
+        columns=(
+            "id",
+            "username",
+            "real_name",
+            "phone",
+            "role_type",
+            "department_id",
+            "institution_id",
+            "competency_profile",
+            "weak_dimensions",
+            "strong_dimensions",
+            "ai_preference",
+            "total_training_count",
+            "total_case_count",
+            "current_level",
+            "status",
+        ),
+        indexed_columns=("id", "username", "phone", "department_id", "institution_id"),
+    ),
+    "department": TableSpec(
+        columns=("id", "name", "category", "institution_id", "created_at", "updated_at"),
+        indexed_columns=("id", "institution_id"),
+    ),
+    "case_base": TableSpec(
+        columns=(
+            "id",
+            "title",
+            "case_type",
+            "difficulty",
+            "chief_complaint",
+            "description",
+            "patient_age",
+            "patient_gender",
+            "publish_status",
+            "status",
+            "department_id",
+        ),
+        indexed_columns=("id", "case_type", "difficulty", "publish_status", "status", "department_id"),
+    ),
+    "traditional_case": TableSpec(
+        columns=("id", "case_id", "standard_diagnosis", "standard_treatment", "guideline_reference"),
+        indexed_columns=("id", "case_id"),
+    ),
+    "teaching_case": TableSpec(
+        columns=("id", "case_id", "teaching_goal", "discussion_questions", "teacher_guide", "scoring_focus"),
+        indexed_columns=("id", "case_id"),
+    ),
+    "scoring_rule": TableSpec(
+        columns=("id", "case_id", "dimension", "competency_dimension", "score_weight", "scoring_standard", "rubric_json"),
+        indexed_columns=("id", "case_id", "dimension", "competency_dimension"),
+    ),
+    "case_exam_item": TableSpec(
+        columns=("id", "case_id", "item_code", "item_name", "item_type", "result_text", "is_key", "is_abnormal"),
+        indexed_columns=("id", "case_id", "item_code", "item_type"),
+    ),
+    "training_session": TableSpec(
+        columns=(
+            "id",
+            "session_code",
+            "external_user_id",
+            "case_id",
+            "case_type",
+            "training_mode",
+            "score_type",
+            "status",
+            "memory_key",
+        ),
+        indexed_columns=("id", "session_code", "external_user_id", "case_id", "training_mode", "status"),
+    ),
+    "training_order": TableSpec(
+        columns=("id", "session_id", "external_user_id", "case_id", "exam_item_id", "item_code", "result_text", "ordered_at"),
+        indexed_columns=("id", "session_id", "external_user_id", "case_id"),
+    ),
+    "training_submission": TableSpec(
+        columns=("id", "session_id", "external_user_id", "primary_diagnosis", "treatment_measures"),
+        indexed_columns=("id", "external_user_id"),
+    ),
+    "training_record": TableSpec(
+        columns=(
+            "id",
+            "user_id",
+            "external_user_id",
+            "session_id",
+            "case_id",
+            "total_score",
+            "ai_feedback_structured",
+            "pdf_file_path",
+        ),
+        indexed_columns=("id", "user_id", "external_user_id", "session_id", "case_id"),
+    ),
+    "training_score_detail": TableSpec(
+        columns=(
+            "id",
+            "record_id",
+            "rule_id",
+            "dimension",
+            "score",
+            "deducted_reason",
+            "evidence_message_ids",
+            "ai_confidence",
+            "comment",
+            "created_at",
+            "updated_at",
+        ),
+        indexed_columns=("id", "record_id", "rule_id", "dimension"),
+    ),
+    "prompt_templates": TableSpec(
+        columns=("id", "template_code", "agent_type", "scene", "version_no", "model_type", "output_format", "file_path", "is_active"),
+        indexed_columns=("id", "template_code", "agent_type", "scene"),
+    ),
+    "knowledge_sources": TableSpec(
+        columns=("id", "source_code", "source_name", "source_type", "authority_level", "is_active"),
+        indexed_columns=("id", "source_code", "source_type"),
+    ),
+    "knowledge_documents": TableSpec(
+        columns=("id", "source_id", "department_id", "title", "task_type", "file_path", "is_active"),
+        indexed_columns=("id", "source_id", "department_id", "task_type"),
+    ),
+    "knowledge_chunks": TableSpec(
+        columns=("id", "document_id", "department_id", "task_type", "chunk_text", "keywords", "weight", "is_active"),
+        indexed_columns=("id", "document_id", "department_id", "task_type"),
+    ),
+    "audit_logs": TableSpec(
+        columns=("id", "user_id", "tenant_id", "session_id", "action", "resource_type", "request_id", "created_at"),
+        indexed_columns=("id", "user_id", "session_id", "action", "created_at"),
+    ),
+}
+
+
+def main() -> None:
+    """Run the read-only schema check and print the result as JSON.
+
+    Exits with status 1 when required tables or columns are missing and with status 2
+    when a database error prevented the check; returns normally otherwise.
+
+    Raises:
+        SystemExit: With code 1 or 2 as described above.
+    """
+    try:
+        report = build_schema_report()
+    except SQLAlchemyError as exc:
+        # str(exc) can be empty, in which case splitlines() returns [] and indexing [0]
+        # would raise IndexError inside this handler; fall back to the exception class name.
+        detail_lines = str(exc).splitlines()
+        print(
+            json.dumps(
+                {
+                    "database_dialect": engine.dialect.name,
+                    "summary": {
+                        "can_run_demo": False,
+                        "database_available": False,
+                        "error": "database connection failed",
+                        "detail": detail_lines[0] if detail_lines else type(exc).__name__,
+                    },
+                },
+                ensure_ascii=False,
+                indent=2,
+            )
+        )
+        raise SystemExit(2) from exc
+    print(json.dumps(report, ensure_ascii=False, indent=2))
+    if not report["summary"]["can_run_demo"]:
+        raise SystemExit(1)
+
+
+def build_schema_report() -> dict[str, Any]:
+    """Compare the live database against REQUIRED_SCHEMA using reflection only.
+
+    Returns:
+        A dict with the dialect name, a per-table breakdown under "tables", and a
+        "summary" listing missing tables, columns and indexes plus ``can_run_demo``,
+        which is true when no required table or column is missing.
+    """
+    inspector = inspect(engine)
+    present_tables = set(inspector.get_table_names())
+    per_table: dict[str, Any] = {}
+    absent_tables: list[str] = []
+    absent_columns: list[str] = []
+    absent_indexes: list[str] = []
+
+    for name, spec in REQUIRED_SCHEMA.items():
+        if name in present_tables:
+            found_columns = {column["name"] for column in inspector.get_columns(name)}
+            found_indexes = _indexed_columns(inspector, name)
+            lacking_columns = [col for col in spec.columns if col not in found_columns]
+            lacking_indexes = [col for col in spec.indexed_columns if col not in found_indexes]
+
+            # The summary lists use "table.column" qualified names.
+            absent_columns += [f"{name}.{col}" for col in lacking_columns]
+            absent_indexes += [f"{name}.{col}" for col in lacking_indexes]
+
+            per_table[name] = {
+                "exists": True,
+                "missing_columns": lacking_columns,
+                "missing_indexes": lacking_indexes,
+            }
+        else:
+            absent_tables.append(name)
+            per_table[name] = {"exists": False}
+
+    summary = {
+        "checked_tables": len(REQUIRED_SCHEMA),
+        "missing_tables": absent_tables,
+        "missing_columns": absent_columns,
+        "missing_indexes": absent_indexes,
+        "index_warning": bool(absent_indexes),
+        # Missing indexes do not block the demo; only missing tables or columns do.
+        "can_run_demo": not (absent_tables or absent_columns),
+    }
+    return {
+        "database_dialect": engine.dialect.name,
+        "identity_rule": "Authorization -> Django /api/user/users/me/ -> data.id -> user isolation fields",
+        "tables": per_table,
+        "summary": summary,
+    }
+
+
+def _indexed_columns(inspector, table_name: str) -> set[str]:
+    """Return every column of the table covered by the primary key, an index or a unique constraint."""
+    pk_info = inspector.get_pk_constraint(table_name) or {}
+    covered: set[str] = set(pk_info.get("constrained_columns") or [])
+    covered |= {
+        column
+        for index_info in inspector.get_indexes(table_name)
+        for column in index_info.get("column_names") or []
+    }
+    covered |= {
+        column
+        for unique_info in inspector.get_unique_constraints(table_name)
+        for column in unique_info.get("column_names") or []
+    }
+    return covered
+
+
+# Run the schema check only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,75 @@
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+from sqlalchemy import text
+
+from app.core.config import settings
+from app.db.session import SessionLocal
+
+
+# Tables emptied by clear_training_runtime_data(), in the order they are processed.
+# NOTE(review): presumably ordered child tables first so deletes respect foreign keys — confirm.
+TRAINING_TABLES = (
+    "training_score_detail",
+    "training_record",
+    "training_submission",
+    "training_order",
+    "training_session",
+    "audit_logs",
+)
+
+
+def clear_training_runtime_data(clear_reports: bool = False) -> dict:
+    """Delete all rows from the training runtime tables and optionally the generated report files.
+
+    Only the tables listed in TRAINING_TABLES are touched.
+
+    Args:
+        clear_reports: When true, also delete the files in the local report directory.
+
+    Returns:
+        A dict with the part of the database URL after the last "@" (the whole URL when
+        it has none), per-table row counts before and after the cleanup, and the number
+        of report files deleted.
+    """
+    with SessionLocal() as db:
+        before = {table: _count(db, table) for table in TRAINING_TABLES}
+        for table in TRAINING_TABLES:
+            db.execute(text(f"DELETE FROM {table}"))
+        # Commit the deletes before any DDL: MySQL implicitly commits on ALTER TABLE anyway,
+        # so this only makes the transaction boundary explicit.
+        db.commit()
+
+        # "ALTER TABLE ... AUTO_INCREMENT" is MySQL/MariaDB syntax; other dialects reject it,
+        # so the counter reset is skipped there instead of failing the whole cleanup.
+        dialect = db.bind.dialect.name if db.bind else ""
+        if dialect in ("mysql", "mariadb"):
+            for table in TRAINING_TABLES:
+                db.execute(text(f"ALTER TABLE {table} AUTO_INCREMENT = 1"))
+            db.commit()
+        after = {table: _count(db, table) for table in TRAINING_TABLES}
+
+    deleted_reports = _clear_reports() if clear_reports else 0
+    return {
+        "database": settings.database_url.split("@")[-1] if "@" in settings.database_url else settings.database_url,
+        "tables_before": before,
+        "tables_after": after,
+        "deleted_report_files": deleted_reports,
+    }
+
+
+def _count(db, table: str) -> int:
+    """Return the current number of rows in ``table``, treating a missing scalar as 0."""
+    statement = text(f"SELECT COUNT(*) FROM {table}")
+    total = db.execute(statement).scalar()
+    return int(total or 0)
+
+
+def _clear_reports() -> int:
+    """Delete the files directly inside the report directory and return how many were removed.
+
+    The configured directory must resolve to ``storage/reports`` under this script's
+    parent directory; anything else is refused. Sub-directories and the directory
+    itself are left in place.
+
+    Raises:
+        RuntimeError: If the configured report directory is not the expected one.
+    """
+    backend_root = Path(__file__).resolve().parents[1]
+    configured = Path(settings.report_storage_dir)
+    # Relative settings are interpreted against the backend root.
+    target = (configured if configured.is_absolute() else backend_root / configured).resolve()
+    allowed = (backend_root / "storage" / "reports").resolve()
+    if target != allowed:
+        raise RuntimeError(f"refuse to clear unexpected report directory: {target}")
+
+    target.mkdir(parents=True, exist_ok=True)
+    # Collect first, then delete, so the listing is complete before anything is removed.
+    doomed = [entry for entry in target.iterdir() if entry.is_file()]
+    for entry in doomed:
+        entry.unlink()
+    return len(doomed)
+
+
+def main() -> None:
+    """Command-line entry point: clear training data only after an explicit confirmation token."""
+    parser = argparse.ArgumentParser(description="Clear training runtime data only.")
+    parser.add_argument("--confirm", required=True, help="Must be CLEAR_TRAINING_DATA")
+    parser.add_argument("--reports", action="store_true", help="Also clear local generated PDF reports")
+    options = parser.parse_args()
+
+    if options.confirm == "CLEAR_TRAINING_DATA":
+        summary = clear_training_runtime_data(clear_reports=options.reports)
+        print(summary)
+        return
+    # Any other value aborts before the database is touched.
+    raise SystemExit("confirmation mismatch; use --confirm CLEAR_TRAINING_DATA")
+
+
+# Run the cleanup only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
@@ -20,7 +20,6 @@ from app.models import (
    ScoringRule,
    TeachingCase,
    TraditionalCase,
-    User,
 )


@@ -35,8 +34,7 @@ def init_database() -> None:
 def seed_demo_data(db) -> None:
    """病例导入：写入儿科支气管肺炎病例、检查项目、评分规则和提示词元数据。"""
    department = _get_or_create_department(db)
-    user = _get_or_create_seed_user(db)
-    case = _get_or_create_case_base(db, department.id, user.id)
+    case = _get_or_create_case_base(db, department.id)
    _seed_traditional_case(db, case.id)
    _seed_teaching_case(db, case.id)
    _seed_exam_items(db, case.id)
@@ -47,27 +45,16 @@ def seed_demo_data(db) -> None:

 def _get_or_create_department(db) -> Department:
    """科室种子：写入儿科科室。"""
-    department = db.scalar(select(Department).where(Department.code == "PEDIATRICS"))
+    department = db.scalar(select(Department).where(Department.name == "儿科"))
    if department:
        return department
-    department = Department(name="儿科", code="PEDIATRICS", sort_order=1, is_active=True)
+    department = Department(name="儿科", category="clinical", institution_id=1)
    db.add(department)
    db.flush()
    return department


-def _get_or_create_seed_user(db) -> User:
-    """用户占位：写入系统种子用户，不承担登录职责。"""
-    user = db.scalar(select(User).where(User.external_user_id == "system_seed"))
-    if user:
-        return user
-    user = User(external_user_id="system_seed", display_name="系统种子数据")
-    db.add(user)
-    db.flush()
-    return user
-
-
-def _get_or_create_case_base(db, department_id: int, user_id: int) -> CaseBase:
+def _get_or_create_case_base(db, department_id: int) -> CaseBase:
    """病例主表种子：以 case_base 作为病例唯一主表。"""
    case = db.scalar(select(CaseBase).where(CaseBase.title == "支气管肺炎 - 6岁男性患儿"))
    if case:
@@ -99,7 +86,7 @@ def _get_or_create_case_base(db, department_id: int, user_id: int) -> CaseBase:
        vector_status=0,
        publish_status=1,
        status=1,
-        created_by_id=user_id,
+        created_by_id=None,
        department_id=department_id,
    )
    db.add(case)
@@ -32,6 +32,9 @@ def _apply_table_comments(db) -> None:
        "training_order": "训练检查申请表",
        "training_submission": "训练诊断治疗提交表",
        "training_record": "训练记录表",
+        "training_score_detail": "评分明细表",
+        "department": "科室表",
+        "user": "用户表",
    }
    dialect = db.bind.dialect.name if db.bind else ""
    if dialect != "mysql":
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+from sqlalchemy import inspect, text
+
+sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
+
+from app.db.base import Base
+from app.db.session import SessionLocal, engine
+from app.models import Department, TrainingScoreDetail, User  # noqa: F401
+
+
+def main() -> None:
+    """Create any missing model tables, copy legacy department rows and set table comments.
+
+    Existing tables are not dropped by this function.
+    """
+    Base.metadata.create_all(bind=engine)
+    migration_steps = (_copy_old_departments, _apply_table_comments)
+    with SessionLocal() as session:
+        for step in migration_steps:
+            step(session)
+        # Both steps share one transaction and are committed together.
+        session.commit()
+    print("user/department/score detail migration completed")
+
+
+def _copy_old_departments(db) -> None:
+    """Copy rows from the legacy ``departments`` table into ``department``, keeping their ids.
+
+    Does nothing unless both tables exist and ``department`` is still empty.
+    """
+    known_tables = set(inspect(engine).get_table_names())
+    if not {"departments", "department"} <= known_tables:
+        return
+
+    existing_rows = int(db.execute(text("SELECT COUNT(*) FROM `department`")).scalar() or 0)
+    if existing_rows > 0:
+        # Already populated: copying again would collide on the preserved ids.
+        return
+
+    copy_statement = text(
+        """
+            INSERT INTO `department` (`id`, `name`, `category`, `institution_id`, `created_at`, `updated_at`)
+            SELECT
+                `id`,
+                `name`,
+                'clinical',
+                1,
+                `created_at`,
+                `updated_at`
+            FROM `departments`
+            """
+    )
+    db.execute(copy_statement)
+
+
+def _apply_table_comments(db) -> None:
+    """Set table comments on the tables this migration creates or adjusts (MySQL only).
+
+    Args:
+        db: Session used to execute the ALTER TABLE statements; nothing is committed here.
+    """
+    # A session without a bind has no dialect to inspect; treat it as non-MySQL and skip
+    # instead of raising AttributeError on ``None.dialect``.
+    dialect = db.bind.dialect.name if db.bind else ""
+    if dialect != "mysql":
+        return
+    comments = {
+        "user": "用户表",
+        "department": "科室表",
+        "training_score_detail": "评分明细表",
+    }
+    for table_name, comment in comments.items():
+        # Names and comments are the fixed literals above, so the f-string carries no external input.
+        db.execute(text(f"ALTER TABLE `{table_name}` COMMENT='{comment}'"))
+
+
+# Run the migration only when this file is executed as a script.
+if __name__ == "__main__":
+    main()