from datetime import datetime, timezone

from sqlalchemy import DateTime, Float, Integer, JSON, String, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column

from app.db.base import Base
from app.models.mixins import TimestampMixin


def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Used as the column default in place of ``datetime.utcnow``, which is
    deprecated since Python 3.12. The tzinfo is stripped so the value handed
    to the plain ``DateTime`` columns stays naive UTC, exactly as before.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


class KbKnowledgeSpace(TimestampMixin, Base):
    """Knowledge space model.

    Records the mapping between an institution and its Milvus collection and
    embedding parameters. One row per (institution_id, embedding_version).
    """

    __tablename__ = "kb_spaces"
    __table_args__ = (
        UniqueConstraint(
            "institution_id",
            "embedding_version",
            name="uq_kb_space_institution_version",
        ),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    institution_id: Mapped[int] = mapped_column(
        Integer, nullable=False, index=True, comment="机构ID"
    )
    institution_name: Mapped[str | None] = mapped_column(String(128), comment="机构名称")
    space_code: Mapped[str] = mapped_column(
        String(128), nullable=False, unique=True, comment="知识空间编码"
    )
    collection_name: Mapped[str] = mapped_column(
        String(128), nullable=False, unique=True, comment="Milvus集合名"
    )

    # Embedding configuration.
    embedding_model: Mapped[str] = mapped_column(
        String(128), nullable=False, comment="向量模型"
    )
    embedding_dim: Mapped[int] = mapped_column(Integer, nullable=False, comment="向量维度")
    embedding_version: Mapped[str] = mapped_column(
        String(32), nullable=False, default="v1", comment="向量版本"
    )

    # Chunking and retrieval defaults.
    chunk_size: Mapped[int] = mapped_column(Integer, nullable=False, comment="分片长度")
    chunk_overlap: Mapped[int] = mapped_column(
        Integer, nullable=False, comment="分片重叠长度"
    )
    top_k_default: Mapped[int] = mapped_column(
        Integer, nullable=False, default=5, comment="默认返回片段数"
    )
    score_threshold: Mapped[float] = mapped_column(
        Float, nullable=False, default=0.35, comment="默认相似度阈值"
    )

    status: Mapped[str] = mapped_column(
        String(32), nullable=False, default="active", index=True, comment="状态"
    )


class KbKnowledgeDocument(TimestampMixin, Base):
    """Knowledge document model.

    Records PDFs uploaded by content administrators and their processing
    status. A file hash is unique per institution
    (institution_id, file_sha256).
    """

    __tablename__ = "kb_documents"
    __table_args__ = (
        UniqueConstraint(
            "institution_id",
            "file_sha256",
            name="uq_kb_document_institution_sha",
        ),
    )

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    institution_id: Mapped[int] = mapped_column(
        Integer, nullable=False, index=True, comment="机构ID"
    )
    uploaded_by: Mapped[str] = mapped_column(
        String(128), nullable=False, index=True, comment="上传用户ID"
    )

    # Stored file description.
    file_name: Mapped[str] = mapped_column(
        String(255), nullable=False, comment="原始文件名"
    )
    file_sha256: Mapped[str] = mapped_column(
        String(64), nullable=False, index=True, comment="文件SHA256"
    )
    file_type: Mapped[str] = mapped_column(
        String(32), nullable=False, default="pdf", comment="文件类型"
    )
    file_size: Mapped[int] = mapped_column(Integer, nullable=False, comment="文件大小")
    file_path: Mapped[str] = mapped_column(
        String(512), nullable=False, comment="文件保存路径"
    )

    # Document-level metadata.
    document_title: Mapped[str | None] = mapped_column(String(255), comment="文档标题")
    document_category: Mapped[str] = mapped_column(
        String(64), nullable=False, default="other", comment="文档分类"
    )
    version: Mapped[str] = mapped_column(
        String(32), nullable=False, default="v1", comment="文档版本"
    )

    # Processing state.
    status: Mapped[str] = mapped_column(
        String(32), nullable=False, default="uploaded", index=True, comment="处理状态"
    )
    parse_status: Mapped[str] = mapped_column(
        String(32), nullable=False, default="pending", comment="解析状态"
    )
    embedding_status: Mapped[str] = mapped_column(
        String(32), nullable=False, default="pending", comment="向量状态"
    )
    chunk_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0, comment="分片数量"
    )
    error_message: Mapped[str | None] = mapped_column(Text, comment="错误信息")


class KbKnowledgeChunk(Base):
    """Knowledge chunk model.

    Stores PDF chunk text, page numbers and Milvus vector ID metadata. This
    model does not use ``TimestampMixin``; it declares its own ``created_at``.
    """

    __tablename__ = "kb_chunks"
    __table_args__ = (UniqueConstraint("chunk_uid", name="uq_kb_chunk_uid"),)

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    institution_id: Mapped[int] = mapped_column(
        Integer, nullable=False, index=True, comment="机构ID"
    )
    document_id: Mapped[int] = mapped_column(
        Integer, nullable=False, index=True, comment="文档ID"
    )
    # NOTE(review): chunk_uid carries both index=True and the unique constraint
    # above; the extra index may be redundant - confirm before removing, since
    # dropping it changes the schema.
    chunk_uid: Mapped[str] = mapped_column(
        String(128), nullable=False, index=True, comment="分片唯一ID"
    )
    chunk_index: Mapped[int] = mapped_column(Integer, nullable=False, comment="分片序号")

    # Position of the chunk inside the source document.
    page_start: Mapped[int] = mapped_column(Integer, nullable=False, comment="起始页")
    page_end: Mapped[int] = mapped_column(Integer, nullable=False, comment="结束页")
    section_title: Mapped[str | None] = mapped_column(String(255), comment="章节标题")

    # Chunk content.
    chunk_text: Mapped[str] = mapped_column(Text, nullable=False, comment="分片文本")
    chunk_hash: Mapped[str] = mapped_column(
        String(64), nullable=False, index=True, comment="分片Hash"
    )
    token_count: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0, comment="估算token数"
    )

    # Vector store linkage.
    vector_id: Mapped[str] = mapped_column(
        String(128), nullable=False, index=True, comment="Milvus向量ID"
    )
    collection_name: Mapped[str] = mapped_column(
        String(128), nullable=False, index=True, comment="Milvus集合名"
    )
    embedding_model: Mapped[str] = mapped_column(
        String(128), nullable=False, comment="向量模型"
    )

    # The database column is named "metadata"; the Python attribute uses a
    # trailing underscore because ``metadata`` is reserved on SQLAlchemy
    # declarative classes.
    metadata_: Mapped[dict | None] = mapped_column("metadata", JSON, comment="扩展元数据")

    # Naive UTC creation time, filled in by the Python-side default on insert.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=_utcnow, index=True)


class KbKnowledgeIngestionTask(TimestampMixin, Base):
    """Knowledge ingestion task model.

    Records the asynchronous progress of PDF parsing, chunking, embedding and
    insertion into Milvus.
    """

    __tablename__ = "kb_ingestion_tasks"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    document_id: Mapped[int] = mapped_column(
        Integer, nullable=False, index=True, comment="文档ID"
    )
    institution_id: Mapped[int] = mapped_column(
        Integer, nullable=False, index=True, comment="机构ID"
    )
    task_type: Mapped[str] = mapped_column(
        String(64), nullable=False, default="document_ingestion", comment="任务类型"
    )

    # Progress tracking.
    status: Mapped[str] = mapped_column(
        String(32), nullable=False, default="queued", index=True, comment="任务状态"
    )
    progress: Mapped[int] = mapped_column(
        Integer, nullable=False, default=0, comment="进度百分比"
    )
    current_step: Mapped[str | None] = mapped_column(String(255), comment="当前步骤")
    error_message: Mapped[str | None] = mapped_column(Text, comment="错误信息")

    # Both timestamps are nullable and have no default.
    started_at: Mapped[datetime | None] = mapped_column(DateTime)
    finished_at: Mapped[datetime | None] = mapped_column(DateTime)


class KbKnowledgeQueryLog(Base):
    """Learning assistant Q&A log.

    Records RAG retrieval hits, sources and latency, for auditing and
    effectiveness analysis. This model does not use ``TimestampMixin``; it
    declares its own ``created_at``.
    """

    __tablename__ = "kb_query_logs"

    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    user_id: Mapped[str] = mapped_column(
        String(128), nullable=False, index=True, comment="用户ID"
    )
    institution_id: Mapped[int] = mapped_column(
        Integer, nullable=False, index=True, comment="机构ID"
    )
    question: Mapped[str] = mapped_column(Text, nullable=False, comment="用户问题")

    # NOTE(review): annotated as bool but stored in an Integer column with
    # default 0. Switching to Boolean would change the schema, so the column
    # type is left as-is here.
    retrieval_hit: Mapped[bool] = mapped_column(
        Integer, nullable=False, default=0, comment="是否命中知识库"
    )
    retrieved_chunk_ids: Mapped[list | None] = mapped_column(JSON, comment="命中分片ID")
    answer_summary: Mapped[str | None] = mapped_column(Text, comment="回答摘要")

    # Models and retrieval parameters used for this query.
    llm_model: Mapped[str | None] = mapped_column(String(128), comment="LLM模型")
    embedding_model: Mapped[str | None] = mapped_column(String(128), comment="向量模型")
    top_k: Mapped[int | None] = mapped_column(Integer, comment="最终返回片段数")
    score_threshold: Mapped[float | None] = mapped_column(Float, comment="检索阈值")

    # Per-stage latency, in milliseconds according to the column names.
    embedding_latency_ms: Mapped[int | None] = mapped_column(Integer)
    search_latency_ms: Mapped[int | None] = mapped_column(Integer)
    llm_latency_ms: Mapped[int | None] = mapped_column(Integer)
    total_latency_ms: Mapped[int | None] = mapped_column(Integer)

    # Naive UTC creation time, filled in by the Python-side default on insert.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=_utcnow, index=True)