# medical_training/apps/cms/stats.py

"""CMS 各角色概览大屏聚合接口（只读 GET）。

依据《各角色首页数据展示.pdf》与本文档第八章，**全部基于现有库真实列**：
- 训练聚合主表 `TrainingRecord`（有 `user_id`→机构/科室、`case_id`、`status`、`total_score`、`ai_feedback_structured`）；
- `TrainingSession`（仅平台级「发起/完成」，无机构外键）；得分按 `score_type` 归一（five_point×20）。
- 维度雷达固定 5 维（信息获取/分析推理/处置决策/沟通人文/临床整合，复用 `apps/user/stats`）。
- 科室为全局表：凡「按科室分组」走 `case_base.department_id`，不按机构过滤科室。

不做项：AI 调用量 / AI 响应时长（超管平台总览）、知识文档数（内容概览）、带教任务/分配任务（无任务表）。
"""
from collections import defaultdict
from datetime import datetime, timedelta

from django.db.models import Count, Sum, Avg, Q
from django.utils import timezone
from rest_framework.decorators import api_view, permission_classes
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response

from apps.user.models import User, Institution, Department, TeacherStudentRelation
from apps.case.models import CaseBase
from apps.training.models import TrainingRecord, TrainingSession
from apps.user.stats import STANDARD_DIMS, DIMENSION_MAP, _dimension_scores, _NORM_TOTAL_EXPR
from apps.cms.permissions import IsSuperAdmin, IsHospitalAdmin, IsContentAdmin, IsTeacher

PASS_SCORE = 60  # 通过线（归一百分制）


# ── 通用小工具 ──────────────────────────────────────────────────────────────

def _recent_months(n=6):
    """Return the last ``n`` calendar months as ``(year, month)`` tuples, oldest first.

    The current month (in the active local time zone) is the final element.
    """
    today = timezone.localtime()
    # Treat a month as one integer (year * 12 + zero-based month) so stepping
    # back across a year boundary needs no special case.
    latest = today.year * 12 + (today.month - 1)
    months = []
    for back in range(n - 1, -1, -1):
        year, month0 = divmod(latest - back, 12)
        months.append((year, month0 + 1))
    return months


def _month_label(y, m):
    return f'{y}-{m:02d}'


def _month_start(y, m):
    """Return an aware datetime for 00:00 on the first day of the given month.

    The naive midnight is made aware with ``timezone.make_aware`` (no explicit
    time zone passed, so Django's current time zone applies).
    """
    first_day = datetime(year=y, month=m, day=1)
    return timezone.make_aware(first_day)


def _norm_avg(qs):
    """Average ``_NORM_TOTAL_EXPR`` over a TrainingRecord queryset.

    Returns the mean rounded to one decimal place, or ``None`` when the
    database aggregate yields no value.
    """
    mean = qs.aggregate(a=Avg(_NORM_TOTAL_EXPR))['a']
    if mean is None:
        return None
    return round(float(mean), 1)


def _norm(score, score_type):
    if score is None:
        return None
    return float(score) * 20 if score_type == 'five_point' else float(score)


def _mom(this, last):
    """环比%（本期 vs 上期），上期为 0/None 返回 None。"""
    if not last:
        return None
    return round((this - last) / last * 100)


def _hours(seconds):
    return round((seconds or 0) / 3600, 1)


def _radar(records):
    """Build the fixed radar chart: mean score rate (0-100) per standard dimension.

    Each ``(dimension, score, max)`` triple produced by ``_dimension_scores``
    is mapped to a standard dimension through ``DIMENSION_MAP``. Triples with
    an unmapped dimension, a missing score, or a non-positive maximum are
    ignored. A dimension with no usable data is reported as 0.

    Returns a list of ``{'dimension', 'score'}`` dicts in ``STANDARD_DIMS`` order.
    """
    rates_by_dim = {}
    for record in records:
        for raw_dim, score, max_score in _dimension_scores(record):
            std_dim = DIMENSION_MAP.get(raw_dim)
            if not std_dim or score is None or not max_score:
                continue
            if not float(max_score) > 0:
                continue
            rate = float(score) / float(max_score) * 100
            rates_by_dim.setdefault(std_dim, []).append(rate)

    result = []
    for dim in STANDARD_DIMS:
        rates = rates_by_dim.get(dim)
        mean = round(sum(rates) / len(rates)) if rates else 0
        result.append({'dimension': dim, 'score': mean})
    return result


def _inst_names(ids):
    """Map institution id -> name for the given ids (falsy ids are dropped).

    Uses the ``all_objects`` manager rather than the default ``objects`` one.
    """
    wanted = [pk for pk in ids if pk]
    rows = Institution.all_objects.filter(id__in=wanted)
    return {row.id: row.name for row in rows}


def _dept_names(ids):
    """Map department id -> name for the given ids (falsy ids are dropped).

    Uses the ``all_objects`` manager rather than the default ``objects`` one.
    """
    wanted = [pk for pk in ids if pk]
    rows = Department.all_objects.filter(id__in=wanted)
    return {row.id: row.name for row in rows}


def _case_titles(ids):
    """Map case id -> title for the given ids (falsy ids are dropped).

    Uses the ``all_objects`` manager rather than the default ``objects`` one.
    """
    wanted = [pk for pk in ids if pk]
    rows = CaseBase.all_objects.filter(id__in=wanted)
    return {row.id: row.title for row in rows}


def _pass_rates(tr_qs):
    """Compute the per-case pass rate for a TrainingRecord queryset.

    A record passes when its normalised score (see ``_norm``) is at least
    ``PASS_SCORE``. Records without a score are ignored. Records without a
    case are ignored as well, so no ``case_id=None`` row with an empty title
    leaks into the rankings built from this result.

    Aggregation is done in Python over the fetched rows.

    Returns:
        A list of ``{'case_id', 'pass_rate', 'total'}`` dicts, where
        ``pass_rate`` is an integer percentage and ``total`` is the number of
        scored records for that case.
    """
    tallies = defaultdict(lambda: [0, 0])  # case_id -> [passed, total]
    for record in tr_qs.only('case_id', 'total_score', 'score_type'):
        if record.case_id is None:
            continue
        normalised = _norm(record.total_score, record.score_type)
        if normalised is None:
            continue
        tally = tallies[record.case_id]
        tally[1] += 1
        if normalised >= PASS_SCORE:
            tally[0] += 1
    # Every tally was created by a scored record, so ``total`` is always >= 1.
    return [{'case_id': case_id, 'pass_rate': round(passed / total * 100), 'total': total}
            for case_id, (passed, total) in tallies.items()]


# 注：MySQL 未装时区表，DB 侧 TruncMonth/ExtractHour（CONVERT_TZ）会报错；
# 故月/小时分桶一律在 Python 用 localtime 完成（与 apps/user/stats 一致）。

def _tr_monthly(qs, n=6):
    """Monthly TrainingRecord statistics for the last ``n`` months.

    Rows are bucketed in Python by the local-time month of ``created_at``.

    Returns:
        ``(labels, counts, active_users)``: month labels (``YYYY-MM``, oldest
        first), record count per month, and number of distinct ``user_id``
        values per month.
    """
    window = _recent_months(n)
    slot_of = dict(zip(window, range(len(window))))
    totals = [0] * n
    distinct_users = [set() for _ in range(n)]

    since = _month_start(*window[0])
    rows = qs.filter(created_at__gte=since).values_list('created_at', 'user_id')
    for stamp, user_id in rows:
        if not stamp:
            continue
        local = timezone.localtime(stamp)
        slot = slot_of.get((local.year, local.month))
        if slot is None:
            continue
        totals[slot] += 1
        distinct_users[slot].add(user_id)

    labels = [_month_label(year, month) for year, month in window]
    return labels, totals, [len(group) for group in distinct_users]


def _ts_monthly(n=6):
    """Monthly count of TrainingSession rows created in the last ``n`` months.

    Rows are bucketed in Python by the local-time month of ``created_at``.

    Returns:
        ``(labels, counts)``: month labels (``YYYY-MM``, oldest first) and the
        number of sessions created in each month.
    """
    window = _recent_months(n)
    slot_of = dict(zip(window, range(len(window))))
    totals = [0] * n

    since = _month_start(*window[0])
    stamps = TrainingSession.objects.filter(created_at__gte=since).values_list('created_at')
    for (stamp,) in stamps:
        if not stamp:
            continue
        local = timezone.localtime(stamp)
        slot = slot_of.get((local.year, local.month))
        if slot is None:
            continue
        totals[slot] += 1

    labels = [_month_label(year, month) for year, month in window]
    return labels, totals


def _named_rank(rows, key, name_map, label='name', value='value'):
    """把 [{key:id, ...value}] 附上名称。"""
    return [{**{label: name_map.get(r[key], ''), 'id': r[key]},
             value: r.get(value)} for r in rows]


# ── 8.1 超级管理员 · 平台总览 ────────────────────────────────────────────────

@api_view(['GET'])
@permission_classes([IsAuthenticated, IsSuperAdmin])
def platform_overview(request):
    """Return the platform-wide dashboard payload (super administrator, GET only).

    Response sections:

    - ``kpi``: institution count, count of users with ``status=1``, and the
      number of institutions / users with a TrainingRecord in the last 30 days.
    - ``core``: sessions created this month and their change vs. last month,
      total sessions, session completion rate, average normalised score of
      completed records.
    - ``trend``: six-month series of session counts and distinct training users.
    - ``user_compose``: user count per ``role_type``.
    - ``hourly_7d``: per-hour average of TrainingRecord creations over 7 days.
    - ``institution_dist``: users and 30-day active users per institution.
    - ``hospital_rank``: top-10 institutions by record count and by average score.
    - ``case_asset``: case totals, type distribution and usage rate, most used
      cases and lowest pass-rate cases.
    """
    # Use local wall-clock time: ``_month_start`` and ``_recent_months`` work
    # in the active time zone, so taking year/month from UTC would select the
    # wrong month in the hours around a month change.
    now = timezone.localtime()
    d30 = now - timedelta(days=30)
    TR = TrainingRecord.objects.all()
    TS = TrainingSession.objects.all()

    ms = _month_start(now.year, now.month)
    pm_y, pm_m = (now.year, now.month - 1) if now.month > 1 else (now.year - 1, 12)
    pms = _month_start(pm_y, pm_m)

    # KPI
    kpi = {
        'institution_count': Institution.objects.count(),
        'mau_institution': TR.filter(created_at__gte=d30).exclude(user__institution_id__isnull=True)
                             .values('user__institution_id').distinct().count(),
        'user_total': User.objects.filter(status=1).count(),
        'mau_user': TR.filter(created_at__gte=d30).values('user_id').distinct().count(),
    }

    # Core indicators
    sess_total = TS.count()
    sess_done = TS.filter(completed_at__isnull=False).count()
    train_this = TS.filter(created_at__gte=ms).count()
    train_last = TS.filter(created_at__gte=pms, created_at__lt=ms).count()
    core = {
        'train_new_month': train_this,
        'train_new_mom': _mom(train_this, train_last),
        'train_total': sess_total,
        'complete_rate': round(sess_done / sess_total * 100, 1) if sess_total else 0,
        'avg_score': _norm_avg(TR.filter(status='completed')) or 0,
    }

    # User and activity trend
    labels, sess_counts = _ts_monthly(6)
    _, _, active_users = _tr_monthly(TR, 6)
    trend = {'months': labels, 'train_counts': sess_counts, 'active_users': active_users}

    user_compose = [{'role': r['role_type'] or 'unknown', 'count': r['n']}
                    for r in User.objects.filter(status=1).values('role_type').annotate(n=Count('id'))]

    # Hour-of-day histogram over the last 7 days, bucketed in local time and
    # divided by 7 to give a per-day average.
    since7 = now - timedelta(days=7)
    hbucket = defaultdict(int)
    for (created_at,) in TR.filter(created_at__gte=since7).values_list('created_at'):
        if created_at:
            hbucket[timezone.localtime(created_at).hour] += 1
    hourly_7d = [{'hour': h, 'avg': round(hbucket.get(h, 0) / 7, 2)} for h in range(24)]

    # Users / active users per institution
    inst_users = {r['institution_id']: r['n'] for r in User.objects.filter(status=1)
                  .exclude(institution_id__isnull=True).values('institution_id').annotate(n=Count('id'))}
    inst_active = {r['user__institution_id']: r['c'] for r in TR.filter(created_at__gte=d30)
                   .exclude(user__institution_id__isnull=True).values('user__institution_id')
                   .annotate(c=Count('user_id', distinct=True))}
    inames = _inst_names(set(inst_users) | set(inst_active))
    institution_dist = [{'id': iid, 'institution': inames.get(iid, ''),
                         'users': inst_users[iid], 'active': inst_active.get(iid, 0)}
                        for iid in inst_users]

    # Institution usage ranking
    by_train = list(TR.exclude(user__institution_id__isnull=True).values('user__institution_id')
                    .annotate(value=Count('id')).order_by('-value')[:10])
    by_score = list(TR.exclude(user__institution_id__isnull=True).values('user__institution_id')
                    .annotate(value=Avg(_NORM_TOTAL_EXPR)).order_by('-value')[:10])
    rnames = _inst_names([r['user__institution_id'] for r in by_train + by_score])
    hospital_rank = {
        'by_train': [{'id': r['user__institution_id'], 'institution': rnames.get(r['user__institution_id'], ''),
                      'count': r['value']} for r in by_train],
        'by_avg_score': [{'id': r['user__institution_id'], 'institution': rnames.get(r['user__institution_id'], ''),
                          'avg_score': round(float(r['value']), 1) if r['value'] is not None else None}
                         for r in by_score],
    }

    # Case assets and usage
    case_total = CaseBase.objects.count()
    case_this = CaseBase.objects.filter(created_at__gte=ms).count()
    case_last = CaseBase.objects.filter(created_at__gte=pms, created_at__lt=ms).count()
    # One grouped query feeds both the distribution and the usage-rate denominator.
    type_rows = list(CaseBase.objects.values('case_type').annotate(n=Count('id')))
    type_dist = [{'case_type': r['case_type'], 'count': r['n']} for r in type_rows]
    type_total = {r['case_type']: r['n'] for r in type_rows}
    # Usage rate per type = distinct trained cases of that type / cases of that type
    used_by_type = defaultdict(set)
    for r in TR.exclude(case_id__isnull=True).values('case_id', 'case__case_type'):
        used_by_type[r['case__case_type']].add(r['case_id'])
    type_usage_rate = [{'case_type': t, 'rate': round(len(used_by_type.get(t, set())) / n * 100) if n else 0}
                       for t, n in type_total.items()]
    top_used_rows = list(TR.exclude(case_id__isnull=True).values('case_id')
                         .annotate(value=Count('id')).order_by('-value')[:10])
    ctitles = _case_titles([r['case_id'] for r in top_used_rows])
    top_used = [{'case_id': r['case_id'], 'title': ctitles.get(r['case_id'], ''), 'count': r['value']}
                for r in top_used_rows]
    prates = _pass_rates(TR)
    ptitles = _case_titles([p['case_id'] for p in prates])
    low_pass = sorted(prates, key=lambda x: x['pass_rate'])[:10]
    low_pass = [{**p, 'title': ptitles.get(p['case_id'], '')} for p in low_pass]
    case_asset = {
        'total': case_total, 'new_month': case_this, 'new_mom': _mom(case_this, case_last),
        'type_dist': type_dist, 'type_usage_rate': type_usage_rate,
        'top_used': top_used, 'low_pass': low_pass,
    }

    return Response({
        'kpi': kpi, 'core': core, 'trend': trend, 'user_compose': user_compose,
        'hourly_7d': hourly_7d, 'institution_dist': institution_dist,
        'hospital_rank': hospital_rank, 'case_asset': case_asset,
    })


# ── 8.2 医院管理员 · 医院驾驶舱（本院）──────────────────────────────────────

@api_view(['GET'])
@permission_classes([IsAuthenticated, IsHospitalAdmin])
def hospital_overview(request):
    """Return the hospital cockpit payload for the requesting admin's institution (GET only).

    Training data is scoped to records whose user belongs to the institution;
    case data is scoped to cases owned by the institution.

    Response sections:

    - ``profile``: institution id, name, logo (``banner_url``), level and days
      since the institution row was created.
    - ``summary``: department / doctor / student counts, total training
      records, completion rate, average score and a six-month record series.
    - ``dept_rank``: per-department case, training, active-user and score figures.
    - ``case_asset``: case totals, most used cases, highest and lowest pass rates.
    - ``competency``: institution vs. platform average score and radar data.
    """
    inst_id = request.user.institution_id
    # Use local wall-clock time so the month boundary matches ``_month_start``,
    # which builds the boundary in the active time zone.
    now = timezone.localtime()
    d30 = now - timedelta(days=30)
    ms = _month_start(now.year, now.month)

    inst = Institution.all_objects.filter(id=inst_id).first()
    TR_h = TrainingRecord.objects.filter(user__institution_id=inst_id)   # training by this institution's users
    CASE_h = CaseBase.objects.filter(institution_id=inst_id)              # cases owned by this institution

    profile = {
        'institution_id': inst_id,
        'name': inst.name if inst else '',
        'logo': inst.banner_url if inst else '',
        'level': inst.level if inst else '',
        'cooperation_days': (now - inst.created_at).days if inst and inst.created_at else None,
    }

    # Evaluate the shared aggregates once instead of re-querying per field.
    train_total = TR_h.count()
    completed_avg = _norm_avg(TR_h.filter(status='completed')) or 0
    summary = {
        'dept_count': CASE_h.exclude(department_id__isnull=True).values('department_id').distinct().count(),
        'doctor_count': User.objects.filter(institution_id=inst_id, role_type='doctor', status=1).count(),
        'student_count': User.objects.filter(institution_id=inst_id, role_type='student', status=1).count(),
        'train_total': train_total,
        'complete_rate': (round(TR_h.filter(status='completed').count() / train_total * 100, 1)
                          if train_total else 0),
        'avg_score': completed_avg,
    }
    labels, counts, _ = _tr_monthly(TR_h, 6)
    summary['train_months'] = labels
    summary['train_monthly'] = counts

    # Department ranking: grouped by the case's department, restricted to this
    # institution's cases / training records.
    dept_case = {r['department_id']: r['n'] for r in CASE_h.exclude(department_id__isnull=True)
                 .values('department_id').annotate(n=Count('id'))}
    dept_train = {r['case__department_id']: (r['c'], r['done']) for r in TR_h
                  .exclude(case__department_id__isnull=True).values('case__department_id')
                  .annotate(c=Count('id'),
                            done=Count('id', filter=Q(status='completed')))}
    dept_active = {r['case__department_id']: r['u'] for r in TR_h.filter(created_at__gte=d30)
                   .exclude(case__department_id__isnull=True).values('case__department_id')
                   .annotate(u=Count('user_id', distinct=True))}
    dept_score = {r['case__department_id']: r['a'] for r in TR_h.exclude(case__department_id__isnull=True)
                  .values('case__department_id').annotate(a=Avg(_NORM_TOTAL_EXPR))}
    dnames = _dept_names(set(dept_case) | set(dept_train) | set(dept_active) | set(dept_score))
    dept_rank = [{
        'id': did, 'department': dnames.get(did, ''),
        'case_count': dept_case.get(did, 0),
        'train_count': dept_train.get(did, (0, 0))[0],
        'effective_train': dept_train.get(did, (0, 0))[1],
        'active_users': dept_active.get(did, 0),
        'avg_score': round(float(dept_score[did]), 1) if dept_score.get(did) is not None else None,
    } for did in (set(dept_case) | set(dept_train))]

    case_asset = {
        'total': CASE_h.count(),
        'new_month': CASE_h.filter(created_at__gte=ms).count(),
        'top_used': [], 'pass_high': [], 'pass_low': [],
    }
    top_rows = list(TR_h.exclude(case_id__isnull=True).values('case_id')
                    .annotate(value=Count('id')).order_by('-value')[:10])
    ctitles = _case_titles([r['case_id'] for r in top_rows])
    case_asset['top_used'] = [{'case_id': r['case_id'], 'title': ctitles.get(r['case_id'], ''),
                               'count': r['value']} for r in top_rows]
    prates = _pass_rates(TR_h)
    ptitles = _case_titles([p['case_id'] for p in prates])
    prates = [{**p, 'title': ptitles.get(p['case_id'], '')} for p in prates]
    case_asset['pass_high'] = sorted(prates, key=lambda x: -x['pass_rate'])[:5]
    case_asset['pass_low'] = sorted(prates, key=lambda x: x['pass_rate'])[:5]

    competency = {
        'student_avg': completed_avg,
        'platform_avg': _norm_avg(TrainingRecord.objects.filter(status='completed')) or 0,
        'radar': _radar(TR_h.only('ai_feedback_structured')),
        # NOTE(review): this iterates every TrainingRecord on the platform per
        # request; consider caching if the table grows large.
        'radar_platform': _radar(TrainingRecord.objects.only('ai_feedback_structured')),
    }

    return Response({'profile': profile, 'summary': summary, 'dept_rank': dept_rank,
                     'case_asset': case_asset, 'competency': competency})


# ── 8.3 内容管理员 · 内容概览（本院）────────────────────────────────────────

@api_view(['GET'])
@permission_classes([IsAuthenticated, IsContentAdmin])
def content_overview(request):
    """Return the content overview payload for the requesting admin's institution (GET only).

    Cases are scoped to the institution; training records are scoped to
    records whose case belongs to the institution.

    Response sections:

    - ``summary``: case totals, cases with ``publish_status=1``, new cases this
      month and change vs. last month, case usage rate, training totals and
      month-over-month change.
    - ``dist``: case distribution by type, department and difficulty together
      with the matching training counts.
    - ``warning``: the five cases with the lowest pass rate.
    - ``hot``: the five most trained cases.
    """
    inst_id = request.user.institution_id
    # Use local wall-clock time so the month boundaries match ``_month_start``,
    # which builds them in the active time zone.
    now = timezone.localtime()
    ms = _month_start(now.year, now.month)
    pm_y, pm_m = (now.year, now.month - 1) if now.month > 1 else (now.year - 1, 12)
    pms = _month_start(pm_y, pm_m)

    CASE_h = CaseBase.objects.filter(institution_id=inst_id)
    TR_h = TrainingRecord.objects.filter(case__institution_id=inst_id)

    case_total = CASE_h.count()
    used_cases = TR_h.exclude(case_id__isnull=True).values('case_id').distinct().count()
    case_this = CASE_h.filter(created_at__gte=ms).count()
    case_last = CASE_h.filter(created_at__gte=pms, created_at__lt=ms).count()
    tr_this = TR_h.filter(created_at__gte=ms).count()
    tr_last = TR_h.filter(created_at__gte=pms, created_at__lt=ms).count()
    summary = {
        'case_total': case_total,
        # Original note: publish_status 1 = "normal" = awaiting publication.
        # NOTE(review): confirm against the model's publish_status choices.
        'pending_publish': CASE_h.filter(publish_status=1).count(),
        'case_new_month': case_this, 'case_new_mom': _mom(case_this, case_last),
        'usage_rate': round(used_cases / case_total * 100) if case_total else 0,
        'train_total': TR_h.count(), 'train_mom': _mom(tr_this, tr_last),
    }

    type_dist = [{'case_type': r['case_type'], 'count': r['n']}
                 for r in CASE_h.values('case_type').annotate(n=Count('id'))]
    type_train = {r['case__case_type']: r['c'] for r in TR_h.values('case__case_type').annotate(c=Count('id'))}
    # Per department: case count / distinct cases used / training count
    dept_case = {r['department_id']: r['n'] for r in CASE_h.exclude(department_id__isnull=True)
                 .values('department_id').annotate(n=Count('id'))}
    dept_train = {r['case__department_id']: r['c'] for r in TR_h.exclude(case__department_id__isnull=True)
                  .values('case__department_id').annotate(c=Count('id'))}
    dept_used = defaultdict(set)
    for r in TR_h.exclude(case__department_id__isnull=True).values('case__department_id', 'case_id'):
        dept_used[r['case__department_id']].add(r['case_id'])
    dnames = _dept_names(set(dept_case) | set(dept_train))
    dept_dist = [{'id': did, 'department': dnames.get(did, ''),
                  'case_count': dept_case.get(did, 0),
                  'used_count': len(dept_used.get(did, set())),
                  'train_count': dept_train.get(did, 0)} for did in (set(dept_case) | set(dept_train))]
    # Difficulty distribution and usage. NULL and empty-string difficulties
    # both fall under the same fallback label, so their counts are summed
    # rather than letting the last group overwrite the others.
    diff_case = defaultdict(int)
    for r in CASE_h.values('difficulty').annotate(n=Count('id')):
        diff_case[r['difficulty'] or '未分级'] += r['n']
    diff_train = defaultdict(int)
    for r in TR_h.values('case__difficulty').annotate(c=Count('id')):
        diff_train[r['case__difficulty'] or '未分级'] += r['c']
    difficulty_dist = [{'difficulty': k, 'case_count': v, 'train_count': diff_train.get(k, 0)}
                       for k, v in diff_case.items()]
    dist = {'type_dist': type_dist, 'type_train': type_train, 'dept_dist': dept_dist,
            'difficulty_dist': difficulty_dist}

    prates = _pass_rates(TR_h)
    ptitles = _case_titles([p['case_id'] for p in prates])
    prates = [{**p, 'title': ptitles.get(p['case_id'], '')} for p in prates]
    warning = sorted(prates, key=lambda x: x['pass_rate'])[:5]   # low pass-rate warning list

    hot_rows = list(TR_h.exclude(case_id__isnull=True).values('case_id')
                    .annotate(value=Count('id')).order_by('-value')[:5])
    htitles = _case_titles([r['case_id'] for r in hot_rows])
    hot = [{'case_id': r['case_id'], 'title': htitles.get(r['case_id'], ''), 'count': r['value']}
           for r in hot_rows]

    return Response({'summary': summary, 'dist': dist, 'warning': warning, 'hot': hot})


# ── 8.4 带教医生 · 教学概览（名下学生）──────────────────────────────────────

@api_view(['GET'])
@permission_classes([IsAuthenticated, IsTeacher])
def teaching_overview(request):
    """Return the teaching overview for the students linked to the requesting teacher (GET only).

    Students are taken from ``TeacherStudentRelation`` rows with ``status=1``
    for this teacher.

    Response sections:

    - ``students``: one entry per student with training total, completion
      rate, average normalised score, weak dimensions, most trained case type
      and last training end time. ``pending_tasks`` is always ``None``.
    - ``overview``: student count, radar data, students' vs. institution
      average score and a six-month training series. ``task_summary`` is
      always ``None``.
    """
    teacher = request.user
    relations = TeacherStudentRelation.objects.filter(teacher=teacher, status=1)
    student_ids = list(relations.values_list('student_id', flat=True))
    students_qs = User.objects.filter(id__in=student_ids)
    TR_s = TrainingRecord.objects.filter(user_id__in=student_ids)

    def _empty_stat():
        return {'total': 0, 'done': 0, 'score_sum': 0.0, 'score_n': 0,
                'last': None, 'types': defaultdict(int)}

    # Per-student running tallies, accumulated in one pass over the records.
    # NOTE(review): other views reach the case type via ``case__case_type``;
    # confirm TrainingRecord really has its own ``case_type`` and ``end_time`` fields.
    per_student = defaultdict(_empty_stat)
    for rec in TR_s.only('user_id', 'status', 'total_score', 'score_type', 'end_time', 'case_type'):
        stat = per_student[rec.user_id]
        stat['total'] += 1
        if rec.status == 'completed':
            stat['done'] += 1
        normalised = _norm(rec.total_score, rec.score_type)
        if normalised is not None:
            stat['score_sum'] += normalised
            stat['score_n'] += 1
        if rec.case_type:
            stat['types'][rec.case_type] += 1
        if rec.end_time and (stat['last'] is None or rec.end_time > stat['last']):
            stat['last'] = rec.end_time

    students = []
    for student in students_qs.select_related('department'):
        # .get() so that students without records do not create empty tallies.
        stat = per_student.get(student.id)
        if stat:
            type_counts = stat['types']
            favourite = max(type_counts, key=type_counts.get) if type_counts else None
            train_total = stat['total']
            complete_rate = round(stat['done'] / train_total * 100) if train_total else 0
            avg_score = round(stat['score_sum'] / stat['score_n'], 1) if stat['score_n'] else None
            last_trained = stat['last']
        else:
            favourite, train_total, complete_rate = None, 0, 0
            avg_score, last_trained = None, None
        students.append({
            'id': student.id, 'real_name': student.real_name, 'username': student.username,
            'department': student.department.name if student.department_id else '',
            'train_total': train_total,
            'complete_rate': complete_rate,
            'avg_score': avg_score,
            'weak_dimensions': student.weak_dimensions or [],
            'most_trained_type': favourite,
            'last_trained_at': last_trained,
            'pending_tasks': None,   # not implemented: there is no task table
        })

    # Overall overview
    inst_id = teacher.institution_id
    labels, counts, _ = _tr_monthly(TR_s, 6)
    radar = _radar(TR_s.only('ai_feedback_structured'))
    students_avg = _norm_avg(TR_s.filter(status='completed')) or 0
    institution_records = TrainingRecord.objects.filter(
        user__institution_id=inst_id, status='completed')
    institution_avg = _norm_avg(institution_records) or 0
    overview = {
        'student_count': len(student_ids),
        'radar': radar,
        'students_avg': students_avg,
        'institution_avg': institution_avg,
        'train_months': labels, 'train_monthly': counts,
        'task_summary': None,   # not implemented: there is no task table
    }

    return Response({'students': students, 'overview': overview})