import logging

import pdfplumber

from config.exceptions import AppError

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Upload limits checked by extract_text_from_pdfs before any PDF is parsed.
# Maximum number of PDFs accepted in one call.
MAX_FILES = 5
# Maximum size of a single PDF, in bytes.
MAX_FILE_SIZE = 20 * 1024 * 1024   # 20 MB
# Maximum combined size of all PDFs in one call, in bytes.
MAX_TOTAL_SIZE = 60 * 1024 * 1024  # 60 MB
# Marker placed in front of each file's text when several files are combined;
# the {name} placeholder is filled with the file's name.
FILE_BREAK = '\n\n---FILE_BREAK: {name}---\n\n'


def extract_text_from_pdfs(files) -> str:
    """Extract the text of one to ``MAX_FILES`` PDFs and return it as one string.

    Args:
        files: The uploaded files, e.g. ``request.FILES.getlist('files')`` or a
            similar list of UploadedFile-like objects. Each item is read for
            its ``size`` and ``name`` attributes and passed to
            ``_extract_single``.

    Returns:
        The extracted text. With a single file this is that file's text as-is;
        with several files each text is prefixed by a ``FILE_BREAK`` marker
        carrying the file name, and the pieces are joined without a separator.

    Raises:
        AppError: ``CASE_PDF_EMPTY`` when no files are given or a file yields
            only whitespace; ``CASE_TOO_MANY_FILES`` when more than
            ``MAX_FILES`` files are given; ``CASE_FILE_TOO_LARGE`` when one
            file exceeds ``MAX_FILE_SIZE`` or all files together exceed
            ``MAX_TOTAL_SIZE``.
    """
    # --- Validate the upload as a whole before parsing anything. ---
    if not files:
        raise AppError('CASE_PDF_EMPTY', '未上传 PDF 文件')

    file_count = len(files)
    if file_count > MAX_FILES:
        raise AppError('CASE_TOO_MANY_FILES', f'最多上传 {MAX_FILES} 份 PDF', status_code=400)

    # The per-file limit is checked first, so an oversized single file is
    # reported as such even when the total limit is exceeded as well.
    if any(item.size > MAX_FILE_SIZE for item in files):
        raise AppError('CASE_FILE_TOO_LARGE', f'单份 PDF 不得超过 {MAX_FILE_SIZE // (1024*1024)} MB', status_code=400)
    if sum(item.size for item in files) > MAX_TOTAL_SIZE:
        raise AppError('CASE_FILE_TOO_LARGE', f'PDF 总大小不得超过 {MAX_TOTAL_SIZE // (1024*1024)} MB', status_code=400)

    # --- Extract each file; markers are only added for multi-file uploads. ---
    needs_marker = file_count > 1
    chunks = []
    for item in files:
        extracted = _extract_single(item)
        if not extracted.strip():
            raise AppError('CASE_PDF_EMPTY', f'PDF "{item.name}" 无法提取文本（可能为扫描版）', status_code=400)
        if needs_marker:
            extracted = FILE_BREAK.format(name=item.name) + extracted
        chunks.append(extracted)

    return ''.join(chunks)


def _extract_single(uploaded_file) -> str:
    """Return the text of every page of one PDF, joined by newlines.

    Args:
        uploaded_file: Object that is passed to ``pdfplumber.open`` and that
            exposes a ``name`` attribute (used in the log and error messages).

    Returns:
        The text of all pages joined by a newline character. A page for which
        ``extract_text()`` returns nothing contributes an empty string.

    Raises:
        AppError: ``CASE_PDF_EMPTY`` with status code 400 when opening the PDF
            or extracting its text fails for any reason. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        with pdfplumber.open(uploaded_file) as pdf:
            pages = [page.extract_text() or '' for page in pdf.pages]
        return '\n'.join(pages)
    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback, so
        # the parser failure can be diagnosed from the logs alone.
        logger.exception('pdfplumber extract failed for %s: %s', uploaded_file.name, e)
        # Chain explicitly so the root cause stays attached to the AppError.
        raise AppError('CASE_PDF_EMPTY', f'PDF "{uploaded_file.name}" 解析失败: {e}', status_code=400) from e