<?php

namespace App\Services\Analytics;

use Illuminate\Support\Carbon;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Schema;

/**
 * 从做题与错题数据抽取「题库标定难度 vs 实测正确率」等指标，用于检验难度体系是否合理。
 *
 * 说明：当前库表未见独立「学生逐题自评难易」字段；{@see self::parsePaperDifficultyCategory()}
 * 将 papers.difficulty_category 解析为数值，作为「本次练习/学案侧难度选择」的代理变量。
 */
class QuestionDifficultyCalibrationAnalyzer
{
    /**
     * Runs the full difficulty-calibration analysis and returns a report array.
     *
     * The report compares each question's labelled bank difficulty with the error rate
     * measured from graded `paper_questions` rows, attaches a per-question calibration
     * recommendation, and summarises two Pearson correlations.
     *
     * Options (all optional):
     * - min_attempts: minimum graded attempts for a question to be listed (default 5, floor 1).
     * - since: only consider rows updated or graded at/after this instant.
     * - include_mistakes: attach `mistake_records` counts when that table exists (default true).
     * - student_id: restrict to one student; null or '' means no restriction.
     * - question_bank_id: restrict to one bank question; 0 means no restriction.
     * - question_code: resolved through `questions.question_code`; overrides question_bank_id.
     * - calibration_min_attempts: threshold on time-decayed attempts (default 10, floor 1).
     * - alpha: smoothing factor, clamped to [0.01, 1.0] (default 0.2).
     * - max_step: cap on a single adjustment, clamped to [0.001, 0.2] (default 0.03).
     * - half_life_days: half-life used for time decay (default 30, floor 1).
     *
     * @param  array{
     *     min_attempts?: int,
     *     since?: Carbon|null,
     *     include_mistakes?: bool,
     *     student_id?: string|int|null,
     *     question_bank_id?: int|null,
     *     question_code?: string|null,
     *     calibration_min_attempts?: int,
     *     alpha?: float,
     *     max_step?: float,
     *     half_life_days?: int
     * }  $options
     * @return array<string, mixed> `['ok' => false, 'error' => string]` when a required table or the
     *                              requested question is missing; otherwise `ok => true` plus `meta`,
     *                              `summary`, `bins_by_bank_difficulty`,
     *                              `paper_difficulty_category_vs_incorrect_rate` and `per_question`.
     */
    public function run(array $options = []): array
    {
        // Check every table the aggregation queries touch before issuing any query, so a
        // missing table yields a structured error instead of a database exception.
        if (! Schema::hasTable('paper_questions') || ! Schema::hasTable('papers')) {
            return [
                'ok' => false,
                'error' => '缺少必要数据表 paper_questions 或 papers',
            ];
        }
        // aggregatePerQuestion() always LEFT JOINs `questions`, so it is required as well.
        if (! Schema::hasTable('questions')) {
            return [
                'ok' => false,
                'error' => '缺少必要数据表 questions',
            ];
        }

        $minAttempts = max(1, (int) ($options['min_attempts'] ?? 5));
        $since = $options['since'] ?? null;
        $includeMistakes = (bool) ($options['include_mistakes'] ?? true);

        $rawStudentId = $options['student_id'] ?? null;
        $studentId = ($rawStudentId !== null && $rawStudentId !== '') ? (string) $rawStudentId : null;

        $questionBankId = isset($options['question_bank_id']) ? (int) $options['question_bank_id'] : null;
        if ($questionBankId === 0) {
            // 0 is treated as "no filter".
            $questionBankId = null;
        }

        // A question code, when given, wins over an explicit question_bank_id.
        $questionCode = isset($options['question_code']) ? trim((string) $options['question_code']) : '';
        if ($questionCode !== '') {
            $resolved = DB::table('questions')->where('question_code', $questionCode)->value('id');
            if ($resolved === null) {
                return [
                    'ok' => false,
                    'error' => '未找到 question_code='.$questionCode.' 对应的题库题目',
                ];
            }
            $questionBankId = (int) $resolved;
        }

        // The four hard calibration constraints (overridable by the caller).
        $calibrationMinAttempts = max(1, (int) ($options['calibration_min_attempts'] ?? 10));
        $alpha = max(0.01, min(1.0, (float) ($options['alpha'] ?? 0.2)));
        $maxStep = max(0.001, min(0.2, (float) ($options['max_step'] ?? 0.03)));
        $halfLifeDays = max(1, (int) ($options['half_life_days'] ?? 30));

        $perQuestion = $this->aggregatePerQuestion($minAttempts, $since, $studentId, $questionBankId);
        $byPaperDifficulty = $this->aggregatePerQuestionByPaperDifficulty($since, $studentId, $questionBankId);
        $bins = $this->binByDifficulty($perQuestion);
        $paperLevelRows = $this->rowLevelPaperDifficultyVsOutcome($since, $studentId);

        $mistakeByBankId = [];
        if ($includeMistakes && Schema::hasTable('mistake_records')) {
            $mistakeByBankId = $this->mistakeCountsByQuestionBankId($studentId);
        }

        // Single pass: collect the Pearson inputs and build the enriched per-question rows.
        $bankDiffs = [];
        $errorRates = [];
        $merged = [];
        foreach ($perQuestion as $row) {
            $bid = (int) $row['question_bank_id'];
            $attempts = (int) $row['attempts'];
            $correct = (int) $row['correct_count'];
            $norm = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
            $emp = $attempts > 0 ? 1.0 - ($correct / $attempts) : null;

            // Only questions with both a labelled difficulty and at least one attempt
            // contribute to the correlation.
            if ($norm !== null && $emp !== null) {
                $bankDiffs[] = $norm;
                $errorRates[] = $emp;
            }

            $strata = $byPaperDifficulty[$bid] ?? [];
            $calibration = $this->buildCalibrationRecommendation(
                $norm,
                $strata,
                $calibrationMinAttempts,
                $alpha,
                $maxStep,
                $halfLifeDays
            );

            $merged[] = array_merge($row, [
                'wrong_count' => max(0, $attempts - $correct),
                'bank_difficulty_normalized' => $norm,
                'empirical_error_rate' => $emp,
                // Observed error rate minus bank difficulty (0–1): a larger value means the
                // question is harder to get right than its label suggests.
                'calibration_gap' => ($emp !== null && $norm !== null) ? round($emp - $norm, 4) : null,
                'mistake_records_count' => $mistakeByBankId[$bid] ?? 0,
                'paper_difficulty_breakdown' => $strata,
                'calibration_weighted_error_rate' => $calibration['weighted_error_rate'],
                'calibration_effective_attempts' => $calibration['effective_attempts'],
                'calibration_recommendation' => $calibration['recommendation'],
            ]);
        }

        $pearson = $this->pearsonCorrelation($bankDiffs, $errorRates);
        $paperPearson = $paperLevelRows['pearson_category_vs_incorrect'] ?? null;

        return [
            'ok' => true,
            'meta' => [
                'min_attempts' => $minAttempts,
                'since' => $since?->toIso8601String(),
                'student_id' => $studentId,
                'question_bank_id' => $questionBankId,
                'question_rows' => count($perQuestion),
                'note' => '无独立「学生逐题自评难易」字段；mistake_records 为错题本行数。下列「每题一行」为 paper_questions 已判分聚合。',
                'calibration_constraints' => [
                    'stratified_by' => 'papers.difficulty_category',
                    'min_attempts' => $calibrationMinAttempts,
                    'alpha' => $alpha,
                    'max_step' => $maxStep,
                    'time_decay_half_life_days' => $halfLifeDays,
                ],
            ],
            'summary' => [
                'pearson_bank_difficulty_vs_empirical_error_rate' => $pearson,
                'interpretation' => $this->interpretPearson($pearson),
                'pearson_paper_difficulty_category_vs_incorrect' => $paperPearson,
                'interpretation_paper_category' => $this->interpretPearson($paperPearson),
            ],
            'bins_by_bank_difficulty' => $bins,
            'paper_difficulty_category_vs_incorrect_rate' => $paperLevelRows,
            'per_question' => $merged,
        ];
    }

    /**
     * Aggregates graded `paper_questions` rows into one summary row per bank question.
     *
     * Only rows with a non-null `is_correct` and `question_bank_id` are counted, and only
     * questions with at least `$minAttempts` such rows are returned.
     *
     * @param  int  $minAttempts  minimum number of graded rows a question needs to be included
     * @param  Carbon|null  $since  when set, keep rows whose `updated_at` or `graded_at` is at/after it
     * @param  string|null  $studentId  when set, keep only papers of this student
     * @param  int|null  $questionBankId  when set, keep only this bank question
     * @return list<array<string, mixed>> rows with question_bank_id, question_code, attempts,
     *                                    correct_count, accuracy, avg_paper_question_difficulty
     *                                    and bank_difficulty
     */
    private function aggregatePerQuestion(int $minAttempts, ?Carbon $since, ?string $studentId, ?int $questionBankId): array
    {
        $query = DB::table('paper_questions as pq')
            ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
            ->leftJoin('questions as qu', 'qu.id', '=', 'pq.question_bank_id')
            ->whereNotNull('pq.is_correct')
            ->whereNotNull('pq.question_bank_id');

        // Optional filters, applied only when the caller supplied a value.
        $query->when($studentId !== null, function ($builder) use ($studentId) {
            $builder->where('p.student_id', $studentId);
        });
        $query->when($questionBankId !== null, function ($builder) use ($questionBankId) {
            $builder->where('pq.question_bank_id', $questionBankId);
        });
        $query->when($since !== null, function ($builder) use ($since) {
            // Grouped so the OR does not leak into the other WHERE conditions.
            $builder->where(function ($window) use ($since) {
                $window->where('pq.updated_at', '>=', $since)
                    ->orWhere('pq.graded_at', '>=', $since);
            });
        });

        $records = $query
            ->selectRaw('
                pq.question_bank_id as question_bank_id,
                COUNT(*) as attempts,
                SUM(CASE WHEN pq.is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
                AVG(pq.difficulty) as avg_paper_question_difficulty,
                MAX(qu.difficulty) as bank_difficulty,
                MAX(qu.question_code) as question_code
            ')
            ->groupBy('pq.question_bank_id')
            ->havingRaw('COUNT(*) >= ?', [$minAttempts])
            ->get();

        $summaries = [];
        foreach ($records as $record) {
            $attemptCount = (int) $record->attempts;
            $correctCount = (int) $record->correct_count;
            $avgDifficulty = $record->avg_paper_question_difficulty;
            $bankDifficulty = $record->bank_difficulty;

            $summaries[] = [
                'question_bank_id' => (int) $record->question_bank_id,
                'question_code' => $record->question_code,
                'attempts' => $attemptCount,
                'correct_count' => $correctCount,
                'accuracy' => $record->attempts > 0 ? round($correctCount / $attemptCount, 4) : null,
                'avg_paper_question_difficulty' => $avgDifficulty !== null ? (float) $avgDifficulty : null,
                'bank_difficulty' => $bankDifficulty !== null ? (float) $bankDifficulty : null,
            ];
        }

        return $summaries;
    }

    /**
     * Stratified statistics: for every bank question, the right/wrong distribution within
     * each distinct `papers.difficulty_category` value.
     *
     * @param  Carbon|null  $since  when set, keep rows whose `updated_at` or `graded_at` is at/after it
     * @param  string|null  $studentId  when set, keep only papers of this student
     * @param  int|null  $questionBankId  when set, keep only this bank question
     * @return array<int, list<array<string, mixed>>> question_bank_id => list of strata, each with
     *                                                difficulty_category, difficulty_category_numeric,
     *                                                attempts, correct_count, wrong_count, error_rate
     *                                                and last_answered_at
     */
    private function aggregatePerQuestionByPaperDifficulty(?Carbon $since, ?string $studentId, ?int $questionBankId): array
    {
        $query = DB::table('paper_questions as pq')
            ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
            ->whereNotNull('pq.is_correct')
            ->whereNotNull('pq.question_bank_id');

        // Optional filters, applied only when the caller supplied a value.
        $query->when($studentId !== null, function ($builder) use ($studentId) {
            $builder->where('p.student_id', $studentId);
        });
        $query->when($questionBankId !== null, function ($builder) use ($questionBankId) {
            $builder->where('pq.question_bank_id', $questionBankId);
        });
        $query->when($since !== null, function ($builder) use ($since) {
            $builder->where(function ($window) use ($since) {
                $window->where('pq.updated_at', '>=', $since)
                    ->orWhere('pq.graded_at', '>=', $since);
            });
        });

        $records = $query
            ->selectRaw('
                pq.question_bank_id as question_bank_id,
                p.difficulty_category as difficulty_category,
                COUNT(*) as attempts,
                SUM(CASE WHEN pq.is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
                SUM(CASE WHEN pq.is_correct = 0 THEN 1 ELSE 0 END) as wrong_count,
                MAX(COALESCE(pq.graded_at, pq.updated_at, pq.created_at)) as last_answered_at
            ')
            ->groupBy('pq.question_bank_id', 'p.difficulty_category')
            ->get();

        $strataByQuestion = [];
        foreach ($records as $record) {
            $attemptCount = (int) $record->attempts;
            $wrongCount = (int) $record->wrong_count;
            $errorRate = null;
            if ($attemptCount > 0) {
                $errorRate = round($wrongCount / $attemptCount, 4);
            }

            // Appending auto-creates the per-question list on first use.
            $strataByQuestion[(int) $record->question_bank_id][] = [
                'difficulty_category' => $record->difficulty_category,
                'difficulty_category_numeric' => self::parsePaperDifficultyCategory((string) ($record->difficulty_category ?? '')),
                'attempts' => $attemptCount,
                'correct_count' => (int) $record->correct_count,
                'wrong_count' => $wrongCount,
                'error_rate' => $errorRate,
                'last_answered_at' => $record->last_answered_at,
            ];
        }

        return $strataByQuestion;
    }

    /**
     * Per-answer relationship between the paper's difficulty category and being wrong.
     *
     * `papers.difficulty_category` is parsed to a 0–4 level and divided by 4; the outcome is
     * 1 when `is_correct` casts to 0 and 0 otherwise. The Pearson correlation is taken over
     * all graded answers whose category could be parsed.
     *
     * The rows are pre-aggregated in SQL by (difficulty_category, is_correct), so PHP only
     * handles one record per distinct combination instead of one per graded answer. The
     * statistics are then computed from the group counts, which is mathematically the same
     * as iterating every row.
     *
     * NOTE(review): grouping follows the database collation of `difficulty_category`; values
     * the collation treats as equal are merged before parsing — confirm this is acceptable.
     *
     * @param  Carbon|null  $since  when set, keep rows whose `updated_at` or `graded_at` is at/after it
     * @param  string|null  $studentId  when set, keep only papers of this student
     * @return array{n_rows: int, n_rows_with_category: int, pearson_category_vs_incorrect: ?float, by_category: list<array<string, mixed>>}
     */
    private function rowLevelPaperDifficultyVsOutcome(?Carbon $since, ?string $studentId): array
    {
        $q = DB::table('paper_questions as pq')
            ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
            ->whereNotNull('pq.is_correct');

        if ($studentId !== null) {
            $q->where('p.student_id', $studentId);
        }

        if ($since !== null) {
            $q->where(function ($w) use ($since) {
                $w->where('pq.updated_at', '>=', $since)
                    ->orWhere('pq.graded_at', '>=', $since);
            });
        }

        $groups = $q
            ->groupBy('p.difficulty_category', 'pq.is_correct')
            ->selectRaw('p.difficulty_category as difficulty_category, pq.is_correct as is_correct, COUNT(*) as n')
            ->get();

        $totalRows = 0;
        $rowsWithCategory = 0;
        $byCat = [];
        // Weighted points for the correlation: [normalized category, incorrect flag, row count].
        $points = [];

        foreach ($groups as $g) {
            $count = (int) $g->n;
            if ($count < 1) {
                continue;
            }
            $totalRows += $count;

            $isIncorrect = ((int) $g->is_correct) === 0;
            $cat = self::parsePaperDifficultyCategory(
                $g->difficulty_category === null ? null : (string) $g->difficulty_category
            );

            // Different raw labels may parse to the same level; they share one bucket.
            $key = $cat === null ? '_unknown' : (string) $cat;
            $byCat[$key] ??= ['category' => $cat, 'n' => 0, 'incorrect' => 0];
            $byCat[$key]['n'] += $count;
            if ($isIncorrect) {
                $byCat[$key]['incorrect'] += $count;
            }

            if ($cat !== null) {
                $rowsWithCategory += $count;
                $points[] = [$cat / 4.0, $isIncorrect ? 1.0 : 0.0, $count];
            }
        }

        $outBy = [];
        foreach ($byCat as $v) {
            $n = $v['n'];
            $outBy[] = [
                'difficulty_category_numeric' => $v['category'],
                'n' => $n,
                'incorrect_rate' => $n > 0 ? round($v['incorrect'] / $n, 4) : null,
            ];
        }
        // Unparsable categories (null) sort first.
        usort($outBy, fn ($a, $b) => ($a['difficulty_category_numeric'] ?? -1) <=> ($b['difficulty_category_numeric'] ?? -1));

        return [
            'n_rows' => $totalRows,
            'n_rows_with_category' => $rowsWithCategory,
            'pearson_category_vs_incorrect' => $this->weightedPearsonCorrelation($points),
            'by_category' => $outBy,
        ];
    }

    /**
     * Pearson correlation over points that each stand for `weight` identical observations.
     *
     * Uses the same rules as the unweighted computation in this class: fewer than 3
     * observations or a (near-)zero variance product yields null; the result is rounded
     * to 4 decimals.
     *
     * @param  list<array{0: float, 1: float, 2: int}>  $points  [x, y, weight] triples
     */
    private function weightedPearsonCorrelation(array $points): ?float
    {
        $n = 0;
        $sumX = 0.0;
        $sumY = 0.0;
        foreach ($points as [$x, $y, $weight]) {
            $n += $weight;
            $sumX += $weight * $x;
            $sumY += $weight * $y;
        }
        if ($n < 3) {
            return null;
        }

        $mx = $sumX / $n;
        $my = $sumY / $n;
        $num = 0.0;
        $dx = 0.0;
        $dy = 0.0;
        // Second pass over centred values, as in the unweighted version, for numerical stability.
        foreach ($points as [$x, $y, $weight]) {
            $vx = $x - $mx;
            $vy = $y - $my;
            $num += $weight * $vx * $vy;
            $dx += $weight * $vx * $vx;
            $dy += $weight * $vy * $vy;
        }
        $den = sqrt($dx * $dy);

        return $den > 1e-12 ? round($num / $den, 4) : null;
    }

    /**
     * Counts `mistake_records` rows per question, optionally for a single student.
     *
     * Rows whose `question_id` is not numeric are skipped; the remaining ids are cast to int
     * and used as the result keys.
     *
     * @param  string|null  $studentId  when set, count only this student's records
     * @return array<int, int> question id (used as question_bank_id by the caller) => number of mistake rows
     */
    private function mistakeCountsByQuestionBankId(?string $studentId): array
    {
        $query = DB::table('mistake_records');
        if ($studentId !== null) {
            $query->where('student_id', $studentId);
        }

        $rawCounts = $query
            ->selectRaw('question_id, COUNT(*) as c')
            ->groupBy('question_id')
            ->pluck('c', 'question_id')
            ->all();

        $totals = [];
        foreach ($rawCounts as $questionId => $rowCount) {
            if (is_numeric($questionId)) {
                $bankKey = (int) $questionId;
                // Distinct raw ids can collapse to the same int, so accumulate.
                $totals[$bankKey] = ($totals[$bankKey] ?? 0) + (int) $rowCount;
            }
        }

        return $totals;
    }

    /**
     * Groups questions into four equal-width bins of normalized bank difficulty and
     * reports the pooled accuracy of each bin.
     *
     * Bins are [0, 0.25), [0.25, 0.5), [0.5, 0.75) and [0.75, 1.0]; values outside 0–1 are
     * clamped into the first or last bin. Questions without a bank difficulty are skipped.
     * `mean_accuracy` is total correct answers divided by total attempts in the bin
     * (attempt-weighted), or null for a bin without attempts.
     *
     * @param  list<array<string, mixed>>  $perQuestion  rows with bank_difficulty, attempts and correct_count
     * @return list<array<string, mixed>> one entry per bin with min, max, n_questions,
     *                                    total_attempts, total_correct and mean_accuracy
     */
    private function binByDifficulty(array $perQuestion): array
    {
        $edges = [0.0, 0.25, 0.5, 0.75, 1.0];
        $binCount = count($edges) - 1;

        $bins = [];
        for ($i = 0; $i < $binCount; $i++) {
            $bins[] = [
                'min' => $edges[$i],
                'max' => $edges[$i + 1],
                'n_questions' => 0,
                'total_attempts' => 0,
                'total_correct' => 0,
                'mean_accuracy' => null,
            ];
        }

        foreach ($perQuestion as $row) {
            $d = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
            if ($d === null) {
                continue;
            }
            // Clamping to [0, 0.999999] keeps the index in 0..3 and puts d = 1.0 in the
            // last bin, so no further range check is needed.
            $binIdx = (int) floor(min(0.999999, max(0.0, $d)) / 0.25);

            $bins[$binIdx]['n_questions']++;
            $bins[$binIdx]['total_attempts'] += (int) $row['attempts'];
            $bins[$binIdx]['total_correct'] += (int) $row['correct_count'];
        }

        foreach ($bins as &$b) {
            if ($b['total_attempts'] > 0) {
                $b['mean_accuracy'] = round($b['total_correct'] / $b['total_attempts'], 4);
            }
        }
        // Break the reference so later writes to $b cannot alter the last bin.
        unset($b);

        return $bins;
    }

    /**
     * Maps a Pearson coefficient to a human-readable interpretation.
     *
     * null yields the "cannot compute" message; values above 0.15 the positive-correlation
     * message; values below -0.15 the negative-correlation message; anything else the
     * weak-correlation message.
     */
    private function interpretPearson(?float $r): string
    {
        return match (true) {
            $r === null => '样本不足或难度无变异，无法计算相关系数。',
            $r > 0.15 => '题库难度与实测错误率呈正相关：标定越高的题，学生越容易错，方向符合预期。',
            $r < -0.15 => '出现负相关：标定「难」的题反而正确率更高，建议检查标定、题型或样本偏差。',
            default => '相关较弱：标定难度与实测区分度不明显，可能样本量、标定噪声或题目同质性导致。',
        };
    }

    /**
     * Parses `papers.difficulty_category` into a difficulty level between 0 and 4.
     *
     * The returned value is the level itself (0.0–4.0), not a 0–1 fraction; callers that
     * need a 0–1 value divide by 4.
     *
     * - null, empty or whitespace-only input returns null.
     * - Numeric strings are truncated to an integer and clamped to 0–4.
     * - Known labels map to the business tiers: 0 basic / 1 foundation / 2 score-raising /
     *   3 advanced / 4 competition.
     * - Any other label returns null.
     *
     * @param  string|null  $raw  raw category value
     * @return float|null level in 0.0–4.0, or null when the value cannot be interpreted
     */
    public static function parsePaperDifficultyCategory(?string $raw): ?float
    {
        if ($raw === null) {
            return null;
        }
        $s = strtolower(trim($raw));
        if ($s === '') {
            return null;
        }
        if (is_numeric($s)) {
            return (float) max(0, min(4, (int) $s));
        }

        // Purely numeric labels were handled above, so only textual labels remain here.
        return match ($s) {
            '零基础', '0基础', '基础', '0级' => 0.0,
            '筑基' => 1.0,
            '进阶', '中等', '提分' => 2.0,
            '培优' => 3.0,
            '竞赛' => 4.0,
            default => null,
        };
    }

    /**
     * Brings a bank difficulty value onto a 0–1 scale.
     *
     * Values greater than 1 are divided by 5; values of 1 or less (including negatives)
     * are returned unchanged. null stays null.
     * NOTE(review): presumably values above 1 come from a 1–5 scale — confirm against the data.
     */
    public static function normalizeDifficulty(?float $d): ?float
    {
        if ($d === null) {
            return null;
        }
        if ($d > 1.0) {
            return $d / 5.0;
        }

        return $d;
    }

    /**
     * Pearson correlation coefficient of two equally long lists, rounded to 4 decimals.
     *
     * Returns null when there are fewer than 3 pairs, when the lists differ in length, or
     * when the product of the two sums of squared deviations is (near) zero.
     *
     * @param  list<float>  $x
     * @param  list<float>  $y
     */
    private function pearsonCorrelation(array $x, array $y): ?float
    {
        $size = count($x);
        if ($size < 3) {
            return null;
        }
        if (count($y) !== $size) {
            return null;
        }

        $meanX = array_sum($x) / $size;
        $meanY = array_sum($y) / $size;

        $covariance = 0.0;
        $sumSqX = 0.0;
        $sumSqY = 0.0;
        foreach ($x as $idx => $xValue) {
            $devX = $xValue - $meanX;
            $devY = $y[$idx] - $meanY;
            $covariance += $devX * $devY;
            $sumSqX += $devX * $devX;
            $sumSqY += $devY * $devY;
        }

        $denominator = sqrt($sumSqX * $sumSqY);
        if ($denominator > 1e-12) {
            return round($covariance / $denominator, 4);
        }

        // Constant input on either side: the correlation is undefined.
        return null;
    }

    /**
     * Produces a difficulty adjustment suggestion for one question under four hard constraints.
     *
     * Constraints:
     * 1) Stratification: input is already split by `papers.difficulty_category`.
     * 2) Sample threshold: if the time-decayed attempt count is below `$minAttempts`, do not move.
     * 3) Smoothing and limiting: delta = clip(alpha * gap, -maxStep, maxStep), where
     *    gap = weighted error rate − normalized bank difficulty.
     * 4) Time decay: each stratum is weighted by 0.5 ^ (age in days / halfLifeDays), with the
     *    age taken from the stratum's `last_answered_at`.
     *
     * The age is the timestamp difference in fractional days. A timestamp in the future, a
     * missing one, or one that cannot be parsed counts as age 0 (full weight).
     *
     * @param  float|null  $bankDifficultyNormalized  labelled difficulty on the 0–1 scale, or null if unknown
     * @param  list<array<string, mixed>>  $strata  strata with attempts, wrong_count and last_answered_at
     * @param  int  $minAttempts  minimum time-decayed attempts required before suggesting a change
     * @param  float  $alpha  smoothing factor applied to the gap
     * @param  float  $maxStep  maximum absolute change suggested in one run
     * @param  int  $halfLifeDays  half-life of the time decay in days; values below 1 are treated as 1
     * @return array{
     *   weighted_error_rate:?float,
     *   effective_attempts:float,
     *   recommendation:array{
     *     action:string,
     *     reason:string,
     *     gap:?float,
     *     delta:?float,
     *     suggested_difficulty:?float
     *   }
     * }
     */
    private function buildCalibrationRecommendation(
        ?float $bankDifficultyNormalized,
        array $strata,
        int $minAttempts,
        float $alpha,
        float $maxStep,
        int $halfLifeDays
    ): array {
        if ($bankDifficultyNormalized === null) {
            return [
                'weighted_error_rate' => null,
                'effective_attempts' => 0.0,
                'recommendation' => [
                    'action' => 'hold',
                    'reason' => '题库难度为空，无法计算建议。',
                    'gap' => null,
                    'delta' => null,
                    'suggested_difficulty' => null,
                ],
            ];
        }

        $nowTimestamp = Carbon::now()->getTimestamp();
        // Guard the divisor so a non-positive half-life can never cause a division by zero.
        $halfLife = max(1, $halfLifeDays);
        $weightedAttempts = 0.0;
        $weightedWrong = 0.0;

        foreach ($strata as $s) {
            $attempts = (int) ($s['attempts'] ?? 0);
            $wrong = (int) ($s['wrong_count'] ?? 0);
            if ($attempts <= 0) {
                continue;
            }
            $lastAtRaw = $s['last_answered_at'] ?? null;
            $days = 0.0;
            if ($lastAtRaw) {
                try {
                    $lastTimestamp = Carbon::parse((string) $lastAtRaw)->getTimestamp();
                    // Signed, fractional age computed from timestamps: independent of how the
                    // installed Carbon version defines diffInDays(); future values clamp to 0.
                    $days = max(0.0, ($nowTimestamp - $lastTimestamp) / 86400.0);
                } catch (\Throwable) {
                    // Best effort: an unparsable timestamp is treated as "just answered".
                    $days = 0.0;
                }
            }
            $w = pow(0.5, $days / $halfLife);
            $weightedAttempts += $attempts * $w;
            $weightedWrong += $wrong * $w;
        }

        if ($weightedAttempts <= 0.0) {
            return [
                'weighted_error_rate' => null,
                'effective_attempts' => 0.0,
                'recommendation' => [
                    'action' => 'hold',
                    'reason' => '无有效样本，保持不变。',
                    'gap' => null,
                    'delta' => null,
                    'suggested_difficulty' => round($bankDifficultyNormalized, 4),
                ],
            ];
        }

        $weightedErrorRate = $weightedWrong / $weightedAttempts;
        $gap = $weightedErrorRate - $bankDifficultyNormalized;

        if ($weightedAttempts < $minAttempts) {
            // Below the sample threshold: report the observation but suggest no change.
            return [
                'weighted_error_rate' => round($weightedErrorRate, 4),
                'effective_attempts' => round($weightedAttempts, 2),
                'recommendation' => [
                    'action' => 'hold',
                    'reason' => '有效样本不足门槛 '.$minAttempts.'，仅观测不调整。',
                    'gap' => round($gap, 4),
                    'delta' => 0.0,
                    'suggested_difficulty' => round($bankDifficultyNormalized, 4),
                ],
            ];
        }

        $delta = max(-$maxStep, min($maxStep, $alpha * $gap));
        $suggested = max(0.0, min(1.0, $bankDifficultyNormalized + $delta));
        // Changes smaller than this are reported as "hold".
        $eps = 1e-6;
        $action = $delta > $eps ? 'increase' : ($delta < -$eps ? 'decrease' : 'hold');
        $reason = match ($action) {
            'increase' => '实测（分层+时衰）错误率高于标定，建议小步上调。',
            'decrease' => '实测（分层+时衰）错误率低于标定，建议小步下调。',
            default => 'gap 接近 0，建议保持不变。',
        };

        return [
            'weighted_error_rate' => round($weightedErrorRate, 4),
            'effective_attempts' => round($weightedAttempts, 2),
            'recommendation' => [
                'action' => $action,
                'reason' => $reason,
                'gap' => round($gap, 4),
                'delta' => round($delta, 4),
                'suggested_difficulty' => round($suggested, 4),
            ],
        ];
    }
}