| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608 |
- <?php
- namespace App\Services\Analytics;
- use Illuminate\Support\Carbon;
- use Illuminate\Support\Facades\DB;
- use Illuminate\Support\Facades\Schema;
/**
 * Extracts "question-bank calibrated difficulty vs. measured accuracy" metrics from
 * graded answers and mistake records, to check whether the difficulty scale is sound.
 *
 * Note: the tables used here have no dedicated "student self-rated difficulty per
 * question" field; {@see self::parsePaperDifficultyCategory()} turns
 * papers.difficulty_category into a numeric level that is used as a proxy for the
 * difficulty chosen for a practice paper.
 */
class QuestionDifficultyCalibrationAnalyzer
{
    /**
     * Runs the full analysis and returns a report array.
     *
     * Returns ['ok' => false, 'error' => string] when a given question_code cannot be
     * resolved or when the paper_questions / papers tables are missing. Otherwise
     * returns 'ok' => true together with 'meta', 'summary', 'bins_by_bank_difficulty',
     * 'paper_difficulty_category_vs_incorrect_rate' and 'per_question'.
     *
     * Numeric options are clamped: min_attempts >= 1, calibration_min_attempts >= 1,
     * alpha to [0.01, 1.0], max_step to [0.001, 0.2], half_life_days >= 1.
     *
     * @param array{
     *     min_attempts?: int,
     *     since?: Carbon|null,
     *     include_mistakes?: bool,
     *     student_id?: string|int|null,
     *     question_bank_id?: int|null,
     *     question_code?: string|null,
     *     calibration_min_attempts?: int,
     *     alpha?: float,
     *     max_step?: float,
     *     half_life_days?: int
     * } $options
     * @return array<string, mixed>
     */
    public function run(array $options = []): array
    {
        $minAttempts = max(1, (int) ($options['min_attempts'] ?? 5));
        $since = $options['since'] ?? null;
        $includeMistakes = (bool) ($options['include_mistakes'] ?? true);

        // isset() already excludes null, so only the empty string needs a separate check.
        $studentId = isset($options['student_id']) && $options['student_id'] !== ''
            ? (string) $options['student_id']
            : null;

        // A question_bank_id of 0 is treated as "no filter".
        $questionBankId = isset($options['question_bank_id']) ? (int) $options['question_bank_id'] : null;
        if ($questionBankId === 0) {
            $questionBankId = null;
        }

        // A question_code, when given and resolvable, overrides question_bank_id.
        $questionCode = isset($options['question_code']) ? trim((string) $options['question_code']) : '';
        if ($questionCode !== '' && Schema::hasTable('questions')) {
            $resolved = DB::table('questions')->where('question_code', $questionCode)->value('id');
            if ($resolved === null) {
                return [
                    'ok' => false,
                    'error' => '未找到 question_code='.$questionCode.' 对应的题库题目',
                ];
            }
            $questionBankId = (int) $resolved;
        }

        if (! Schema::hasTable('paper_questions') || ! Schema::hasTable('papers')) {
            return [
                'ok' => false,
                'error' => '缺少必要数据表 paper_questions 或 papers',
            ];
        }

        // The four hard calibration constraints (overridable by the caller).
        $calibrationMinAttempts = max(1, (int) ($options['calibration_min_attempts'] ?? 10));
        $alpha = max(0.01, min(1.0, (float) ($options['alpha'] ?? 0.2)));
        $maxStep = max(0.001, min(0.2, (float) ($options['max_step'] ?? 0.03)));
        $halfLifeDays = max(1, (int) ($options['half_life_days'] ?? 30));

        $perQuestion = $this->aggregatePerQuestion($minAttempts, $since, $studentId, $questionBankId);
        $byPaperDifficulty = $this->aggregatePerQuestionByPaperDifficulty($since, $studentId, $questionBankId);
        $bins = $this->binByDifficulty($perQuestion);

        // NOTE(review): this paper-level statistic is not narrowed by question_bank_id,
        // unlike the two aggregates above — confirm that is intended.
        $paperLevelRows = $this->rowLevelPaperDifficultyVsOutcome($since, $studentId);

        $mistakeByBankId = ($includeMistakes && Schema::hasTable('mistake_records'))
            ? $this->mistakeCountsByQuestionBankId($studentId)
            : [];

        $bankDiffs = [];
        $errorRates = [];
        $merged = [];
        foreach ($perQuestion as $row) {
            $bankId = (int) $row['question_bank_id'];
            $attempts = (int) $row['attempts'];
            $correct = (int) $row['correct_count'];

            $norm = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
            $emp = $attempts > 0 ? 1.0 - ((float) $correct / $attempts) : null;

            // Only questions with both a calibrated difficulty and attempts feed the correlation.
            if ($norm !== null && $emp !== null) {
                $bankDiffs[] = $norm;
                $errorRates[] = $emp;
            }

            $strata = $byPaperDifficulty[$bankId] ?? [];
            $calibration = $this->buildCalibrationRecommendation(
                $norm,
                $strata,
                $calibrationMinAttempts,
                $alpha,
                $maxStep,
                $halfLifeDays
            );

            $merged[] = array_merge($row, [
                'wrong_count' => max(0, $attempts - $correct),
                'bank_difficulty_normalized' => $norm,
                'empirical_error_rate' => $emp,
                // Measured error rate minus bank difficulty (0–1): larger means the
                // question is harder to get right than its calibration says.
                'calibration_gap' => ($emp !== null && $norm !== null) ? round($emp - $norm, 4) : null,
                'mistake_records_count' => $mistakeByBankId[$bankId] ?? 0,
                'paper_difficulty_breakdown' => $strata,
                'calibration_weighted_error_rate' => $calibration['weighted_error_rate'],
                'calibration_effective_attempts' => $calibration['effective_attempts'],
                'calibration_recommendation' => $calibration['recommendation'],
            ]);
        }

        $pearson = $this->pearsonCorrelation($bankDiffs, $errorRates);
        $paperPearson = $paperLevelRows['pearson_category_vs_incorrect'] ?? null;

        return [
            'ok' => true,
            'meta' => [
                'min_attempts' => $minAttempts,
                'since' => $since?->toIso8601String(),
                'student_id' => $studentId,
                'question_bank_id' => $questionBankId,
                'question_rows' => count($perQuestion),
                'note' => '无独立「学生逐题自评难易」字段;mistake_records 为错题本行数。下列「每题一行」为 paper_questions 已判分聚合。',
                'calibration_constraints' => [
                    'stratified_by' => 'papers.difficulty_category',
                    'min_attempts' => $calibrationMinAttempts,
                    'alpha' => $alpha,
                    'max_step' => $maxStep,
                    'time_decay_half_life_days' => $halfLifeDays,
                ],
            ],
            'summary' => [
                'pearson_bank_difficulty_vs_empirical_error_rate' => $pearson,
                'interpretation' => $this->interpretPearson($pearson),
                'pearson_paper_difficulty_category_vs_incorrect' => $paperPearson,
                'interpretation_paper_category' => $this->interpretPearson($paperPearson),
            ],
            'bins_by_bank_difficulty' => $bins,
            'paper_difficulty_category_vs_incorrect_rate' => $paperLevelRows,
            'per_question' => $merged,
        ];
    }

    /**
     * Base query over graded answers: paper_questions joined to papers, restricted to
     * rows with a non-null is_correct, optionally narrowed by student and by time.
     *
     * The time filter keeps a row when either pq.updated_at or pq.graded_at is >= $since.
     *
     * @return \Illuminate\Database\Query\Builder
     */
    private function gradedAnswersQuery(?Carbon $since, ?string $studentId)
    {
        $query = DB::table('paper_questions as pq')
            ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
            ->whereNotNull('pq.is_correct');

        if ($studentId !== null) {
            $query->where('p.student_id', $studentId);
        }

        if ($since !== null) {
            $query->where(function ($w) use ($since) {
                $w->where('pq.updated_at', '>=', $since)
                    ->orWhere('pq.graded_at', '>=', $since);
            });
        }

        return $query;
    }

    /**
     * Same as {@see self::gradedAnswersQuery()} but limited to answers that reference a
     * question-bank entry, optionally a single one.
     *
     * @return \Illuminate\Database\Query\Builder
     */
    private function bankQuestionAnswersQuery(?Carbon $since, ?string $studentId, ?int $questionBankId)
    {
        $query = $this->gradedAnswersQuery($since, $studentId)
            ->whereNotNull('pq.question_bank_id');

        if ($questionBankId !== null) {
            $query->where('pq.question_bank_id', $questionBankId);
        }

        return $query;
    }

    /**
     * One aggregated row per question-bank question: attempts, correct count, accuracy,
     * average per-paper difficulty and the bank's own difficulty / question code.
     *
     * Questions with fewer than $minAttempts graded answers are dropped (HAVING clause).
     *
     * @return list<array<string, mixed>>
     */
    private function aggregatePerQuestion(int $minAttempts, ?Carbon $since, ?string $studentId, ?int $questionBankId): array
    {
        $rows = $this->bankQuestionAnswersQuery($since, $studentId, $questionBankId)
            ->leftJoin('questions as qu', 'qu.id', '=', 'pq.question_bank_id')
            ->groupBy('pq.question_bank_id')
            ->havingRaw('COUNT(*) >= ?', [$minAttempts])
            ->selectRaw('
                pq.question_bank_id as question_bank_id,
                COUNT(*) as attempts,
                SUM(CASE WHEN pq.is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
                AVG(pq.difficulty) as avg_paper_question_difficulty,
                MAX(qu.difficulty) as bank_difficulty,
                MAX(qu.question_code) as question_code
            ')
            ->get();

        return $rows->map(function ($r) {
            $attempts = (int) $r->attempts;
            $correct = (int) $r->correct_count;

            return [
                'question_bank_id' => (int) $r->question_bank_id,
                'question_code' => $r->question_code,
                'attempts' => $attempts,
                'correct_count' => $correct,
                'accuracy' => $attempts > 0 ? round($correct / $attempts, 4) : null,
                'avg_paper_question_difficulty' => $r->avg_paper_question_difficulty !== null
                    ? (float) $r->avg_paper_question_difficulty
                    : null,
                'bank_difficulty' => $r->bank_difficulty !== null ? (float) $r->bank_difficulty : null,
            ];
        })->all();
    }

    /**
     * Stratified statistics: for every question, the right/wrong distribution within
     * each papers.difficulty_category value.
     *
     * @return array<int, list<array<string, mixed>>> keyed by question_bank_id
     */
    private function aggregatePerQuestionByPaperDifficulty(?Carbon $since, ?string $studentId, ?int $questionBankId): array
    {
        $rows = $this->bankQuestionAnswersQuery($since, $studentId, $questionBankId)
            ->groupBy('pq.question_bank_id', 'p.difficulty_category')
            ->selectRaw('
                pq.question_bank_id as question_bank_id,
                p.difficulty_category as difficulty_category,
                COUNT(*) as attempts,
                SUM(CASE WHEN pq.is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
                SUM(CASE WHEN pq.is_correct = 0 THEN 1 ELSE 0 END) as wrong_count,
                MAX(COALESCE(pq.graded_at, pq.updated_at, pq.created_at)) as last_answered_at
            ')
            ->get();

        $strataByQuestion = [];
        foreach ($rows as $r) {
            $attempts = (int) $r->attempts;
            $wrong = (int) $r->wrong_count;

            $strataByQuestion[(int) $r->question_bank_id][] = [
                'difficulty_category' => $r->difficulty_category,
                'difficulty_category_numeric' => self::parsePaperDifficultyCategory((string) ($r->difficulty_category ?? '')),
                'attempts' => $attempts,
                'correct_count' => (int) $r->correct_count,
                'wrong_count' => $wrong,
                'error_rate' => $attempts > 0 ? round($wrong / $attempts, 4) : null,
                'last_answered_at' => $r->last_answered_at,
            ];
        }

        return $strataByQuestion;
    }

    /**
     * Row-level view: Pearson correlation between the paper's difficulty_category
     * (parsed to a 0–4 level, then divided by 4) and whether the answer was wrong (0/1),
     * plus the incorrect rate per parsed category.
     *
     * Rows whose category cannot be parsed are counted under a null category in
     * 'by_category' and are excluded from the correlation.
     *
     * @return array{n_rows: int, n_rows_with_category: int, pearson_category_vs_incorrect: ?float, by_category: list<array<string, mixed>>}
     */
    private function rowLevelPaperDifficultyVsOutcome(?Carbon $since, ?string $studentId): array
    {
        $rows = $this->gradedAnswersQuery($since, $studentId)
            ->select(['pq.is_correct', 'p.difficulty_category'])
            ->get();

        $tally = [];
        $levels = [];
        $outcomes = [];
        foreach ($rows as $r) {
            // Parse once per row; the result feeds both the tally and the correlation.
            $level = self::parsePaperDifficultyCategory($r->difficulty_category ?? null);
            $incorrect = ((int) $r->is_correct) === 0 ? 1 : 0;

            $key = $level === null ? '_unknown' : (string) $level;
            $tally[$key] ??= ['category' => $level, 'n' => 0, 'incorrect' => 0];
            $tally[$key]['n']++;
            $tally[$key]['incorrect'] += $incorrect;

            if ($level !== null) {
                $levels[] = $level / 4.0;
                $outcomes[] = (float) $incorrect;
            }
        }

        $byCategory = [];
        foreach ($tally as $entry) {
            $byCategory[] = [
                'difficulty_category_numeric' => $entry['category'],
                'n' => $entry['n'],
                'incorrect_rate' => $entry['n'] > 0 ? round($entry['incorrect'] / $entry['n'], 4) : null,
            ];
        }
        // Unknown categories (null) sort first, then ascending by level.
        usort(
            $byCategory,
            fn ($a, $b) => ($a['difficulty_category_numeric'] ?? -1) <=> ($b['difficulty_category_numeric'] ?? -1)
        );

        return [
            'n_rows' => $rows->count(),
            'n_rows_with_category' => count($levels),
            'pearson_category_vs_incorrect' => $this->pearsonCorrelation($levels, $outcomes),
            'by_category' => $byCategory,
        ];
    }

    /**
     * Counts mistake_records rows per question, optionally for one student.
     *
     * Rows whose question_id is not numeric are skipped.
     * NOTE(review): this assumes mistake_records.question_id holds the question-bank id
     * — confirm against the schema.
     *
     * @return array<int, int> question_bank_id => number of mistake_records rows
     */
    private function mistakeCountsByQuestionBankId(?string $studentId): array
    {
        $query = DB::table('mistake_records')
            ->selectRaw('question_id, COUNT(*) as c')
            ->groupBy('question_id');

        if ($studentId !== null) {
            $query->where('student_id', $studentId);
        }

        $totals = [];
        foreach ($query->pluck('c', 'question_id')->all() as $questionId => $count) {
            if (! is_numeric($questionId)) {
                continue;
            }
            $bankId = (int) $questionId;
            $totals[$bankId] = ($totals[$bankId] ?? 0) + (int) $count;
        }

        return $totals;
    }

    /**
     * Groups questions into four equal-width bins of normalized bank difficulty and
     * reports the attempt-weighted accuracy of each bin.
     *
     * Bins are [0, 0.25), [0.25, 0.5), [0.5, 0.75) and [0.75, 1.0]; questions without a
     * bank difficulty are ignored.
     *
     * @param list<array<string, mixed>> $perQuestion
     * @return list<array<string, mixed>>
     */
    private function binByDifficulty(array $perQuestion): array
    {
        $edges = [0.0, 0.25, 0.5, 0.75, 1.0];
        $lastBin = count($edges) - 2;

        $bins = [];
        for ($i = 0; $i <= $lastBin; $i++) {
            $bins[] = [
                'min' => $edges[$i],
                'max' => $edges[$i + 1],
                'n_questions' => 0,
                'total_attempts' => 0,
                'total_correct' => 0,
                'mean_accuracy' => null,
            ];
        }

        foreach ($perQuestion as $row) {
            $difficulty = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
            if ($difficulty === null) {
                continue;
            }

            // A difficulty of exactly 1.0 would index a fifth bin, so cap at the last one.
            $idx = min($lastBin, (int) floor(max(0.0, $difficulty) / 0.25));

            $bins[$idx]['n_questions']++;
            $bins[$idx]['total_attempts'] += (int) $row['attempts'];
            $bins[$idx]['total_correct'] += (int) $row['correct_count'];
        }

        foreach ($bins as $i => $bin) {
            if ($bin['total_attempts'] > 0) {
                $bins[$i]['mean_accuracy'] = round($bin['total_correct'] / $bin['total_attempts'], 4);
            }
        }

        return $bins;
    }

    /**
     * Turns a correlation coefficient into a short human-readable verdict.
     *
     * Thresholds: above 0.15 is reported as positive, below -0.15 as negative, anything
     * in between as weak; null means the coefficient could not be computed.
     */
    private function interpretPearson(?float $r): string
    {
        return match (true) {
            $r === null => '样本不足或难度无变异,无法计算相关系数。',
            $r > 0.15 => '题库难度与实测错误率呈正相关:标定越高的题,学生越容易错,方向符合预期。',
            $r < -0.15 => '出现负相关:标定「难」的题反而正确率更高,建议检查标定、题型或样本偏差。',
            default => '相关较弱:标定难度与实测区分度不明显,可能样本量、标定噪声或题目同质性导致。',
        };
    }

    /**
     * Parses papers.difficulty_category into a level between 0 and 4.
     *
     * The returned value is the raw level (0.0–4.0), NOT a 0–1 fraction; callers that
     * need a 0–1 value divide by 4 themselves.
     *
     * Numeric strings are truncated to an integer and clamped to 0..4. Otherwise the
     * known labels are mapped as 0 = basic, 1 = foundation, 2 = intermediate,
     * 3 = advanced, 4 = competition. Null, blank or unrecognised input yields null.
     */
    public static function parsePaperDifficultyCategory(?string $raw): ?float
    {
        if ($raw === null) {
            return null;
        }

        $label = strtolower(trim($raw));
        if ($label === '') {
            return null;
        }

        // Numeric input (including '0'..'4') is handled here, so the label table below
        // only needs the non-numeric spellings.
        if (is_numeric($label)) {
            return (float) max(0, min(4, (int) $label));
        }

        return match ($label) {
            '零基础', '0基础', '基础', '0级' => 0.0,
            '筑基' => 1.0,
            '进阶', '中等', '提分' => 2.0,
            '培优' => 3.0,
            '竞赛' => 4.0,
            default => null,
        };
    }

    /**
     * Maps a bank difficulty onto the 0–1 scale used throughout this class.
     *
     * Values above 1 are divided by 5; values of 1 or below are used as they are. The
     * result is clamped to [0, 1] so out-of-range input cannot leak into gaps, bins or
     * suggestions.
     *
     * NOTE(review): the "above 1 means a 5-point scale" rule is a heuristic — a stored
     * 1 on a 1–5 scale would be read as 1.0; confirm the scale of questions.difficulty.
     */
    public static function normalizeDifficulty(?float $d): ?float
    {
        if ($d === null) {
            return null;
        }

        $scaled = $d > 1.0 ? $d / 5.0 : $d;

        return max(0.0, min(1.0, $scaled));
    }

    /**
     * Pearson correlation coefficient of two equally long lists, rounded to 4 decimals.
     *
     * Returns null when there are fewer than 3 points, when the lists differ in length,
     * or when either list has (near-)zero variance.
     *
     * @param list<float> $x
     * @param list<float> $y
     */
    private function pearsonCorrelation(array $x, array $y): ?float
    {
        $n = count($x);
        if ($n < 3 || count($y) !== $n) {
            return null;
        }

        $meanX = array_sum($x) / $n;
        $meanY = array_sum($y) / $n;

        $covariance = 0.0;
        $varianceX = 0.0;
        $varianceY = 0.0;
        for ($i = 0; $i < $n; $i++) {
            $devX = $x[$i] - $meanX;
            $devY = $y[$i] - $meanY;
            $covariance += $devX * $devY;
            $varianceX += $devX * $devX;
            $varianceY += $devY * $devY;
        }

        $denominator = sqrt($varianceX * $varianceY);

        return $denominator > 1e-12 ? round($covariance / $denominator, 4) : null;
    }

    /**
     * Produces a per-question difficulty adjustment suggestion under four constraints.
     *
     * Constraints:
     *  1) Stratification: input arrives already split by papers.difficulty_category;
     *  2) Sample threshold: below $minAttempts effective attempts nothing is changed;
     *  3) Smoothing + clipping: delta = clip(alpha * gap, -maxStep, maxStep);
     *  4) Time decay: each stratum is weighted by 0.5^(days / halfLifeDays), where days
     *     is the age of its most recent answer.
     *
     * @param list<array<string, mixed>> $strata
     * @return array{
     *     weighted_error_rate:?float,
     *     effective_attempts:float,
     *     recommendation:array{
     *         action:string,
     *         reason:string,
     *         gap:?float,
     *         delta:?float,
     *         suggested_difficulty:?float
     *     }
     * }
     */
    private function buildCalibrationRecommendation(
        ?float $bankDifficultyNormalized,
        array $strata,
        int $minAttempts,
        float $alpha,
        float $maxStep,
        int $halfLifeDays
    ): array {
        if ($bankDifficultyNormalized === null) {
            return $this->holdRecommendation(null, 0.0, '题库难度为空,无法计算建议。', null, null, null);
        }

        $current = round($bankDifficultyNormalized, 4);
        $now = Carbon::now();

        $weightedAttempts = 0.0;
        $weightedWrong = 0.0;
        foreach ($strata as $stratum) {
            $attempts = (int) ($stratum['attempts'] ?? 0);
            if ($attempts <= 0) {
                continue;
            }

            $weight = pow(0.5, $this->ageInDays($stratum['last_answered_at'] ?? null, $now) / $halfLifeDays);
            $weightedAttempts += $attempts * $weight;
            $weightedWrong += (int) ($stratum['wrong_count'] ?? 0) * $weight;
        }

        if ($weightedAttempts <= 0.0) {
            return $this->holdRecommendation(null, 0.0, '无有效样本,保持不变。', null, null, $current);
        }

        $weightedErrorRate = $weightedWrong / $weightedAttempts;
        $gap = $weightedErrorRate - $bankDifficultyNormalized;

        if ($weightedAttempts < $minAttempts) {
            return $this->holdRecommendation(
                round($weightedErrorRate, 4),
                round($weightedAttempts, 2),
                '有效样本不足门槛 '.$minAttempts.',仅观测不调整。',
                round($gap, 4),
                0.0,
                $current
            );
        }

        $delta = max(-$maxStep, min($maxStep, $alpha * $gap));
        $suggested = max(0.0, min(1.0, $bankDifficultyNormalized + $delta));

        // Treat a vanishingly small step as "no change".
        $eps = 1e-6;
        $action = match (true) {
            $delta > $eps => 'increase',
            $delta < -$eps => 'decrease',
            default => 'hold',
        };
        $reason = match ($action) {
            'increase' => '实测(分层+时衰)错误率高于标定,建议小步上调。',
            'decrease' => '实测(分层+时衰)错误率低于标定,建议小步下调。',
            default => 'gap 接近 0,建议保持不变。',
        };

        return [
            'weighted_error_rate' => round($weightedErrorRate, 4),
            'effective_attempts' => round($weightedAttempts, 2),
            'recommendation' => [
                'action' => $action,
                'reason' => $reason,
                'gap' => round($gap, 4),
                'delta' => round($delta, 4),
                'suggested_difficulty' => round($suggested, 4),
            ],
        ];
    }

    /**
     * Age in days of a stratum's most recent answer, never negative.
     *
     * A missing or unparsable timestamp counts as age 0 (full weight); this is a
     * deliberate best-effort fallback. A timestamp in the future also counts as age 0:
     * diffInDays() may return an absolute value depending on the Carbon version, so the
     * future case is checked explicitly instead of relying on the sign of the result.
     */
    private function ageInDays(mixed $lastAnsweredAt, Carbon $now): float
    {
        if (! $lastAnsweredAt) {
            return 0.0;
        }

        try {
            $lastAt = Carbon::parse((string) $lastAnsweredAt);
            if ($lastAt->greaterThan($now)) {
                return 0.0;
            }

            return max(0.0, (float) $lastAt->diffInDays($now));
        } catch (\Throwable) {
            return 0.0;
        }
    }

    /**
     * Builds a result whose recommendation action is 'hold'.
     *
     * @return array{
     *     weighted_error_rate:?float,
     *     effective_attempts:float,
     *     recommendation:array{
     *         action:string,
     *         reason:string,
     *         gap:?float,
     *         delta:?float,
     *         suggested_difficulty:?float
     *     }
     * }
     */
    private function holdRecommendation(
        ?float $weightedErrorRate,
        float $effectiveAttempts,
        string $reason,
        ?float $gap,
        ?float $delta,
        ?float $suggestedDifficulty
    ): array {
        return [
            'weighted_error_rate' => $weightedErrorRate,
            'effective_attempts' => $effectiveAttempts,
            'recommendation' => [
                'action' => 'hold',
                'reason' => $reason,
                'gap' => $gap,
                'delta' => $delta,
                'suggested_difficulty' => $suggestedDifficulty,
            ],
        ];
    }
}
|