*/
    public function run(array $options = []): array
    {
        // Minimum number of graded attempts a question needs to appear in the per-question report.
        $minAttempts = max(1, (int) ($options['min_attempts'] ?? 5));
        $includeMistakes = (bool) ($options['include_mistakes'] ?? true);

        // The helpers below are typed `?Carbon`; coerce strings / other DateTimeInterface values
        // here so a caller passing e.g. '2024-01-01' gets a result instead of a TypeError.
        $since = $options['since'] ?? null;
        if ($since === '') {
            $since = null;
        }
        if ($since !== null && ! $since instanceof Carbon) {
            try {
                $since = $since instanceof \DateTimeInterface
                    ? Carbon::instance($since)
                    : Carbon::parse((string) $since);
            } catch (\Throwable) {
                return [
                    'ok' => false,
                    'error' => '无法解析 since 参数',
                ];
            }
        }

        // isset() already rejects null, so only the empty string needs an explicit check.
        $studentId = isset($options['student_id']) && $options['student_id'] !== ''
            ? (string) $options['student_id']
            : null;

        $questionBankId = isset($options['question_bank_id']) ? (int) $options['question_bank_id'] : null;
        if ($questionBankId === 0) {
            $questionBankId = null;
        }

        // A question_code, when given and resolvable, overrides question_bank_id.
        $questionCode = isset($options['question_code']) ? trim((string) $options['question_code']) : '';
        if ($questionCode !== '' && Schema::hasTable('questions')) {
            $resolved = DB::table('questions')->where('question_code', $questionCode)->value('id');
            if ($resolved === null) {
                return [
                    'ok' => false,
                    'error' => '未找到 question_code='.$questionCode.' 对应的题库题目',
                ];
            }
            $questionBankId = (int) $resolved;
        }

        if (! Schema::hasTable('paper_questions') || ! Schema::hasTable('papers')) {
            return [
                'ok' => false,
                'error' => '缺少必要数据表 paper_questions 或 papers',
            ];
        }

        // Parameters for the four hard calibration constraints (overridable through the options).
        $calibrationMinAttempts = max(1, (int) ($options['calibration_min_attempts'] ?? 10));
        $alpha = max(0.01, min(1.0, (float) ($options['alpha'] ?? 0.2)));
        $maxStep = max(0.001, min(0.2, (float) ($options['max_step'] ?? 0.03)));
        $halfLifeDays = max(1, (int) ($options['half_life_days'] ?? 30));

        $perQuestion = $this->aggregatePerQuestion($minAttempts, $since, $studentId, $questionBankId);
        $byPaperDifficulty = $this->aggregatePerQuestionByPaperDifficulty($since, $studentId, $questionBankId);

        // Paired samples (normalized bank difficulty, empirical error rate) for the Pearson summary.
        $bankDiffs = [];
        $errorRates = [];
        foreach ($perQuestion as $row) {
            $d = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
            if ($d === null) {
                continue;
            }
            $n = (int) $row['attempts'];
            if ($n < 1) {
                continue;
            }
            $acc = (float) $row['correct_count'] / $n;
            $bankDiffs[] = $d;
            $errorRates[] = 1.0 - $acc;
        }

        $bins = $this->binByDifficulty($perQuestion);
        $pearson = $this->pearsonCorrelation($bankDiffs, $errorRates);
        $paperLevelRows = $this->rowLevelPaperDifficultyVsOutcome($since, $studentId);

        $mistakeByBankId = [];
        if ($includeMistakes && Schema::hasTable('mistake_records')) {
            $mistakeByBankId = $this->mistakeCountsByQuestionBankId($studentId);
        }

        $merged = [];
        foreach ($perQuestion as $row) {
            $bid = (int) $row['question_bank_id'];
            $norm = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
            $emp = $row['attempts'] > 0
                ? 1.0 - ((float) $row['correct_count'] / (int) $row['attempts'])
                : null;
            $gap = ($emp !== null && $norm !== null) ? round($emp - $norm, 4) : null;
            $strata = $byPaperDifficulty[$bid] ?? [];
            $calibration = $this->buildCalibrationRecommendation(
                $norm,
                $strata,
                $calibrationMinAttempts,
                $alpha,
                $maxStep,
                $halfLifeDays
            );

            $merged[] = array_merge($row, [
                'wrong_count' => max(0, (int) $row['attempts'] - (int) $row['correct_count']),
                'bank_difficulty_normalized' => $norm,
                'empirical_error_rate' => $emp,
                // Empirical error rate minus bank difficulty (0–1): a larger value means the
                // question is harder to get right than its calibrated difficulty suggests.
                'calibration_gap' => $gap,
                'mistake_records_count' => $mistakeByBankId[$bid] ?? 0,
                'paper_difficulty_breakdown' => $strata,
                'calibration_weighted_error_rate' => $calibration['weighted_error_rate'],
                'calibration_effective_attempts' => $calibration['effective_attempts'],
                'calibration_recommendation' => $calibration['recommendation'],
            ]);
        }

        $paperPearson = $paperLevelRows['pearson_category_vs_incorrect'] ?? null;

        return [
            'ok' => true,
            'meta' => [
                'min_attempts' => $minAttempts,
                'since' => $since?->toIso8601String(),
                'student_id' => $studentId,
                'question_bank_id' => $questionBankId,
                'question_rows' => count($perQuestion),
                'note' => '无独立「学生逐题自评难易」字段;mistake_records 为错题本行数。下列「每题一行」为 paper_questions 已判分聚合。',
                'calibration_constraints' => [
                    'stratified_by' => 'papers.difficulty_category',
                    'min_attempts' => $calibrationMinAttempts,
                    'alpha' => $alpha,
                    'max_step' => $maxStep,
                    'time_decay_half_life_days' => $halfLifeDays,
                ],
            ],
            'summary' => [
                'pearson_bank_difficulty_vs_empirical_error_rate' => $pearson,
                'interpretation' => $this->interpretPearson($pearson),
                'pearson_paper_difficulty_category_vs_incorrect' => $paperPearson,
                'interpretation_paper_category' => $this->interpretPearson($paperPearson),
            ],
            'bins_by_bank_difficulty' => $bins,
            'paper_difficulty_category_vs_incorrect_rate' => $paperLevelRows,
            'per_question' => $merged,
        ];
    }

    /**
     * Aggregates graded paper_questions rows into one row per question_bank_id.
     *
     * Only rows with a non-null is_correct and question_bank_id are counted, and only
     * questions with at least $minAttempts such rows are returned.
     *
     * @param Carbon|null $since          keep rows whose updated_at OR graded_at is >= this instant
     * @param string|null $studentId      restrict to papers of this student
     * @param int|null    $questionBankId restrict to a single bank question
     *
     * @return list<array<string, mixed>> rows with keys question_bank_id, question_code, attempts,
     *                                    correct_count, accuracy, avg_paper_question_difficulty, bank_difficulty
     */
    private function aggregatePerQuestion(int $minAttempts, ?Carbon $since, ?string $studentId, ?int $questionBankId): array
    {
        $q = DB::table('paper_questions as pq')
            ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
            ->leftJoin('questions as qu', 'qu.id', '=', 'pq.question_bank_id')
            ->whereNotNull('pq.is_correct')
            ->whereNotNull('pq.question_bank_id');

        if ($studentId !== null) {
            $q->where('p.student_id', $studentId);
        }
        if ($questionBankId !== null) {
            $q->where('pq.question_bank_id', $questionBankId);
        }
        if ($since !== null) {
            $q->where(function ($w) use ($since) {
                $w->where('pq.updated_at', '>=', $since)
                    ->orWhere('pq.graded_at', '>=', $since);
            });
        }

        $rows = $q
            ->groupBy('pq.question_bank_id')
            ->havingRaw('COUNT(*) >= ?', [$minAttempts])
            ->selectRaw('
                pq.question_bank_id as question_bank_id,
                COUNT(*) as attempts,
                SUM(CASE WHEN pq.is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
                AVG(pq.difficulty) as avg_paper_question_difficulty,
                MAX(qu.difficulty) as bank_difficulty,
                MAX(qu.question_code) as question_code
            ')
            ->get();

        return $rows->map(static fn ($r): array => [
            'question_bank_id' => (int) $r->question_bank_id,
            'question_code' => $r->question_code,
            'attempts' => (int) $r->attempts,
            'correct_count' => (int) $r->correct_count,
            'accuracy' => $r->attempts > 0
                ? round((int) $r->correct_count / (int) $r->attempts, 4)
                : null,
            'avg_paper_question_difficulty' => $r->avg_paper_question_difficulty !== null
                ? (float) $r->avg_paper_question_difficulty
                : null,
            'bank_difficulty' => $r->bank_difficulty !== null
                ? (float) $r->bank_difficulty
                : null,
        ])->all();
    }

    /**
     * Stratified statistics: the right/wrong distribution of each question within each
     * papers.difficulty_category.
     *
     * @return array<int, list<array<string, mixed>>> question_bank_id => list of strata, each with
     *                                                difficulty_category, difficulty_category_numeric, attempts,
     *                                                correct_count, wrong_count, error_rate, last_answered_at
     */
    private function aggregatePerQuestionByPaperDifficulty(?Carbon $since, ?string $studentId, ?int $questionBankId): array
    {
        $q = DB::table('paper_questions as pq')
            ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
            ->whereNotNull('pq.is_correct')
            ->whereNotNull('pq.question_bank_id');

        if ($studentId !== null) {
            $q->where('p.student_id', $studentId);
        }
        if ($questionBankId !== null) {
            $q->where('pq.question_bank_id', $questionBankId);
        }
        if ($since !== null) {
            $q->where(function ($w) use ($since) {
                $w->where('pq.updated_at', '>=', $since)
                    ->orWhere('pq.graded_at', '>=', $since);
            });
        }

        $rows = $q->groupBy('pq.question_bank_id', 'p.difficulty_category')
            ->selectRaw('
                pq.question_bank_id as question_bank_id,
                p.difficulty_category as difficulty_category,
                COUNT(*) as attempts,
                SUM(CASE WHEN pq.is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
                SUM(CASE WHEN pq.is_correct = 0 THEN 1 ELSE 0 END) as wrong_count,
                MAX(COALESCE(pq.graded_at, pq.updated_at, pq.created_at)) as last_answered_at
            ')
            ->get();

        $out = [];
        foreach ($rows as $r) {
            $bid = (int) $r->question_bank_id;
            $attempts = (int) $r->attempts;
            $wrong = (int) $r->wrong_count;

            $out[$bid] ??= [];
            $out[$bid][] = [
                'difficulty_category' => $r->difficulty_category,
                'difficulty_category_numeric' => self::parsePaperDifficultyCategory((string) ($r->difficulty_category ?? '')),
                'attempts' => $attempts,
                'correct_count' => (int) $r->correct_count,
                'wrong_count' => $wrong,
                'error_rate' => $attempts > 0 ? round($wrong / $attempts, 4) : null,
                'last_answered_at' => $r->last_answered_at,
            ];
        }

        return $out;
    }

    /**
     * Row-level analysis: Pearson correlation between the paper's difficulty_category
     * (parsed to a 0–4 level, then divided by 4) and whether the answer was incorrect (0/1).
     *
     * Rows whose category cannot be parsed are counted under an "unknown" bucket in
     * by_category (difficulty_category_numeric = null) and excluded from the correlation.
     *
     * @return array{
     *     n_rows: int,
     *     n_rows_with_category: int,
     *     pearson_category_vs_incorrect: ?float,
     *     by_category: list<array{difficulty_category_numeric: ?float, n: int, incorrect_rate: ?float}>
     * }
     */
    private function rowLevelPaperDifficultyVsOutcome(?Carbon $since, ?string $studentId): array
    {
        $q = DB::table('paper_questions as pq')
            ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
            ->whereNotNull('pq.is_correct');

        if ($studentId !== null) {
            $q->where('p.student_id', $studentId);
        }
        if ($since !== null) {
            $q->where(function ($w) use ($since) {
                $w->where('pq.updated_at', '>=', $since)
                    ->orWhere('pq.graded_at', '>=', $since);
            });
        }

        $rows = $q->select(['pq.is_correct', 'p.difficulty_category'])->get();

        $byCat = [];
        $xs = [];
        $ys = [];
        foreach ($rows as $r) {
            // Cast explicitly: the column may come back as an int, and the parser is typed ?string
            // (the stratified aggregation casts the same way).
            $raw = $r->difficulty_category ?? null;
            $cat = self::parsePaperDifficultyCategory($raw === null ? null : (string) $raw);
            $incorrect = ((int) $r->is_correct) === 0 ? 1 : 0;

            $key = $cat === null ? '_unknown' : (string) $cat;
            if (! isset($byCat[$key])) {
                $byCat[$key] = ['category' => $cat, 'n' => 0, 'incorrect' => 0];
            }
            $byCat[$key]['n']++;
            $byCat[$key]['incorrect'] += $incorrect;

            // Only rows with a parsable category take part in the correlation.
            if ($cat !== null) {
                $xs[] = $cat / 4.0;
                $ys[] = (float) $incorrect;
            }
        }

        $outBy = [];
        foreach ($byCat as $v) {
            $n = $v['n'];
            $outBy[] = [
                'difficulty_category_numeric' => $v['category'],
                'n' => $n,
                'incorrect_rate' => $n > 0 ? round($v['incorrect'] / $n, 4) : null,
            ];
        }
        // The unknown bucket (null) sorts first, ahead of level 0.
        usort(
            $outBy,
            static fn (array $a, array $b): int => ($a['difficulty_category_numeric'] ?? -1) <=> ($b['difficulty_category_numeric'] ?? -1)
        );

        return [
            'n_rows' => $rows->count(),
            'n_rows_with_category' => count($xs),
            'pearson_category_vs_incorrect' => $this->pearsonCorrelation($xs, $ys),
            'by_category' => $outBy,
        ];
    }

    /**
     * Counts mistake_records rows per question_id, optionally for a single student.
     *
     * NOTE(review): the result is looked up by question_bank_id in run(), which presumes
     * mistake_records.question_id holds the bank question id — confirm against the schema.
     * Non-numeric question_id values are skipped.
     *
     * @return array<int, int> question id => number of mistake_records rows
     */
    private function mistakeCountsByQuestionBankId(?string $studentId): array
    {
        $mq = DB::table('mistake_records')
            ->selectRaw('question_id, COUNT(*) as c')
            ->groupBy('question_id');
        if ($studentId !== null) {
            $mq->where('student_id', $studentId);
        }

        $counts = $mq->pluck('c', 'question_id')->all();

        $byBank = [];
        foreach ($counts as $qid => $c) {
            if (! is_numeric($qid)) {
                continue;
            }
            $bankId = (int) $qid;
            // Distinct raw ids can collapse to the same int after the cast, hence the accumulation.
            $byBank[$bankId] = ($byBank[$bankId] ?? 0) + (int) $c;
        }

        return $byBank;
    }

    /**
     * Groups per-question rows into four equal-width bins of normalized bank difficulty:
     * [0, 0.25), [0.25, 0.5), [0.5, 0.75), [0.75, 1.0]. Out-of-range values are clamped
     * into the first or last bin; rows without a bank difficulty are skipped.
     *
     * @param list<array<string, mixed>> $perQuestion rows as returned by aggregatePerQuestion()
     *
     * @return list<array<string, mixed>> one entry per bin with min, max, n_questions,
     *                                    total_attempts, total_correct, mean_accuracy
     */
    private function binByDifficulty(array $perQuestion): array
    {
        $edges = [0.0, 0.25, 0.5, 0.75, 1.0];
        $binCount = count($edges) - 1;

        $bins = [];
        for ($i = 0; $i < $binCount; $i++) {
            $bins[] = [
                'min' => $edges[$i],
                'max' => $edges[$i + 1],
                'n_questions' => 0,
                'total_attempts' => 0,
                'total_correct' => 0,
                'mean_accuracy' => null,
            ];
        }

        foreach ($perQuestion as $row) {
            $d = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
            if ($d === null) {
                continue;
            }
            // Capping at 0.999999 keeps a difficulty of exactly 1.0 inside the last bin.
            $binIdx = (int) floor(min(0.999999, max(0.0, $d)) / 0.25);
            $binIdx = max(0, min(3, $binIdx));

            $bins[$binIdx]['n_questions']++;
            $bins[$binIdx]['total_attempts'] += (int) $row['attempts'];
            $bins[$binIdx]['total_correct'] += (int) $row['correct_count'];
        }

        foreach ($bins as &$b) {
            if ($b['total_attempts'] > 0) {
                $b['mean_accuracy'] = round($b['total_correct'] / $b['total_attempts'], 4);
            }
        }
        unset($b); // drop the dangling reference left by the by-reference foreach

        return $bins;
    }

    /**
     * Turns a Pearson coefficient into a human-readable interpretation.
     *
     * Thresholds: r > 0.15 is reported as a positive correlation, r < -0.15 as a negative one,
     * anything in between as weak; null means the coefficient could not be computed.
     */
    private function interpretPearson(?float $r): string
    {
        if ($r === null) {
            return '样本不足或难度无变异,无法计算相关系数。';
        }
        if ($r > 0.15) {
            return '题库难度与实测错误率呈正相关:标定越高的题,学生越容易错,方向符合预期。';
        }
        if ($r < -0.15) {
            return '出现负相关:标定「难」的题反而正确率更高,建议检查标定、题型或样本偏差。';
        }

        return '相关较弱:标定难度与实测区分度不明显,可能样本量、标定噪声或题目同质性导致。';
    }

    /**
     * Parses papers.difficulty_category into a level between 0 and 4.
     *
     * The returned value is NOT normalized; callers that need a 0–1 value divide by 4.
     * Numeric input is truncated to an integer and clamped to 0–4; known textual labels map
     * to their tier; anything else (including null / empty) yields null.
     */
    public static function parsePaperDifficultyCategory(?string $raw): ?float
    {
        if ($raw === null) {
            return null;
        }
        $s = strtolower(trim($raw));
        if ($s === '') {
            return null;
        }
        if (is_numeric($s)) {
            return (float) max(0, min(4, (int) $s));
        }

        // Business-side 0–4 tiers: 0 basic / 1 foundation / 2 score improvement / 3 advanced /
        // 4 competition (per the original note, aligned with MasteryCalculator's band naming).
        // Purely numeric labels never reach this point — is_numeric() handled them above.
        return match ($s) {
            '零基础', '0基础', '基础', '0级' => 0.0,
            '筑基' => 1.0,
            '进阶', '中等', '提分' => 2.0,
            '培优' => 3.0,
            '竞赛' => 4.0,
            default => null,
        };
    }

    /**
     * Normalizes a bank difficulty onto the 0–1 scale.
     *
     * Values greater than 1 are divided by 5; values of 1 or less are returned unchanged,
     * so no clamping is applied to either end.
     * NOTE(review): this looks like it treats values > 1 as a 1–5 scale — confirm against the
     * questions.difficulty column definition.
     */
    public static function normalizeDifficulty(?float $d): ?float
    {
        if ($d === null) {
            return null;
        }

        return $d > 1.0 ? $d / 5.0 : $d;
    }

    /**
     * Pearson correlation coefficient of two equally long samples, rounded to 4 decimals.
     *
     * Returns null when there are fewer than 3 pairs, the lengths differ, or either sample
     * has (near-)zero variance.
     *
     * @param list<float> $x
     * @param list<float> $y
     */
    private function pearsonCorrelation(array $x, array $y): ?float
    {
        $n = count($x);
        if ($n < 3 || count($y) !== $n) {
            return null;
        }

        $mx = array_sum($x) / $n;
        $my = array_sum($y) / $n;

        $num = 0.0;
        $dx = 0.0;
        $dy = 0.0;
        for ($i = 0; $i < $n; $i++) {
            $vx = $x[$i] - $mx;
            $vy = $y[$i] - $my;
            $num += $vx * $vy;
            $dx += $vx * $vx;
            $dy += $vy * $vy;
        }

        $den = sqrt($dx * $dy);

        return $den > 1e-12 ? round($num / $den, 4) : null;
    }

    /**
     * Produces a per-question dynamic difficulty recommendation under four hard constraints.
     *
     * Constraints:
     * 1) Stratification: samples are first split by papers.difficulty_category;
     * 2) Sample threshold: no adjustment when the effective sample size is too small;
     * 3) Smoothing + clipping: delta = clip(alpha * gap, -maxStep, maxStep);
     * 4) Time decay: each stratum is weighted by its most recent answer time
     *    (half-life of halfLifeDays).
     *
     * @param list<array<string, mixed>> $strata strata as built by aggregatePerQuestionByPaperDifficulty()
     *
     * @return array{
     *     weighted_error_rate:?float,
     *     effective_attempts:float,
     *     recommendation:array{
     *         action:string,
     *         reason:string,
     *         gap:?float,
     *         delta:?float,
     *         suggested_difficulty:?float
     *     }
     * }
     */
    private function buildCalibrationRecommendation(
        ?float $bankDifficultyNormalized,
        array $strata,
        int $minAttempts,
        float $alpha,
        float $maxStep,
        int $halfLifeDays
    ): array {
        if ($bankDifficultyNormalized === null) {
            return [
                'weighted_error_rate' => null,
                'effective_attempts' => 0.0,
                'recommendation' => [
                    'action' => 'hold',
                    'reason' => '题库难度为空,无法计算建议。',
                    'gap' => null,
                    'delta' => null,
                    'suggested_difficulty' => null,
                ],
            ];
        }

        // Guard the exponent below against a zero or negative half-life.
        $halfLife = max(1, $halfLifeDays);
        $now = Carbon::now();
        $weightedAttempts = 0.0;
        $weightedWrong = 0.0;

        foreach ($strata as $s) {
            $attempts = (int) ($s['attempts'] ?? 0);
            $wrong = (int) ($s['wrong_count'] ?? 0);
            if ($attempts <= 0) {
                continue;
            }

            $lastAtRaw = $s['last_answered_at'] ?? null;
            $days = 0.0;
            if ($lastAtRaw) {
                try {
                    $lastAt = Carbon::parse((string) $lastAtRaw);
                    // Ask for a signed difference explicitly: Carbon 2 returns an absolute value
                    // by default, which would decay future-dated rows instead of clamping them to 0.
                    $days = max(0.0, (float) $lastAt->diffInDays($now, false));
                } catch (\Throwable) {
                    // Best effort: an unparsable timestamp is treated as "answered just now".
                    $days = 0.0;
                }
            }

            $w = pow(0.5, $days / $halfLife);
            $weightedAttempts += $attempts * $w;
            $weightedWrong += $wrong * $w;
        }

        if ($weightedAttempts <= 0.0) {
            return [
                'weighted_error_rate' => null,
                'effective_attempts' => 0.0,
                'recommendation' => [
                    'action' => 'hold',
                    'reason' => '无有效样本,保持不变。',
                    'gap' => null,
                    'delta' => null,
                    'suggested_difficulty' => round($bankDifficultyNormalized, 4),
                ],
            ];
        }

        $weightedErrorRate = $weightedWrong / $weightedAttempts;
        $gap = $weightedErrorRate - $bankDifficultyNormalized;

        if ($weightedAttempts < $minAttempts) {
            return [
                'weighted_error_rate' => round($weightedErrorRate, 4),
                'effective_attempts' => round($weightedAttempts, 2),
                'recommendation' => [
                    'action' => 'hold',
                    'reason' => '有效样本不足门槛 '.$minAttempts.',仅观测不调整。',
                    'gap' => round($gap, 4),
                    'delta' => 0.0,
                    'suggested_difficulty' => round($bankDifficultyNormalized, 4),
                ],
            ];
        }

        $delta = max(-$maxStep, min($maxStep, $alpha * $gap));
        $suggested = max(0.0, min(1.0, $bankDifficultyNormalized + $delta));

        // Deltas within ±eps are treated as "no change".
        $eps = 1e-6;
        $action = $delta > $eps ? 'increase' : ($delta < -$eps ? 'decrease' : 'hold');
        $reason = match ($action) {
            'increase' => '实测(分层+时衰)错误率高于标定,建议小步上调。',
            'decrease' => '实测(分层+时衰)错误率低于标定,建议小步下调。',
            default => 'gap 接近 0,建议保持不变。',
        };

        return [
            'weighted_error_rate' => round($weightedErrorRate, 4),
            'effective_attempts' => round($weightedAttempts, 2),
            'recommendation' => [
                'action' => $action,
                'reason' => $reason,
                'gap' => round($gap, 4),
                'delta' => round($delta, 4),
                'suggested_difficulty' => round($suggested, 4),
            ],
        ];
    }
}