|
@@ -0,0 +1,608 @@
|
|
|
|
|
+<?php
|
|
|
|
|
+
|
|
|
|
|
+namespace App\Services\Analytics;
|
|
|
|
|
+
|
|
|
|
|
+use Illuminate\Support\Carbon;
|
|
|
|
|
+use Illuminate\Support\Facades\DB;
|
|
|
|
|
+use Illuminate\Support\Facades\Schema;
|
|
|
|
|
+
|
|
|
|
|
+/**
|
|
|
|
|
+ * 从做题与错题数据抽取「题库标定难度 vs 实测正确率」等指标,用于检验难度体系是否合理。
|
|
|
|
|
+ *
|
|
|
|
|
+ * 说明:当前库表未见独立「学生逐题自评难易」字段;{@see self::parsePaperDifficultyCategory()}
|
|
|
|
|
+ * 将 papers.difficulty_category 解析为数值,作为「本次练习/学案侧难度选择」的代理变量。
|
|
|
|
|
+ */
|
|
|
|
|
+class QuestionDifficultyCalibrationAnalyzer
|
|
|
|
|
+{
|
|
|
|
|
/**
 * Build the difficulty-calibration report: bank-labelled difficulty versus measured error rate.
 *
 * The report combines, from graded `paper_questions` rows joined to `papers` and `questions`:
 *  - per-question attempts / accuracy (questions with at least `min_attempts` graded rows),
 *  - the same answers stratified by `papers.difficulty_category`,
 *  - the Pearson correlation between normalized bank difficulty and empirical error rate,
 *  - pooled accuracy per bank-difficulty quartile,
 *  - a row-level correlation between the paper difficulty category and incorrect answers,
 *  - optionally the number of `mistake_records` rows per question,
 *  - a per-question calibration recommendation.
 *
 * Problems that can be detected up front (missing tables, unknown `question_code`) are reported
 * as `['ok' => false, 'error' => string]` rather than thrown.
 *
 * NOTE(review): the row-level paper-category statistics are not restricted by
 * `question_bank_id` / `question_code`, and the mistake counts are not restricted by `since`
 * — confirm that this is intended.
 *
 * @param array{
 *   min_attempts?: int,
 *   since?: \DateTimeInterface|string|null,
 *   include_mistakes?: bool,
 *   student_id?: string|int|null,
 *   question_bank_id?: int|null,
 *   question_code?: string|null,
 *   calibration_min_attempts?: int,
 *   alpha?: float,
 *   max_step?: float,
 *   half_life_days?: int
 * } $options
 *   - min_attempts: minimum graded rows for a question to be listed (default 5, floor 1).
 *   - since: only rows updated or graded at/after this instant; any date-like value is accepted.
 *   - include_mistakes: attach `mistake_records` counts when that table exists (default true).
 *   - student_id / question_bank_id / question_code: optional filters; a resolvable
 *     `question_code` overrides `question_bank_id`.
 *   - calibration_min_attempts (default 10), alpha (default 0.2, clamped to 0.01–1.0),
 *     max_step (default 0.03, clamped to 0.001–0.2), half_life_days (default 30, floor 1):
 *     constraints forwarded to the calibration recommendation.
 * @return array<string, mixed>
 *
 * @throws \InvalidArgumentException presumably raised by Carbon::parse() when a non-date `since`
 *                                   value cannot be parsed — verify against the installed Carbon
 */
public function run(array $options = []): array
{
    $minAttempts = max(1, (int) ($options['min_attempts'] ?? 5));
    $includeMistakes = (bool) ($options['include_mistakes'] ?? true);

    // The query helpers are typed ?Carbon, so coerce any other date-like input here instead of
    // letting it surface as a TypeError deep inside the aggregation.
    $since = $options['since'] ?? null;
    if ($since === '') {
        $since = null;
    }
    if ($since !== null && ! $since instanceof Carbon) {
        $since = $since instanceof \DateTimeInterface
            ? Carbon::instance($since)
            : Carbon::parse((string) $since);
    }

    // isset() already rejects null, so only the empty string needs an explicit check.
    $studentId = isset($options['student_id']) && $options['student_id'] !== ''
        ? (string) $options['student_id']
        : null;

    $questionBankId = isset($options['question_bank_id']) ? (int) $options['question_bank_id'] : null;
    if ($questionBankId === 0) {
        // 0 is treated as "no filter".
        $questionBankId = null;
    }

    if (! Schema::hasTable('paper_questions') || ! Schema::hasTable('papers')) {
        return [
            'ok' => false,
            'error' => '缺少必要数据表 paper_questions 或 papers',
        ];
    }
    // The per-question aggregation always joins `questions`, so the table is mandatory; without
    // this guard a missing table would throw, or a question_code filter would be silently ignored.
    if (! Schema::hasTable('questions')) {
        return [
            'ok' => false,
            'error' => '缺少必要数据表 questions',
        ];
    }

    $questionCode = isset($options['question_code']) ? trim((string) $options['question_code']) : '';
    if ($questionCode !== '') {
        $resolved = DB::table('questions')->where('question_code', $questionCode)->value('id');
        if ($resolved === null) {
            return [
                'ok' => false,
                'error' => '未找到 question_code='.$questionCode.' 对应的题库题目',
            ];
        }
        $questionBankId = (int) $resolved;
    }

    // The four hard calibration constraints (overridable by the caller).
    $calibrationMinAttempts = max(1, (int) ($options['calibration_min_attempts'] ?? 10));
    $alpha = max(0.01, min(1.0, (float) ($options['alpha'] ?? 0.2)));
    $maxStep = max(0.001, min(0.2, (float) ($options['max_step'] ?? 0.03)));
    $halfLifeDays = max(1, (int) ($options['half_life_days'] ?? 30));

    $perQuestion = $this->aggregatePerQuestion($minAttempts, $since, $studentId, $questionBankId);
    $byPaperDifficulty = $this->aggregatePerQuestionByPaperDifficulty($since, $studentId, $questionBankId);
    $paperLevelRows = $this->rowLevelPaperDifficultyVsOutcome($since, $studentId);

    $mistakeByBankId = [];
    if ($includeMistakes && Schema::hasTable('mistake_records')) {
        $mistakeByBankId = $this->mistakeCountsByQuestionBankId($studentId);
    }

    // Single pass: collect the Pearson inputs and build the enriched per-question rows together.
    $bankDiffs = [];
    $errorRates = [];
    $merged = [];
    foreach ($perQuestion as $row) {
        $bid = (int) $row['question_bank_id'];
        $attempts = (int) $row['attempts'];
        $correct = (int) $row['correct_count'];

        $norm = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
        $emp = $attempts > 0 ? 1.0 - ($correct / $attempts) : null;

        if ($norm !== null && $emp !== null) {
            $bankDiffs[] = $norm;
            $errorRates[] = $emp;
        }

        $strata = $byPaperDifficulty[$bid] ?? [];
        $calibration = $this->buildCalibrationRecommendation(
            $norm,
            $strata,
            $calibrationMinAttempts,
            $alpha,
            $maxStep,
            $halfLifeDays
        );

        $merged[] = array_merge($row, [
            'wrong_count' => max(0, $attempts - $correct),
            'bank_difficulty_normalized' => $norm,
            'empirical_error_rate' => $emp,
            // Empirical error rate minus bank difficulty (0–1): a larger value means the question
            // is harder to get right than its label suggests.
            'calibration_gap' => ($emp !== null && $norm !== null) ? round($emp - $norm, 4) : null,
            'mistake_records_count' => $mistakeByBankId[$bid] ?? 0,
            'paper_difficulty_breakdown' => $strata,
            'calibration_weighted_error_rate' => $calibration['weighted_error_rate'],
            'calibration_effective_attempts' => $calibration['effective_attempts'],
            'calibration_recommendation' => $calibration['recommendation'],
        ]);
    }

    $bins = $this->binByDifficulty($perQuestion);
    $pearson = $this->pearsonCorrelation($bankDiffs, $errorRates);
    $paperPearson = $paperLevelRows['pearson_category_vs_incorrect'] ?? null;

    return [
        'ok' => true,
        'meta' => [
            'min_attempts' => $minAttempts,
            'since' => $since?->toIso8601String(),
            'student_id' => $studentId,
            'question_bank_id' => $questionBankId,
            'question_rows' => count($perQuestion),
            'note' => '无独立「学生逐题自评难易」字段;mistake_records 为错题本行数。下列「每题一行」为 paper_questions 已判分聚合。',
            'calibration_constraints' => [
                'stratified_by' => 'papers.difficulty_category',
                'min_attempts' => $calibrationMinAttempts,
                'alpha' => $alpha,
                'max_step' => $maxStep,
                'time_decay_half_life_days' => $halfLifeDays,
            ],
        ],
        'summary' => [
            'pearson_bank_difficulty_vs_empirical_error_rate' => $pearson,
            'interpretation' => $this->interpretPearson($pearson),
            'pearson_paper_difficulty_category_vs_incorrect' => $paperPearson,
            'interpretation_paper_category' => $this->interpretPearson($paperPearson),
        ],
        'bins_by_bank_difficulty' => $bins,
        'paper_difficulty_category_vs_incorrect_rate' => $paperLevelRows,
        'per_question' => $merged,
    ];
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Aggregate graded answers into one row per question-bank item.
 *
 * Reads `paper_questions` joined to `papers` (on `paper_id`) and left-joined to `questions`
 * (on `questions.id = paper_questions.question_bank_id`). Only rows with a non-null `is_correct`
 * and a non-null `question_bank_id` are counted.
 *
 * @param int         $minAttempts    groups with fewer graded rows than this are dropped (HAVING)
 * @param Carbon|null $since          when set, keep rows whose `updated_at` OR `graded_at` is >= this
 * @param string|null $studentId      when set, restrict to papers of this student
 * @param int|null    $questionBankId when set, restrict to this question-bank item
 * @return list<array<string, mixed>> rows with keys question_bank_id, question_code, attempts,
 *                                    correct_count, accuracy, avg_paper_question_difficulty,
 *                                    bank_difficulty
 */
private function aggregatePerQuestion(int $minAttempts, ?Carbon $since, ?string $studentId, ?int $questionBankId): array
{
    $query = DB::table('paper_questions as pq')
        ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
        ->leftJoin('questions as qu', 'qu.id', '=', 'pq.question_bank_id')
        ->whereNotNull('pq.is_correct')
        ->whereNotNull('pq.question_bank_id');

    if ($studentId !== null) {
        $query->where('p.student_id', $studentId);
    }

    if ($questionBankId !== null) {
        $query->where('pq.question_bank_id', $questionBankId);
    }

    if ($since !== null) {
        // Grouped so the OR does not leak into the other conditions.
        $query->where(function ($window) use ($since) {
            $window->where('pq.updated_at', '>=', $since)
                ->orWhere('pq.graded_at', '>=', $since);
        });
    }

    $records = $query
        ->selectRaw('
            pq.question_bank_id as question_bank_id,
            COUNT(*) as attempts,
            SUM(CASE WHEN pq.is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
            AVG(pq.difficulty) as avg_paper_question_difficulty,
            MAX(qu.difficulty) as bank_difficulty,
            MAX(qu.question_code) as question_code
        ')
        ->groupBy('pq.question_bank_id')
        ->havingRaw('COUNT(*) >= ?', [$minAttempts])
        ->get();

    $result = [];
    foreach ($records as $record) {
        $avgPaperDifficulty = $record->avg_paper_question_difficulty;
        $bankDifficulty = $record->bank_difficulty;

        $result[] = [
            'question_bank_id' => (int) $record->question_bank_id,
            'question_code' => $record->question_code,
            'attempts' => (int) $record->attempts,
            'correct_count' => (int) $record->correct_count,
            'accuracy' => $record->attempts > 0
                ? round((int) $record->correct_count / (int) $record->attempts, 4)
                : null,
            'avg_paper_question_difficulty' => $avgPaperDifficulty !== null ? (float) $avgPaperDifficulty : null,
            'bank_difficulty' => $bankDifficulty !== null ? (float) $bankDifficulty : null,
        ];
    }

    return $result;
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Stratified statistics: right/wrong distribution of every question per `papers.difficulty_category`.
 *
 * Counts graded `paper_questions` rows (non-null `is_correct` and `question_bank_id`) grouped by
 * question-bank item and paper difficulty category. `last_answered_at` is the latest of
 * `graded_at`, falling back to `updated_at`, then `created_at`, within each group.
 *
 * @param Carbon|null $since          when set, keep rows whose `updated_at` OR `graded_at` is >= this
 * @param string|null $studentId      when set, restrict to papers of this student
 * @param int|null    $questionBankId when set, restrict to this question-bank item
 * @return array<int, list<array<string, mixed>>> question_bank_id => one entry per category
 */
private function aggregatePerQuestionByPaperDifficulty(?Carbon $since, ?string $studentId, ?int $questionBankId): array
{
    $query = DB::table('paper_questions as pq')
        ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
        ->whereNotNull('pq.is_correct')
        ->whereNotNull('pq.question_bank_id');

    if ($studentId !== null) {
        $query->where('p.student_id', $studentId);
    }

    if ($questionBankId !== null) {
        $query->where('pq.question_bank_id', $questionBankId);
    }

    if ($since !== null) {
        $query->where(function ($window) use ($since) {
            $window->where('pq.updated_at', '>=', $since)
                ->orWhere('pq.graded_at', '>=', $since);
        });
    }

    $groups = $query
        ->selectRaw('
            pq.question_bank_id as question_bank_id,
            p.difficulty_category as difficulty_category,
            COUNT(*) as attempts,
            SUM(CASE WHEN pq.is_correct = 1 THEN 1 ELSE 0 END) as correct_count,
            SUM(CASE WHEN pq.is_correct = 0 THEN 1 ELSE 0 END) as wrong_count,
            MAX(COALESCE(pq.graded_at, pq.updated_at, pq.created_at)) as last_answered_at
        ')
        ->groupBy('pq.question_bank_id', 'p.difficulty_category')
        ->get();

    $strataByQuestion = [];
    foreach ($groups as $group) {
        $total = (int) $group->attempts;
        $misses = (int) $group->wrong_count;

        // Appending auto-creates the per-question list on first use.
        $strataByQuestion[(int) $group->question_bank_id][] = [
            'difficulty_category' => $group->difficulty_category,
            'difficulty_category_numeric' => self::parsePaperDifficultyCategory((string) ($group->difficulty_category ?? '')),
            'attempts' => $total,
            'correct_count' => (int) $group->correct_count,
            'wrong_count' => $misses,
            'error_rate' => $total > 0 ? round($misses / $total, 4) : null,
            'last_answered_at' => $group->last_answered_at,
        ];
    }

    return $strataByQuestion;
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Row-level relation between a paper's difficulty category and whether the answer was wrong.
 *
 * Every graded `paper_questions` row (non-null `is_correct`) contributes one observation. The
 * category is parsed to a 0–4 level and divided by 4 for the Pearson input; the outcome is 1 for
 * an incorrect answer and 0 otherwise. Rows whose category cannot be parsed are tallied under an
 * unknown bucket (numeric category null) and excluded from the correlation.
 *
 * @param Carbon|null $since     when set, keep rows whose `updated_at` OR `graded_at` is >= this
 * @param string|null $studentId when set, restrict to papers of this student
 * @return array{n_rows: int, n_rows_with_category: int, pearson_category_vs_incorrect: ?float, by_category: list<array<string, mixed>>}
 */
private function rowLevelPaperDifficultyVsOutcome(?Carbon $since, ?string $studentId): array
{
    $query = DB::table('paper_questions as pq')
        ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
        ->whereNotNull('pq.is_correct');

    if ($studentId !== null) {
        $query->where('p.student_id', $studentId);
    }

    if ($since !== null) {
        $query->where(function ($window) use ($since) {
            $window->where('pq.updated_at', '>=', $since)
                ->orWhere('pq.graded_at', '>=', $since);
        });
    }

    $answers = $query->select(['pq.is_correct', 'p.difficulty_category'])->get();

    $tallies = [];
    $levels = [];
    $misses = [];
    foreach ($answers as $answer) {
        $level = self::parsePaperDifficultyCategory($answer->difficulty_category ?? null);
        $isWrong = ((int) $answer->is_correct) === 0;

        $bucket = $level === null ? '_unknown' : (string) $level;
        $tallies[$bucket] ??= ['category' => $level, 'n' => 0, 'incorrect' => 0];
        $tallies[$bucket]['n']++;
        $tallies[$bucket]['incorrect'] += $isWrong ? 1 : 0;

        if ($level !== null) {
            // Only rows with a recognised category feed the correlation.
            $levels[] = $level / 4.0;
            $misses[] = $isWrong ? 1.0 : 0.0;
        }
    }

    $byCategory = array_map(
        static fn (array $tally): array => [
            'difficulty_category_numeric' => $tally['category'],
            'n' => $tally['n'],
            'incorrect_rate' => $tally['n'] > 0 ? round($tally['incorrect'] / $tally['n'], 4) : null,
        ],
        array_values($tallies)
    );
    // The unknown bucket (null) sorts first via the -1 sentinel.
    usort(
        $byCategory,
        static fn (array $left, array $right): int => ($left['difficulty_category_numeric'] ?? -1) <=> ($right['difficulty_category_numeric'] ?? -1)
    );

    return [
        'n_rows' => $answers->count(),
        'n_rows_with_category' => count($levels),
        'pearson_category_vs_incorrect' => $this->pearsonCorrelation($levels, $misses),
        'by_category' => $byCategory,
    ];
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Count `mistake_records` rows per question.
 *
 * Rows are grouped by `question_id`; non-numeric ids are skipped and numeric ids are cast to int,
 * with counts summed if several ids collapse to the same integer.
 *
 * NOTE(review): the result is keyed as if `mistake_records.question_id` were the question-bank
 * id — verify against the schema.
 *
 * @param string|null $studentId when set, only this student's mistake rows are counted
 * @return array<int, int> question id => number of mistake rows
 */
private function mistakeCountsByQuestionBankId(?string $studentId): array
{
    $query = DB::table('mistake_records')
        ->selectRaw('question_id, COUNT(*) as c')
        ->groupBy('question_id');

    if ($studentId !== null) {
        $query->where('student_id', $studentId);
    }

    $totals = [];
    foreach ($query->pluck('c', 'question_id')->all() as $questionId => $rowCount) {
        if (is_numeric($questionId)) {
            $key = (int) $questionId;
            $totals[$key] = ($totals[$key] ?? 0) + (int) $rowCount;
        }
    }

    return $totals;
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Pool per-question results into four equal-width bank-difficulty bins.
 *
 * Bins are [0, 0.25), [0.25, 0.5), [0.5, 0.75) and [0.75, 1.0]. Each question's bank difficulty is
 * normalized with normalizeDifficulty(); questions without a difficulty are skipped and
 * out-of-range values are clamped into the first or last bin. `mean_accuracy` is the pooled
 * accuracy (total correct / total attempts) and stays null for a bin without attempts.
 *
 * @param list<array<string, mixed>> $perQuestion rows carrying bank_difficulty, attempts, correct_count
 * @return list<array<string, mixed>> one entry per bin: min, max, n_questions, total_attempts,
 *                                    total_correct, mean_accuracy
 */
private function binByDifficulty(array $perQuestion): array
{
    $edges = [0.0, 0.25, 0.5, 0.75, 1.0];
    $binCount = count($edges) - 1;
    $binWidth = 1.0 / $binCount;

    $bins = [];
    for ($i = 0; $i < $binCount; $i++) {
        $bins[] = [
            'min' => $edges[$i],
            'max' => $edges[$i + 1],
            'n_questions' => 0,
            'total_attempts' => 0,
            'total_correct' => 0,
            'mean_accuracy' => null,
        ];
    }

    foreach ($perQuestion as $row) {
        $d = self::normalizeDifficulty($row['bank_difficulty'] ?? null);
        if ($d === null) {
            continue;
        }

        // Clamp to [0, 1]; the upper edge 1.0 belongs to the last (closed) bin, hence the min().
        $clamped = max(0.0, min(1.0, $d));
        $binIdx = min($binCount - 1, (int) floor($clamped / $binWidth));

        $bins[$binIdx]['n_questions']++;
        $bins[$binIdx]['total_attempts'] += (int) $row['attempts'];
        $bins[$binIdx]['total_correct'] += (int) $row['correct_count'];
    }

    foreach ($bins as $idx => $bin) {
        if ($bin['total_attempts'] > 0) {
            $bins[$idx]['mean_accuracy'] = round($bin['total_correct'] / $bin['total_attempts'], 4);
        }
    }

    return $bins;
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Translate a Pearson coefficient into a human-readable verdict.
 *
 * null means the coefficient could not be computed; above 0.15 counts as a positive
 * correlation, below -0.15 as a negative one, anything in between as weak.
 *
 * @param float|null $r Pearson coefficient, or null when unavailable
 * @return string explanatory message (user-facing text)
 */
private function interpretPearson(?float $r): string
{
    return match (true) {
        $r === null => '样本不足或难度无变异,无法计算相关系数。',
        $r > 0.15 => '题库难度与实测错误率呈正相关:标定越高的题,学生越容易错,方向符合预期。',
        $r < -0.15 => '出现负相关:标定「难」的题反而正确率更高,建议检查标定、题型或样本偏差。',
        default => '相关较弱:标定难度与实测区分度不明显,可能样本量、标定噪声或题目同质性导致。',
    };
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Parse `papers.difficulty_category` into a numeric level between 0 and 4.
 *
 * The returned value is the level itself (0.0–4.0); it is NOT scaled to 0–1 here — callers that
 * need a 0–1 value divide by 4.
 *
 * Parsing rules, applied to the trimmed, lower-cased input:
 *  - null or empty string: null;
 *  - numeric string: truncated to an integer and clamped to the 0–4 range;
 *  - known level label (0 基础 / 1 筑基 / 2 提分 / 3 培优 / 4 竞赛, plus the aliases listed below):
 *    the matching level;
 *  - anything else: null.
 *
 * @param string|null $raw raw category value as stored on the paper
 * @return float|null level in [0.0, 4.0], or null when the value is missing or not recognised
 */
public static function parsePaperDifficultyCategory(?string $raw): ?float
{
    if ($raw === null) {
        return null;
    }

    $label = strtolower(trim($raw));
    if ($label === '') {
        return null;
    }

    // Numeric input (including the plain digits '0'..'4') is handled here, so the label table
    // below only needs the textual names.
    if (is_numeric($label)) {
        return (float) max(0, min(4, (int) $label));
    }

    return match ($label) {
        '零基础', '0基础', '基础', '0级' => 0.0,
        '筑基' => 1.0,
        '进阶', '中等', '提分' => 2.0,
        '培优' => 3.0,
        '竞赛' => 4.0,
        default => null,
    };
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Bring a stored difficulty value onto a common scale.
 *
 * Values up to and including 1.0 are returned unchanged; values above 1.0 are divided by 5.
 * NOTE(review): presumably this maps a 1–5 scale onto 0–1 — confirm which scales the data uses.
 * The result is not clamped, so inputs above 5 yield values above 1.
 *
 * @param float|null $d stored difficulty, or null when unknown
 * @return float|null normalized difficulty, or null when the input is null
 */
public static function normalizeDifficulty(?float $d): ?float
{
    if ($d === null) {
        return null;
    }

    if ($d > 1.0) {
        return $d / 5.0;
    }

    return $d;
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Pearson correlation coefficient of two equally long samples, rounded to 4 decimals.
 *
 * Returns null when there are fewer than 3 observations, when the lengths differ, or when either
 * sample has (near) zero variance, which would make the denominator vanish.
 *
 * @param list<float> $x first sample
 * @param list<float> $y second sample, paired with $x by index
 * @return float|null coefficient in [-1, 1], or null when it cannot be computed
 */
private function pearsonCorrelation(array $x, array $y): ?float
{
    $count = count($x);
    if ($count < 3 || $count !== count($y)) {
        return null;
    }

    $meanX = array_sum($x) / $count;
    $meanY = array_sum($y) / $count;

    $coMoment = 0.0;
    $sumSqX = 0.0;
    $sumSqY = 0.0;
    for ($i = 0; $i < $count; ++$i) {
        $devX = $x[$i] - $meanX;
        $devY = $y[$i] - $meanY;

        $coMoment += $devX * $devY;
        $sumSqX += $devX * $devX;
        $sumSqY += $devY * $devY;
    }

    $denominator = sqrt($sumSqX * $sumSqY);
    if ($denominator > 1e-12) {
        return round($coMoment / $denominator, 4);
    }

    return null;
}
|
|
|
|
|
+
|
|
|
|
|
/**
 * Produce a dynamic difficulty recommendation for one question under four hard constraints.
 *
 * Constraints:
 *  1) Stratification: the input is already split by `papers.difficulty_category`;
 *  2) Sample threshold: with fewer effective (time-weighted) attempts than $minAttempts nothing moves;
 *  3) Smoothing and limiting: delta = clip(alpha * gap, -maxStep, maxStep);
 *  4) Time decay: each stratum is weighted by 0.5 ^ (days since its last answer / halfLifeDays).
 *
 * `gap` is the weighted error rate minus the normalized bank difficulty; the suggested difficulty
 * is the bank difficulty plus delta, clamped to [0, 1]. A stratum whose timestamp is missing or
 * cannot be parsed is given full weight (age 0).
 *
 * NOTE(review): the granularity and sign of diffInDays() may depend on the installed Carbon major
 * version — confirm the intended precision of the age in days.
 *
 * @param float|null                 $bankDifficultyNormalized bank difficulty on the 0–1 scale, or null
 * @param list<array<string, mixed>> $strata       entries carrying attempts, wrong_count, last_answered_at
 * @param int                        $minAttempts  minimum effective attempts before adjusting
 * @param float                      $alpha        smoothing factor applied to the gap
 * @param float                      $maxStep      maximum absolute change per recommendation
 * @param int                        $halfLifeDays half-life of the time decay, in days
 * @return array{
 *   weighted_error_rate:?float,
 *   effective_attempts:float,
 *   recommendation:array{
 *     action:string,
 *     reason:string,
 *     gap:?float,
 *     delta:?float,
 *     suggested_difficulty:?float
 *   }
 * }
 */
private function buildCalibrationRecommendation(
    ?float $bankDifficultyNormalized,
    array $strata,
    int $minAttempts,
    float $alpha,
    float $maxStep,
    int $halfLifeDays
): array {
    // Shapes every outcome identically so the key order stays stable.
    $result = static fn (
        ?float $errorRate,
        float $effective,
        string $action,
        string $reason,
        ?float $gap,
        ?float $delta,
        ?float $suggested
    ): array => [
        'weighted_error_rate' => $errorRate,
        'effective_attempts' => $effective,
        'recommendation' => [
            'action' => $action,
            'reason' => $reason,
            'gap' => $gap,
            'delta' => $delta,
            'suggested_difficulty' => $suggested,
        ],
    ];

    if ($bankDifficultyNormalized === null) {
        return $result(null, 0.0, 'hold', '题库难度为空,无法计算建议。', null, null, null);
    }

    $reference = Carbon::now();
    $effectiveAttempts = 0.0;
    $effectiveWrong = 0.0;

    foreach ($strata as $stratum) {
        $attemptCount = (int) ($stratum['attempts'] ?? 0);
        if ($attemptCount <= 0) {
            continue;
        }
        $wrongCount = (int) ($stratum['wrong_count'] ?? 0);

        $ageDays = 0.0;
        $stamp = $stratum['last_answered_at'] ?? null;
        if ($stamp) {
            try {
                $ageDays = max(0.0, (float) Carbon::parse((string) $stamp)->diffInDays($reference));
            } catch (\Throwable) {
                // Best effort: an unparseable timestamp is treated as "just answered".
                $ageDays = 0.0;
            }
        }

        $weight = 0.5 ** ($ageDays / $halfLifeDays);
        $effectiveAttempts += $attemptCount * $weight;
        $effectiveWrong += $wrongCount * $weight;
    }

    $currentDifficulty = round($bankDifficultyNormalized, 4);

    if ($effectiveAttempts <= 0.0) {
        return $result(null, 0.0, 'hold', '无有效样本,保持不变。', null, null, $currentDifficulty);
    }

    $errorRate = $effectiveWrong / $effectiveAttempts;
    $gap = $errorRate - $bankDifficultyNormalized;

    if ($effectiveAttempts < $minAttempts) {
        // Observe only: report the gap but do not move the difficulty.
        return $result(
            round($errorRate, 4),
            round($effectiveAttempts, 2),
            'hold',
            '有效样本不足门槛 '.$minAttempts.',仅观测不调整。',
            round($gap, 4),
            0.0,
            $currentDifficulty
        );
    }

    $delta = max(-$maxStep, min($maxStep, $alpha * $gap));
    $suggested = max(0.0, min(1.0, $bankDifficultyNormalized + $delta));

    $eps = 1e-6;
    if ($delta > $eps) {
        $action = 'increase';
        $reason = '实测(分层+时衰)错误率高于标定,建议小步上调。';
    } elseif ($delta < -$eps) {
        $action = 'decrease';
        $reason = '实测(分层+时衰)错误率低于标定,建议小步下调。';
    } else {
        $action = 'hold';
        $reason = 'gap 接近 0,建议保持不变。';
    }

    return $result(
        round($errorRate, 4),
        round($effectiveAttempts, 2),
        $action,
        $reason,
        round($gap, 4),
        round($delta, 4),
        round($suggested, 4)
    );
}
|
|
|
|
|
+}
|