| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096 |
- <?php
- namespace App\Services\Analytics;
- use Illuminate\Support\Carbon;
- use Illuminate\Support\Facades\Cache;
- use Illuminate\Support\Facades\DB;
- use Illuminate\Support\Facades\Log;
- use Illuminate\Support\Facades\Schema;
- /**
- * 题目动态难度校准服务(分层基线残差 + 贝叶斯收缩 + 时间衰减)
- *
- * 目标:
- * 1) 判卷后实时吸收对错结果;
- * 2) 产出可直接用于组卷的校准难度(0~1);
- * 3) 通过先验约束与限幅,避免短期噪声导致难度抖动。
- */
- class QuestionDifficultyCalibrationService
- {
- private const TABLE = 'question_difficulty_calibrations';
- private const ALGO = 'stratified_residual_eb_v2';
- private const HALF_LIFE_DAYS = 45;
- private const BETA_PRIOR_A = 2.0;
- private const BETA_PRIOR_B = 2.0;
- private const SHRINKAGE_M0_MIN = 8.0;
- private const SHRINKAGE_M0_MAX = 24.0;
- private const RESIDUAL_GAIN_MIN = 1.0;
- private const RESIDUAL_GAIN_MAX = 2.2;
- private const RESIDUAL_SCALE_DENOM_MIN = 0.08;
- private const RESIDUAL_SCALE_DENOM_MAX = 0.20;
- private const RECENT_EVENTS_LIMIT = 30;
- private const MIN_DIFF = 0.01;
- private const MAX_DIFF = 0.99;
- private ?bool $tableReady = null;
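- /** @var array<string, array{type: array<string, array<string, mixed>>, all: array{by_cat: array<string, float>, fallback: float}}> Per-instance memo of baseline sets, keyed by a '|'-joined question-type list. */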
- private array $baselineCache = [];
/**
 * Re-estimates difficulty for every graded question that appears on one paper.
 *
 * Reads the paper's rows from `paper_questions` that have both a
 * question_bank_id and a non-null is_correct, reduces them to distinct
 * positive integer bank ids (first-seen order), and hands that list to
 * recalibrateQuestionIds().
 *
 * @param string $paperId Paper identifier; surrounding whitespace is trimmed.
 *
 * @return int Number of questions that took part in the update; 0 when the
 *             id is blank or isReady() reports false.
 */
public function recalibrateByPaperId(string $paperId): int
{
    $normalizedPaperId = trim($paperId);
    if ($normalizedPaperId === '') {
        return 0;
    }
    if (! $this->isReady()) {
        return 0;
    }

    $rawBankIds = DB::table('paper_questions')
        ->where('paper_id', $normalizedPaperId)
        ->whereNotNull('question_bank_id')
        ->whereNotNull('is_correct')
        ->pluck('question_bank_id');

    // Keying by the id de-duplicates while keeping first-seen order.
    $distinctBankIds = [];
    foreach ($rawBankIds as $rawBankId) {
        $bankId = (int) $rawBankId;
        if ($bankId > 0) {
            $distinctBankIds[$bankId] = $bankId;
        }
    }

    return $this->recalibrateQuestionIds(array_values($distinctBankIds));
}
/**
 * Online, per-question update: folds the outcome of one freshly graded paper
 * into the stored calibration rows without re-reading historical attempts.
 * Only questions present in $questions are touched.
 *
 * Each entry of $questions provides an id under 'question_id' (falling back
 * to 'question_bank_id') and an 'is_correct' value that is either a scalar or
 * a list of per-step flags; the fraction of steps not equal to 1 becomes the
 * outcome error in [0, 1]. Input ids are mapped to their question_bank_id via
 * the paper's `paper_questions` rows when such a mapping exists, and that
 * canonical id is what gets written.
 *
 * Differences from a naive per-input loop:
 *  - the base difficulty from `questions` is looked up by the canonical bank
 *    id first (the same key the batch recalibration uses), then by the raw
 *    input id;
 *  - when several inputs resolve to the same canonical id, only the first is
 *    applied, so a single upsert statement never carries two rows for the
 *    same unique key;
 *  - stored algorithm_meta that does not decode to an array is ignored
 *    instead of being passed to array_merge().
 *
 * NOTE(review): the read-modify-write below is not wrapped in a transaction
 * or lock; concurrent grading of the same question may lose an update.
 *
 * @param string                    $paperId   Paper identifier (trimmed).
 * @param list<array<string,mixed>> $questions Graded questions of that paper.
 *
 * @return int Number of calibration rows written; 0 when the input is empty
 *             or isReady() reports false.
 */
public function updateOnlineFromPaper(string $paperId, array $questions): int
{
    $paperId = trim($paperId);
    if ($paperId === '' || $questions === [] || ! $this->isReady()) {
        return 0;
    }

    $paper = DB::table('papers')->where('paper_id', $paperId)->first(['difficulty_category']);
    $paperDifficultyCategory = (string) ($paper->difficulty_category ?? 'unknown');

    // Reduce every submitted question to a fractional error in [0, 1].
    $qidToOutcome = [];
    foreach ($questions as $question) {
        $qid = (int) ($question['question_id'] ?? $question['question_bank_id'] ?? 0);
        if ($qid <= 0) {
            continue;
        }
        $isCorrectArray = $question['is_correct'] ?? [];
        if (! is_array($isCorrectArray)) {
            $isCorrectArray = [$isCorrectArray ? 1 : 0];
        }
        $totalSteps = count($isCorrectArray);
        if ($totalSteps <= 0) {
            continue;
        }
        $correctSteps = array_sum(array_map(fn ($v) => (int) $v === 1 ? 1 : 0, $isCorrectArray));
        $correctRatio = $correctSteps / max(1, $totalSteps);
        $outcomeError = 1.0 - $correctRatio;
        $qidToOutcome[$qid] = [
            'outcome_error' => $this->clamp((float) $outcomeError, 0.0, 1.0),
            'is_fully_correct' => $correctSteps === $totalSteps ? 1 : 0,
        ];
    }
    if ($qidToOutcome === []) {
        return 0;
    }

    $questionIds = array_keys($qidToOutcome);

    // The caller may pass either bank ids or paper-level question ids, so match both columns.
    $questionTypeRows = DB::table('paper_questions')
        ->where('paper_id', $paperId)
        ->where(function ($q) use ($questionIds) {
            $q->whereIn('question_bank_id', $questionIds)
                ->orWhereIn('question_id', $questionIds);
        })
        ->select(['question_bank_id', 'question_id', 'question_type'])
        ->get();

    $questionTypeByQid = [];
    $canonicalQidByInput = [];
    foreach ($questionTypeRows as $row) {
        $qt = trim((string) ($row->question_type ?? '')) !== '' ? (string) $row->question_type : 'unknown';
        $bankId = (int) ($row->question_bank_id ?? 0);
        $questionId = (int) ($row->question_id ?? 0);
        if ($bankId > 0 && ! isset($questionTypeByQid[$bankId])) {
            $questionTypeByQid[$bankId] = $qt;
            $canonicalQidByInput[$bankId] = $bankId;
        }
        if ($questionId > 0 && ! isset($questionTypeByQid[$questionId])) {
            $questionTypeByQid[$questionId] = $qt;
        }
        if ($questionId > 0 && $bankId > 0) {
            $canonicalQidByInput[$questionId] = $bankId;
        }
    }

    // Input ids plus every canonical id they resolve to.
    $lookupIds = array_values(array_unique(array_merge(
        $questionIds,
        array_values($canonicalQidByInput)
    )));

    $baseDifficultyByQid = DB::table('questions')
        ->whereIn('id', $lookupIds)
        ->pluck('difficulty', 'id')
        ->all();

    $existingByQid = DB::table(self::TABLE)
        ->whereIn('question_bank_id', $lookupIds)
        ->get()
        ->keyBy('question_bank_id');

    $types = array_values(array_unique(array_values($questionTypeByQid)));
    $baselines = $this->buildGlobalBaselines($types);
    $healthScaleByType = [];
    $now = now();

    // Keyed by canonical id so one statement never holds two rows for the same unique key.
    $upserts = [];
    foreach ($qidToOutcome as $qid => $outcome) {
        $canonicalQid = (int) ($canonicalQidByInput[$qid] ?? $qid);
        if (isset($upserts[$canonicalQid])) {
            continue;
        }

        $outcomeError = (float) ($outcome['outcome_error'] ?? 1.0);
        $isFullyCorrect = (int) ($outcome['is_fully_correct'] ?? 0) === 1 ? 1 : 0;

        // keyBy() may yield string or int keys depending on the driver; try both.
        $existing = $existingByQid->get((string) $canonicalQid);
        if ($existing === null) {
            $existing = $existingByQid->get($canonicalQid);
        }

        $originalDifficulty = $existing !== null
            ? (float) ($existing->original_difficulty ?? 0.5)
            : ($this->normalizeDifficultyValue(
                $baseDifficultyByQid[$canonicalQid] ?? $baseDifficultyByQid[$qid] ?? null
            ) ?? 0.5);
        $originalDifficulty = $this->clamp($originalDifficulty, self::MIN_DIFF, self::MAX_DIFF);

        $prevDifficulty = $existing !== null
            ? (float) ($existing->calibrated_difficulty ?? $originalDifficulty)
            : $originalDifficulty;
        $prevDifficulty = $this->clamp($prevDifficulty, self::MIN_DIFF, self::MAX_DIFF);

        $prevWeightedAttempts = $existing !== null ? (float) ($existing->weighted_attempts ?? 0.0) : 0.0;
        $prevWeightedWrong = $existing !== null ? (float) ($existing->weighted_wrong ?? 0.0) : 0.0;
        $lastAtRaw = $existing !== null ? ($existing->last_graded_at ?? null) : null;

        $existingMeta = [];
        if ($existing !== null && ! empty($existing->algorithm_meta)) {
            $decodedMeta = json_decode((string) $existing->algorithm_meta, true);
            // A stored scalar or malformed payload must not reach array_merge().
            $existingMeta = is_array($decodedMeta) ? $decodedMeta : [];
        }

        $questionType = $questionTypeByQid[$canonicalQid] ?? ($questionTypeByQid[$qid] ?? 'unknown');
        $baselineErr = $this->resolveBaselineErrorRate($questionType, $paperDifficultyCategory, $baselines);

        if (! isset($healthScaleByType[$questionType])) {
            $healthScaleByType[$questionType] = $this->getHealthScaleForType($questionType);
        }
        $healthScale = (float) $healthScaleByType[$questionType];

        $estimate = $this->estimateOnlineBySingleOutcome(
            $originalDifficulty,
            $prevDifficulty,
            $prevWeightedAttempts,
            $prevWeightedWrong,
            $outcomeError,
            $baselineErr,
            $lastAtRaw,
            $healthScale
        );

        $event = $this->buildUpdateEvent(
            $outcomeError,
            $prevDifficulty,
            (float) $estimate['calibrated_difficulty'],
            (float) ($estimate['meta']['expected_error_rate'] ?? $baselineErr),
            (float) ($estimate['meta']['observed_error_rate'] ?? ($estimate['weighted_error_rate'] ?? 0.5)),
            (float) ($estimate['meta']['residual'] ?? 0.0),
            $now
        );

        $meta = array_merge($existingMeta, $estimate['meta'], [
            'mode' => 'online_single_outcome',
            'paper_id' => $paperId,
            'paper_difficulty_category' => $paperDifficultyCategory,
            'question_type' => $questionType,
            'baseline_error_rate' => round($baselineErr, 4),
            'health_scale' => round($healthScale, 4),
        ]);
        $meta = $this->appendRecentEvent($meta, $event);

        $prevAttempts = $existing !== null ? (int) ($existing->attempts ?? 0) : 0;
        $prevCorrectCount = $existing !== null ? (int) ($existing->correct_count ?? 0) : 0;
        $prevWrongCount = $existing !== null ? (int) ($existing->wrong_count ?? 0) : 0;

        $attempts = $prevAttempts + 1;
        $correctCount = $prevCorrectCount + ($isFullyCorrect === 1 ? 1 : 0);
        // wrong_count follows the historical is_correct convention: only an
        // entirely wrong answer is counted as wrong.
        $wrongCount = $prevWrongCount + ($outcomeError >= 0.9999 ? 1 : 0);

        $upserts[$canonicalQid] = [
            'question_bank_id' => $canonicalQid,
            'original_difficulty' => round($originalDifficulty, 4),
            'calibrated_difficulty' => round($estimate['calibrated_difficulty'], 4),
            'difficulty_delta' => round($estimate['calibrated_difficulty'] - $originalDifficulty, 4),
            'attempts' => $attempts,
            'correct_count' => $correctCount,
            'wrong_count' => $wrongCount,
            'weighted_attempts' => round($estimate['weighted_attempts'], 4),
            'weighted_wrong' => round($estimate['weighted_wrong'], 4),
            'weighted_error_rate' => round($estimate['weighted_error_rate'], 4),
            'last_graded_at' => $now->toDateTimeString(),
            'algorithm' => self::ALGO.'_online',
            'algorithm_meta' => json_encode($meta, JSON_UNESCAPED_UNICODE),
            'updated_at' => $now,
            'created_at' => $now,
        ];
    }

    if ($upserts === []) {
        return 0;
    }

    // created_at is deliberately absent from the update list so existing rows keep it.
    DB::table(self::TABLE)->upsert(
        array_values($upserts),
        ['question_bank_id'],
        [
            'original_difficulty',
            'calibrated_difficulty',
            'difficulty_delta',
            'attempts',
            'correct_count',
            'wrong_count',
            'weighted_attempts',
            'weighted_wrong',
            'weighted_error_rate',
            'last_graded_at',
            'algorithm',
            'algorithm_meta',
            'updated_at',
        ]
    );

    Log::info('QuestionDifficultyCalibrationService: 在线逐题更新完成', [
        'paper_id' => $paperId,
        'updated_question_count' => count($upserts),
    ]);

    return count($upserts);
}
/**
 * Batch re-estimation: rebuilds the calibration row of each given question
 * from all of its graded attempts in `paper_questions`.
 *
 * Ids are cast to int, non-positive values dropped and duplicates removed.
 * Questions without any graded attempt are skipped. Rows are written with a
 * single upsert keyed on question_bank_id.
 *
 * @param array<int, int|string> $questionIds Question bank ids.
 *
 * @return int Number of questions that took part in the update.
 */
public function recalibrateQuestionIds(array $questionIds): int
{
    if (! $this->isReady()) {
        return 0;
    }

    // Keying by the id de-duplicates while keeping first-seen order.
    $bankIds = [];
    foreach ($questionIds as $rawId) {
        $castId = (int) $rawId;
        if ($castId > 0) {
            $bankIds[$castId] = $castId;
        }
    }
    $bankIds = array_values($bankIds);
    if ($bankIds === []) {
        return 0;
    }

    $difficultyByBankId = DB::table('questions')
        ->whereIn('id', $bankIds)
        ->pluck('difficulty', 'id')
        ->all();

    $attemptRows = DB::table('paper_questions as pq')
        ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
        ->whereIn('pq.question_bank_id', $bankIds)
        ->whereNotNull('pq.is_correct')
        ->select([
            'pq.question_bank_id',
            'pq.question_type',
            'pq.is_correct',
            'pq.graded_at',
            'pq.updated_at',
            'pq.created_at',
            'p.difficulty_category',
        ])
        ->orderBy('pq.question_bank_id')
        ->get();

    // Bucket the attempts by question.
    $attemptsByBankId = [];
    foreach ($attemptRows as $attemptRow) {
        $bankId = (int) ($attemptRow->question_bank_id ?? 0);
        if ($bankId <= 0) {
            continue;
        }
        $attemptsByBankId[$bankId][] = [
            'question_type' => (string) ($attemptRow->question_type ?? ''),
            'is_correct' => ((int) ($attemptRow->is_correct ?? 0) === 1) ? 1 : 0,
            'difficulty_category' => $attemptRow->difficulty_category ?? null,
            'graded_at' => $attemptRow->graded_at ?? null,
            'updated_at' => $attemptRow->updated_at ?? null,
            'created_at' => $attemptRow->created_at ?? null,
        ];
    }

    // array_map() on a single array keeps the bank-id keys.
    $typeByBankId = array_map(
        fn (array $history): string => $this->resolveQuestionType($history),
        $attemptsByBankId
    );
    $baselines = $this->buildGlobalBaselines(array_values($typeByBankId));

    $timestamp = now();
    $rowsToWrite = [];
    foreach ($bankIds as $bankId) {
        $history = $attemptsByBankId[$bankId] ?? [];
        if ($history === []) {
            continue;
        }

        $baseDifficulty = $this->normalizeDifficultyValue($difficultyByBankId[$bankId] ?? null) ?? 0.5;
        $estimate = $this->estimateByStratifiedResidual(
            $history,
            $baseDifficulty,
            $typeByBankId[$bankId] ?? 'unknown',
            $baselines
        );

        $weightedErrorRate = $estimate['weighted_error_rate'];
        if ($weightedErrorRate !== null) {
            $weightedErrorRate = round($weightedErrorRate, 4);
        }

        $rowsToWrite[] = [
            'question_bank_id' => $bankId,
            'original_difficulty' => round($baseDifficulty, 4),
            'calibrated_difficulty' => round($estimate['calibrated_difficulty'], 4),
            'difficulty_delta' => round($estimate['calibrated_difficulty'] - $baseDifficulty, 4),
            'attempts' => $estimate['attempts'],
            'correct_count' => $estimate['correct_count'],
            'wrong_count' => $estimate['wrong_count'],
            'weighted_attempts' => round($estimate['weighted_attempts'], 4),
            'weighted_wrong' => round($estimate['weighted_wrong'], 4),
            'weighted_error_rate' => $weightedErrorRate,
            'last_graded_at' => $estimate['last_graded_at'],
            'algorithm' => self::ALGO,
            'algorithm_meta' => json_encode($estimate['meta'], JSON_UNESCAPED_UNICODE),
            'updated_at' => $timestamp,
            'created_at' => $timestamp,
        ];
    }

    $writtenCount = count($rowsToWrite);
    if ($writtenCount === 0) {
        return 0;
    }

    // created_at is not in the update list, so existing rows keep theirs.
    $columnsToUpdate = [
        'original_difficulty',
        'calibrated_difficulty',
        'difficulty_delta',
        'attempts',
        'correct_count',
        'wrong_count',
        'weighted_attempts',
        'weighted_wrong',
        'weighted_error_rate',
        'last_graded_at',
        'algorithm',
        'algorithm_meta',
        'updated_at',
    ];
    DB::table(self::TABLE)->upsert($rowsToWrite, ['question_bank_id'], $columnsToUpdate);

    Log::info('QuestionDifficultyCalibrationService: 题目难度已重估入库', [
        'question_count' => $writtenCount,
        'algorithm' => self::ALGO,
    ]);

    return $writtenCount;
}
/**
 * Picks the question type for a group of attempts: the first entry whose
 * trimmed 'question_type' is non-empty wins; 'unknown' when none has one.
 *
 * @param array<int, array<string, mixed>> $attempts
 */
private function resolveQuestionType(array $attempts): string
{
    $resolved = 'unknown';
    foreach ($attempts as $entry) {
        $candidate = trim((string) ($entry['question_type'] ?? ''));
        if ($candidate === '') {
            continue;
        }
        $resolved = $candidate;
        break;
    }

    return $resolved;
}
/**
 * Builds the global baseline error rates used as the "expected" error of an
 * attempt, stratified by question type and paper difficulty category.
 *
 * Result shape:
 *   'type' => [<type> => ['by_cat' => [<cat> => rate], 'n_total', 'wrong_total', 'fallback' => rate]]
 *   'all'  => ['by_cat' => [<cat> => rate], 'fallback' => rate]
 * Every rate is Beta-smoothed via smoothedRate(); by_cat maps are passed
 * through enforceMonotonicCategoryRates().
 *
 * The type list is trimmed, de-duplicated and sorted before either cache key
 * is derived, so the same set of types in any order hits the same
 * per-instance memo and the same persistent cache entry (10 minutes).
 *
 * Rows whose question_type is NULL or empty are reported as 'unknown' by the
 * query; when 'unknown' is among the requested types those rows are included
 * in the filter so the 'unknown' stratum can actually be populated.
 *
 * String literals in the raw SQL use single quotes so the statements stay
 * valid when double quotes denote identifiers (ANSI_QUOTES, PostgreSQL).
 *
 * @param array<int, string> $questionTypes
 *
 * @return array<string, mixed>
 */
private function buildGlobalBaselines(array $questionTypes): array
{
    // Explicit empty-string filter: a bare array_filter() would also drop the type "0".
    $questionTypes = array_values(array_unique(array_filter(
        array_map(static fn ($t): string => trim((string) $t), $questionTypes),
        static fn (string $t): bool => $t !== ''
    )));
    sort($questionTypes);

    $cacheKey = implode('|', $questionTypes);
    if (isset($this->baselineCache[$cacheKey])) {
        return $this->baselineCache[$cacheKey];
    }

    $cacheKeyPersistent = 'difficulty_baselines_v1:'.md5($cacheKey);
    $result = Cache::remember($cacheKeyPersistent, now()->addMinutes(10), function () use ($questionTypes) {
        $typeExpr = "COALESCE(NULLIF(pq.question_type, ''), 'unknown')";
        $catExpr = "COALESCE(NULLIF(CAST(p.difficulty_category as char), ''), 'unknown')";
        $countsExpr = 'COUNT(*) as n, SUM(CASE WHEN pq.is_correct = 0 THEN 1 ELSE 0 END) as wrong';

        $baseQuery = DB::table('paper_questions as pq')
            ->join('papers as p', 'p.paper_id', '=', 'pq.paper_id')
            ->whereNotNull('pq.is_correct');

        $includeUnknown = in_array('unknown', $questionTypes, true);

        $rows = (clone $baseQuery)
            ->when($questionTypes !== [], function ($q) use ($questionTypes, $includeUnknown) {
                // Grouped so the OR branches cannot escape the outer AND conditions.
                $q->where(function ($w) use ($questionTypes, $includeUnknown) {
                    $w->whereIn('pq.question_type', $questionTypes);
                    if ($includeUnknown) {
                        $w->orWhereNull('pq.question_type')
                            ->orWhere('pq.question_type', '');
                    }
                });
            })
            ->selectRaw("{$typeExpr} as question_type, {$catExpr} as difficulty_category, {$countsExpr}")
            ->groupBy(DB::raw($typeExpr))
            ->groupBy(DB::raw($catExpr))
            ->get();

        $allRows = (clone $baseQuery)
            ->selectRaw("{$catExpr} as difficulty_category, {$countsExpr}")
            ->groupBy(DB::raw($catExpr))
            ->get();

        $result = [
            'type' => [],
            'all' => [
                'by_cat' => [],
                'fallback' => 0.5,
            ],
        ];

        foreach ($rows as $row) {
            $type = (string) ($row->question_type ?? 'unknown');
            $cat = (string) ($row->difficulty_category ?? 'unknown');
            $n = (int) ($row->n ?? 0);
            $wrong = (int) ($row->wrong ?? 0);
            $result['type'][$type]['by_cat'][$cat] = $this->smoothedRate($wrong, $n);
            $result['type'][$type]['n_total'] = (int) (($result['type'][$type]['n_total'] ?? 0) + $n);
            $result['type'][$type]['wrong_total'] = (int) (($result['type'][$type]['wrong_total'] ?? 0) + $wrong);
        }

        // Per-type fallback across all categories, then monotone category rates.
        foreach ($result['type'] as $type => $v) {
            $n = (int) ($v['n_total'] ?? 0);
            $wrong = (int) ($v['wrong_total'] ?? 0);
            $result['type'][$type]['fallback'] = $this->smoothedRate($wrong, $n);
            $result['type'][$type]['by_cat'] = $this->enforceMonotonicCategoryRates(
                $result['type'][$type]['by_cat'] ?? []
            );
        }

        $allN = 0;
        $allWrong = 0;
        foreach ($allRows as $row) {
            $cat = (string) ($row->difficulty_category ?? 'unknown');
            $n = (int) ($row->n ?? 0);
            $wrong = (int) ($row->wrong ?? 0);
            $result['all']['by_cat'][$cat] = $this->smoothedRate($wrong, $n);
            $allN += $n;
            $allWrong += $wrong;
        }
        $result['all']['by_cat'] = $this->enforceMonotonicCategoryRates($result['all']['by_cat']);
        $result['all']['fallback'] = $this->smoothedRate($allWrong, $allN);

        return $result;
    });

    $this->baselineCache[$cacheKey] = $result;

    return $result;
}
/**
 * Looks up the baseline error rate for a (question type, difficulty category)
 * pair, falling back from most to least specific:
 *   1. the type's rate for that category,
 *   2. the type's overall fallback,
 *   3. the all-types rate for that category,
 *   4. the all-types fallback, or 0.5 when nothing is available.
 *
 * Blank type or category (after trimming) is treated as 'unknown'.
 *
 * @param array<string, mixed> $baselines Structure returned by buildGlobalBaselines().
 */
private function resolveBaselineErrorRate(string $questionType, string $difficultyCategory, array $baselines): float
{
    $typeKey = trim($questionType);
    if ($typeKey === '') {
        $typeKey = 'unknown';
    }
    $categoryKey = trim($difficultyCategory);
    if ($categoryKey === '') {
        $categoryKey = 'unknown';
    }

    // Step 1 checks key presence, not non-null: an explicit null entry is still used (as 0.0).
    $ratesForType = $baselines['type'][$typeKey]['by_cat'] ?? [];
    if (array_key_exists($categoryKey, $ratesForType)) {
        return (float) $ratesForType[$categoryKey];
    }

    return (float) (
        $baselines['type'][$typeKey]['fallback']
        ?? $baselines['all']['by_cat'][$categoryKey]
        ?? $baselines['all']['fallback']
        ?? 0.5
    );
}
/**
 * Beta-smoothed error rate: (wrong + A) / (n + A + B), with the prior
 * pseudo-counts taken from BETA_PRIOR_A and BETA_PRIOR_B. The denominator is
 * floored at 1e-6 so the division is always defined.
 */
private function smoothedRate(int $wrong, int $n): float
{
    $numerator = $wrong + self::BETA_PRIOR_A;
    $denominator = max(1e-6, $n + self::BETA_PRIOR_A + self::BETA_PRIOR_B);

    return $numerator / $denominator;
}
/**
 * Forces the baseline error rates of numeric difficulty categories to be
 * non-decreasing in category order (0 <= 1 <= 2 <= ...), using
 * isotonicIncreasing(). Non-numeric categories such as 'unknown' keep their
 * original value.
 *
 * Each adjusted rate is written back under the exact key it was read from.
 * A label made only of digits but not in canonical form (e.g. "01") is
 * therefore adjusted in place rather than producing or overwriting a
 * separate "1" entry. Labels with equal numeric value keep their input order.
 *
 * @param array<string,float> $ratesByCategory
 *
 * @return array<string,float>
 */
private function enforceMonotonicCategoryRates(array $ratesByCategory): array
{
    $numericEntries = [];
    foreach ($ratesByCategory as $cat => $rate) {
        $label = (string) $cat;
        // The D modifier stops "$" from matching before a trailing newline.
        if (preg_match('/^\d+$/D', $label) === 1) {
            $numericEntries[] = [
                'key' => $cat,
                'order' => (int) $label,
                'rate' => (float) $rate,
            ];
        }
    }
    if ($numericEntries === []) {
        return $ratesByCategory;
    }

    // usort() is stable on PHP 8, so equal orders keep their input sequence.
    usort(
        $numericEntries,
        static fn (array $a, array $b): int => $a['order'] <=> $b['order']
    );

    $adjusted = $this->isotonicIncreasing(array_column($numericEntries, 'rate'));
    foreach ($numericEntries as $i => $entry) {
        $ratesByCategory[$entry['key']] = $adjusted[$i];
    }

    return $ratesByCategory;
}
/**
 * Non-decreasing isotonic fit of a sequence with unit weights, done by
 * merging adjacent blocks: whenever the previous block's mean is not <= the
 * newest block's mean, the two are pooled. Every input position receives
 * its block's mean, clamped to [MIN_DIFF, MAX_DIFF].
 *
 * @param array<int,float> $values
 *
 * @return array<int,float> Same length as $values, re-indexed from 0.
 */
private function isotonicIncreasing(array $values): array
{
    // Three parallel stacks describing the current blocks.
    $sums = [];
    $weights = [];
    $sizes = [];

    foreach ($values as $value) {
        $sums[] = (float) $value;
        $weights[] = 1.0;
        $sizes[] = 1;

        for ($top = count($sums) - 1; $top >= 1; $top--) {
            $meanBelow = $sums[$top - 1] / $weights[$top - 1];
            $meanTop = $sums[$top] / $weights[$top];
            if ($meanBelow <= $meanTop) {
                break;
            }
            // Pool the violating pair into the lower block and drop the top one.
            $sums[$top - 1] += array_pop($sums);
            $weights[$top - 1] += array_pop($weights);
            $sizes[$top - 1] += array_pop($sizes);
        }
    }

    $fitted = [];
    foreach ($sums as $index => $sum) {
        $mean = (float) ($sum / max(1e-6, $weights[$index]));
        $bounded = $this->clamp($mean, self::MIN_DIFF, self::MAX_DIFF);
        for ($repeat = 0; $repeat < (int) $sizes[$index]; $repeat++) {
            $fitted[] = $bounded;
        }
    }

    return $fitted;
}
/**
 * Online update from a single grading outcome.
 *
 * Steps, in order:
 *  1. decay the stored weighted counters by 0.5^(days since last grading /
 *     HALF_LIFE_DAYS) and add the new observation with weight 1;
 *  2. form the expected error as a blend of the baseline error and the
 *     previous difficulty, weighted by prevWeightedAttempts / 25 (capped to
 *     [0, 1]);
 *  3. clamp the residual (observed - expected) to [-0.45, 0.45];
 *  4. move from the previous difficulty toward a gain-scaled target, limited
 *     by a step that grows with confidence and is scaled by $healthScale;
 *  5. shrink the candidate back toward the previous difficulty.
 *
 * $originalDifficulty only appears in the returned meta.
 *
 * @param mixed $lastGradedAtRaw Stored timestamp of the previous grading, or
 *                               null/empty; unparsable values count as 0 days.
 *
 * @return array{weighted_attempts:float,weighted_wrong:float,weighted_error_rate:float,calibrated_difficulty:float,meta:array<string,mixed>}
 */
private function estimateOnlineBySingleOutcome(
    float $originalDifficulty,
    float $prevDifficulty,
    float $prevWeightedAttempts,
    float $prevWeightedWrong,
    float $outcomeError,
    float $baselineErr,
    mixed $lastGradedAtRaw,
    float $healthScale
): array {
    $currentTime = Carbon::now();

    $elapsedDays = 0.0;
    if ($lastGradedAtRaw !== null) {
        $rawTimestamp = (string) $lastGradedAtRaw;
        if ($rawTimestamp !== '') {
            try {
                $previousAt = Carbon::parse($rawTimestamp);
                $elapsedDays = max(0.0, (float) $previousAt->diffInDays($currentTime));
            } catch (\Throwable) {
                $elapsedDays = 0.0;
            }
        }
    }
    $decayFactor = pow(0.5, $elapsedDays / self::HALF_LIFE_DAYS);

    $outcomeError = $this->clamp($outcomeError, 0.0, 1.0);
    $decayedAttempts = max(0.0, $prevWeightedAttempts) * $decayFactor + 1.0;
    $decayedWrong = max(0.0, $prevWeightedWrong) * $decayFactor + $outcomeError;
    $observedRate = 0.5;
    if ($decayedAttempts > 0.0) {
        $observedRate = $decayedWrong / $decayedAttempts;
    }

    $trustInHistory = min(1.0, max(0.0, $prevWeightedAttempts / 25.0));
    $expectedRate = (1.0 - $trustInHistory) * $baselineErr + $trustInHistory * $prevDifficulty;
    $residual = $this->clamp($observedRate - $expectedRate, -0.45, 0.45);

    $policy = $this->buildAdaptivePolicy($decayedAttempts, $observedRate, $expectedRate, $residual);
    $gain = (float) $policy['residual_gain'] * $healthScale;
    $scaleDenominator = (float) $policy['residual_scale_denom'];
    $priorMass = (float) $policy['shrinkage_m0'];
    $policyConfidence = (float) ($policy['confidence'] ?? 0.0);

    // Online mode has no sample-size gate: every outcome may move the value,
    // but the step is automatically smaller while few samples exist.
    $stepCeiling = 0.30 * (0.35 + 0.65 * $policyConfidence) * $healthScale;
    $residualFraction = min(1.0, abs($residual) / max(1e-6, $scaleDenominator));
    $allowedStep = $stepCeiling * $residualFraction;

    $target = $this->clamp(
        $prevDifficulty + $gain * $residual,
        self::MIN_DIFF,
        self::MAX_DIFF
    );
    $boundedMove = $this->clamp($target - $prevDifficulty, -$allowedStep, $allowedStep);
    $candidate = $this->clamp($prevDifficulty + $boundedMove, self::MIN_DIFF, self::MAX_DIFF);

    $calibrated = ($priorMass * $prevDifficulty + $decayedAttempts * $candidate)
        / ($priorMass + $decayedAttempts);
    $calibrated = $this->clamp($calibrated, self::MIN_DIFF, self::MAX_DIFF);

    $meta = [
        'decay_days' => round($elapsedDays, 4),
        'decay_factor' => round($decayFactor, 6),
        'prev_difficulty' => round($prevDifficulty, 4),
        'original_difficulty' => round($originalDifficulty, 4),
        'observed_error_rate' => round($observedRate, 4),
        'expected_error_rate' => round($expectedRate, 4),
        'residual' => round($residual, 4),
        'health_scale_applied' => round($healthScale, 4),
        'max_step' => round($stepCeiling, 4),
        'effective_step' => round($allowedStep, 4),
        'target_difficulty' => round($target, 4),
        'candidate_difficulty' => round($candidate, 4),
        'adaptive' => $policy,
    ];

    return [
        'weighted_attempts' => $decayedAttempts,
        'weighted_wrong' => $decayedWrong,
        'weighted_error_rate' => $observedRate,
        'calibrated_difficulty' => $calibrated,
        'meta' => $meta,
    ];
}
/**
 * Batch estimate of a question's difficulty from its full attempt history.
 *
 * Every attempt is weighted by 0.5^(age in days / HALF_LIFE_DAYS); the
 * weighted observed error rate is compared with the weighted baseline error
 * rate of the attempts' (type, category) strata. The resulting residual
 * moves the original difficulty by at most a step limit that depends on the
 * weighted sample size (none below 8, then 0.08 / 0.15 / 0.25), after which
 * the candidate is shrunk back toward the original difficulty.
 *
 * @param array<int, array<string, mixed>> $attempts  Entries with is_correct, difficulty_category
 *                                                    and graded_at / updated_at / created_at.
 * @param array<string, mixed>             $baselines Structure returned by buildGlobalBaselines().
 *
 * @return array<string, mixed> Raw and weighted counters, last_graded_at,
 *                              calibrated_difficulty and a diagnostic meta array.
 */
private function estimateByStratifiedResidual(
    array $attempts,
    float $originalDifficulty,
    string $questionType,
    array $baselines
): array {
    $referenceTime = Carbon::now();
    $originalDifficulty = $this->clamp($originalDifficulty, self::MIN_DIFF, self::MAX_DIFF);

    $totalWeight = 0.0;
    $totalWeightedWrong = 0.0;
    $totalWeightedExpected = 0.0;
    $rightTally = 0;
    $wrongTally = 0;
    $latestAt = null;
    $perCategory = [];

    foreach ($attempts as $attempt) {
        $miss = ((int) ($attempt['is_correct'] ?? 0) === 1) ? 0 : 1;
        if ($miss === 0) {
            $rightTally++;
        } else {
            $wrongTally++;
        }

        $categoryLabel = (string) ($attempt['difficulty_category'] ?? 'unknown');
        $expectedMiss = $this->resolveBaselineErrorRate($questionType, $categoryLabel, $baselines);

        // Most specific timestamp available; unparsable values count as "now".
        $stamp = $attempt['graded_at'] ?? $attempt['updated_at'] ?? $attempt['created_at'] ?? null;
        $ageDays = 0.0;
        if ($stamp !== null && $stamp !== '') {
            try {
                $parsedAt = Carbon::parse((string) $stamp);
                $ageDays = max(0.0, (float) $parsedAt->diffInDays($referenceTime));
                if ($latestAt === null || $parsedAt->gt($latestAt)) {
                    $latestAt = $parsedAt;
                }
            } catch (\Throwable) {
                $ageDays = 0.0;
            }
        }

        $weight = pow(0.5, $ageDays / self::HALF_LIFE_DAYS);
        $totalWeight += $weight;
        $totalWeightedWrong += $weight * $miss;
        $totalWeightedExpected += $weight * $expectedMiss;

        $bucketKey = trim($categoryLabel);
        if ($bucketKey === '') {
            $bucketKey = 'unknown';
        }
        if (! isset($perCategory[$bucketKey])) {
            // The baseline recorded for a bucket is that of its first attempt.
            $perCategory[$bucketKey] = [
                'attempts' => 0,
                'wrong' => 0,
                'weighted_attempts' => 0.0,
                'weighted_wrong' => 0.0,
                'baseline_error_rate' => $expectedMiss,
            ];
        }
        $perCategory[$bucketKey]['attempts']++;
        $perCategory[$bucketKey]['wrong'] += $miss;
        $perCategory[$bucketKey]['weighted_attempts'] += $weight;
        $perCategory[$bucketKey]['weighted_wrong'] += $weight * $miss;
    }

    $hasWeight = $totalWeight > 0;
    $observedRate = $hasWeight ? ($totalWeightedWrong / $totalWeight) : null;
    $expectedRate = $hasWeight ? ($totalWeightedExpected / $totalWeight) : null;
    $residual = 0.0;
    if ($observedRate !== null && $expectedRate !== null) {
        $residual = $observedRate - $expectedRate;
    }

    $policy = $this->buildAdaptivePolicy($totalWeight, $observedRate, $expectedRate, $residual);
    $gain = (float) $policy['residual_gain'];
    $scaleDenominator = (float) $policy['residual_scale_denom'];
    $priorMass = (float) $policy['shrinkage_m0'];

    // Sample-size gate: no movement at all below 8 weighted attempts.
    $stepLimit = match (true) {
        $totalWeight < 8 => 0.0,
        $totalWeight < 20 => 0.08,
        $totalWeight < 60 => 0.15,
        default => 0.25,
    };
    $residualFraction = min(1.0, abs($residual) / max(1e-6, $scaleDenominator));
    $allowedStep = $stepLimit * $residualFraction;

    $target = $this->clamp(
        $originalDifficulty + $gain * $residual,
        self::MIN_DIFF,
        self::MAX_DIFF
    );
    $boundedMove = $this->clamp($target - $originalDifficulty, -$allowedStep, $allowedStep);
    $candidate = $this->clamp($originalDifficulty + $boundedMove, self::MIN_DIFF, self::MAX_DIFF);

    if ($totalWeight < 8) {
        $calibrated = $originalDifficulty;
    } else {
        $calibrated = ($priorMass * $originalDifficulty + $totalWeight * $candidate)
            / ($priorMass + $totalWeight);
    }
    $calibrated = $this->clamp($calibrated, self::MIN_DIFF, self::MAX_DIFF);

    // Finalise the per-category diagnostics (derived rates plus rounding).
    foreach ($perCategory as &$bucket) {
        $bucketWeight = (float) ($bucket['weighted_attempts'] ?? 0.0);
        $bucketWeightedWrong = (float) ($bucket['weighted_wrong'] ?? 0.0);
        $bucketCount = (int) ($bucket['attempts'] ?? 0);
        $bucketWrong = (int) ($bucket['wrong'] ?? 0);
        $bucketBaseline = (float) ($bucket['baseline_error_rate'] ?? 0.5);

        $bucket['error_rate'] = $bucketCount > 0 ? round($bucketWrong / $bucketCount, 4) : null;
        $bucket['weighted_error_rate'] = $bucketWeight > 0
            ? round($bucketWeightedWrong / $bucketWeight, 4)
            : null;
        $bucket['weighted_attempts'] = round($bucketWeight, 4);
        $bucket['weighted_wrong'] = round($bucketWeightedWrong, 4);
        $bucket['baseline_error_rate'] = round($bucketBaseline, 4);
    }
    unset($bucket);

    $meta = [
        'algorithm' => self::ALGO,
        'question_type' => $questionType,
        'original_difficulty' => round($originalDifficulty, 4),
        'half_life_days' => self::HALF_LIFE_DAYS,
        'weighted_expected_error_rate' => $expectedRate !== null
            ? round($expectedRate, 4)
            : null,
        'residual' => round($residual, 4),
        'residual_gain' => round($gain, 4),
        'residual_scale_denom' => round($scaleDenominator, 4),
        'step_limit' => round($stepLimit, 4),
        'residual_scale' => round($residualFraction, 4),
        'effective_step' => round($allowedStep, 4),
        'target_difficulty' => round($target, 4),
        'candidate_difficulty' => round($candidate, 4),
        'shrinkage_m0' => round($priorMass, 4),
        'adaptive_policy' => $policy,
        'by_difficulty_category' => $perCategory,
    ];

    return [
        'attempts' => count($attempts),
        'correct_count' => $rightTally,
        'wrong_count' => $wrongTally,
        'weighted_attempts' => $totalWeight,
        'weighted_wrong' => $totalWeightedWrong,
        'weighted_error_rate' => $observedRate,
        'last_graded_at' => $latestAt?->toDateTimeString(),
        'calibrated_difficulty' => $calibrated,
        'meta' => $meta,
    ];
}
/**
 * Derives the hyper-parameters of one update from the quality of the sample
 * in use, so no manual tuning is needed.
 *
 * confidence      = weightedAttempts / 80, capped to [0, 1]
 * signal_strength = |residual| / 0.25, capped at 1
 *
 * Gain rises and the residual-scale denominator falls as confidence and
 * signal grow; the shrinkage mass falls as confidence grows. When observed
 * and expected rates differ by less than 0.01 all three are pulled back
 * slightly to avoid pointless oscillation. Outputs are clamped to their
 * respective *_MIN / *_MAX constants.
 *
 * @return array{residual_gain:float,residual_scale_denom:float,shrinkage_m0:float,confidence:float,signal_strength:float}
 */
private function buildAdaptivePolicy(
    float $weightedAttempts,
    ?float $weightedErrorRate,
    ?float $weightedExpectedErrorRate,
    float $residual
): array {
    $sampleConfidence = min(1.0, max(0.0, $weightedAttempts / 80.0));
    // The larger the residual, the stronger the signal and the faster the convergence.
    $signal = min(1.0, abs($residual) / 0.25);

    // More samples and a clearer deviation both raise the gain.
    $gain = self::RESIDUAL_GAIN_MIN
        + (self::RESIDUAL_GAIN_MAX - self::RESIDUAL_GAIN_MIN) * (0.55 * $sampleConfidence + 0.45 * $signal);

    // A smaller denominator means a more sensitive step scale.
    $scaleDenominator = self::RESIDUAL_SCALE_DENOM_MAX
        - (self::RESIDUAL_SCALE_DENOM_MAX - self::RESIDUAL_SCALE_DENOM_MIN) * (0.6 * $sampleConfidence + 0.4 * $signal);

    // Strong shrinkage with few samples, weak shrinkage with many.
    $priorMass = self::SHRINKAGE_M0_MAX
        - (self::SHRINKAGE_M0_MAX - self::SHRINKAGE_M0_MIN) * $sampleConfidence;

    $ratesKnown = $weightedErrorRate !== null && $weightedExpectedErrorRate !== null;
    if ($ratesKnown && abs($weightedErrorRate - $weightedExpectedErrorRate) < 0.01) {
        // Observed is practically equal to expected: damp everything a little.
        $gain = max(self::RESIDUAL_GAIN_MIN, $gain * 0.75);
        $scaleDenominator = min(self::RESIDUAL_SCALE_DENOM_MAX, $scaleDenominator * 1.15);
        $priorMass = min(self::SHRINKAGE_M0_MAX, $priorMass * 1.10);
    }

    $policy = [];
    $policy['residual_gain'] = $this->clamp($gain, self::RESIDUAL_GAIN_MIN, self::RESIDUAL_GAIN_MAX);
    $policy['residual_scale_denom'] = $this->clamp(
        $scaleDenominator,
        self::RESIDUAL_SCALE_DENOM_MIN,
        self::RESIDUAL_SCALE_DENOM_MAX
    );
    $policy['shrinkage_m0'] = $this->clamp($priorMass, self::SHRINKAGE_M0_MIN, self::SHRINKAGE_M0_MAX);
    $policy['confidence'] = round($sampleConfidence, 4);
    $policy['signal_strength'] = round($signal, 4);

    return $policy;
}
/**
 * Appends an event to the `recent_events` list inside the metadata and keeps only the
 * newest self::RECENT_EVENTS_LIMIT entries.
 *
 * A missing or non-array `recent_events` value is treated as an empty list.
 *
 * @param array<string,mixed> $meta  Metadata that carries the `recent_events` list.
 * @param array<string,mixed> $event Event to append.
 *
 * @return array<string,mixed> The metadata with the updated `recent_events` list.
 */
private function appendRecentEvent(array $meta, array $event): array
{
    $history = $meta['recent_events'] ?? [];
    $history = is_array($history) ? $history : [];

    $history[] = $event;

    // Drop the oldest entries once the list outgrows the limit.
    $meta['recent_events'] = count($history) > self::RECENT_EVENTS_LIMIT
        ? array_slice($history, -self::RECENT_EVENTS_LIMIT)
        : $history;

    return $meta;
}
/**
 * Builds the record of a single calibration update, including Brier score and log-loss
 * of the prediction before and after the update.
 *
 * The outcome is clamped to [0, 1] and both predictions to [1e-6, 1 - 1e-6] before the
 * scores are computed, which keeps the logarithms finite. The reported `pred_before` and
 * `pred_after` are the unclamped inputs.
 *
 * @param float  $outcomeError      Observed outcome (error indicator), clamped to [0, 1].
 * @param float  $predBefore        Predicted error probability before the update.
 * @param float  $predAfter         Predicted error probability after the update.
 * @param float  $expectedErrorRate Expected error rate, stored rounded.
 * @param float  $observedErrorRate Observed error rate, stored rounded.
 * @param float  $residual          Residual, stored rounded together with its absolute value.
 * @param Carbon $now               Timestamp recorded as `ts`.
 *
 * @return array<string,mixed>
 */
private function buildUpdateEvent(
    float $outcomeError,
    float $predBefore,
    float $predAfter,
    float $expectedErrorRate,
    float $observedErrorRate,
    float $residual,
    Carbon $now
): array {
    $outcome = $this->clamp($outcomeError, 0.0, 1.0);

    // Keep probabilities strictly inside (0, 1) so log() never sees 0.
    $epsilon = 1e-6;
    $before = $this->clamp($predBefore, $epsilon, 1.0 - $epsilon);
    $after = $this->clamp($predAfter, $epsilon, 1.0 - $epsilon);

    $brier = static fn (float $p): float => ($p - $outcome) * ($p - $outcome);
    $logloss = static fn (float $p): float => -($outcome * log($p) + (1.0 - $outcome) * log(1.0 - $p));

    return [
        'ts' => $now->toDateTimeString(),
        'outcome_error' => round($outcome, 4),
        'pred_before' => round($predBefore, 4),
        'pred_after' => round($predAfter, 4),
        'expected_error_rate' => round($expectedErrorRate, 4),
        'observed_error_rate' => round($observedErrorRate, 4),
        'residual' => round($residual, 4),
        'abs_residual' => round(abs($residual), 4),
        'brier_before' => round($brier($before), 6),
        'brier_after' => round($brier($after), 6),
        'logloss_before' => round($logloss($before), 6),
        'logloss_after' => round($logloss($after), 6),
    ];
}
/**
 * Returns a damping factor in [0.45, 1.0] for the given question type, derived from the
 * update events recorded in the rows' `algorithm_meta.recent_events`.
 *
 * Events from the last 7 days form the current window; events from the 7 days before that
 * form the previous window. Older events are ignored. The factor is reduced when the
 * updates made Brier score / log-loss worse on average, or when the median absolute
 * residual grew by more than 5% compared with the previous window. With fewer than 80
 * events in the current window the factor stays at 1.0.
 *
 * The result is cached per question type for 5 minutes.
 *
 * @param string $questionType Question type; blank values are treated as 'unknown'.
 *
 * @return float Health scale between 0.45 and 1.0.
 */
private function getHealthScaleForType(string $questionType): float
{
    $type = trim($questionType);
    if ($type === '') {
        $type = 'unknown';
    }
    $cacheKey = 'difficulty_health_scale:'.$type;

    return Cache::remember($cacheKey, now()->addMinutes(5), function () use ($type): float {
        // Take a single clock reading so the row filter and the event windows agree.
        $now = now();
        $nowTs = $now->getTimestamp();
        $currentWindowStart = $nowTs - 7 * 86400;
        $previousWindowStart = $nowTs - 14 * 86400;

        $rows = DB::table(self::TABLE)
            ->where('updated_at', '>=', $now->copy()->subDays(14))
            ->select(['algorithm_meta'])
            ->get();

        $currResiduals = [];
        $prevResiduals = [];
        $brierDelta = 0.0;
        $loglossDelta = 0.0;
        $eventCount = 0;

        foreach ($rows as $row) {
            $meta = json_decode((string) ($row->algorithm_meta ?? ''), true);
            if (! is_array($meta)) {
                continue;
            }
            if (($meta['question_type'] ?? 'unknown') !== $type) {
                continue;
            }
            $events = $meta['recent_events'] ?? [];
            if (! is_array($events)) {
                continue;
            }

            foreach ($events as $e) {
                if (! is_array($e)) {
                    continue;
                }
                $ts = isset($e['ts']) ? strtotime((string) $e['ts']) : false;
                if ($ts === false) {
                    continue;
                }

                $absResidual = abs((float) ($e['residual'] ?? 0.0));
                if ($ts >= $currentWindowStart) {
                    $currResiduals[] = $absResidual;
                    $brierDelta += (float) ($e['brier_after'] ?? 0.0) - (float) ($e['brier_before'] ?? 0.0);
                    $loglossDelta += (float) ($e['logloss_after'] ?? 0.0) - (float) ($e['logloss_before'] ?? 0.0);
                    $eventCount++;
                } elseif ($ts >= $previousWindowStart) {
                    // recent_events is capped by count, not by age, so a row can still hold
                    // events older than 14 days; only the 7-14 day window is the baseline.
                    $prevResiduals[] = $absResidual;
                }
            }
        }

        // Too few recent events to judge health: do not damp.
        if ($eventCount < 80) {
            return 1.0;
        }

        $avgBrierDelta = $brierDelta / $eventCount;
        $avgLoglossDelta = $loglossDelta / $eventCount;
        $medianCurrent = $this->median($currResiduals);
        $medianPrev = $this->median($prevResiduals);

        $scale = 1.0;
        // Both metrics got worse on average after the updates.
        if ($avgBrierDelta > 0.0 && $avgLoglossDelta > 0.0) {
            $scale *= 0.78;
        }
        // Either metric got worse by a noticeable margin.
        if ($avgBrierDelta > 0.003 || $avgLoglossDelta > 0.01) {
            $scale *= 0.82;
        }
        // Median absolute residual rose by more than 5% against the previous window.
        if ($medianPrev !== null && $medianPrev > 0.0 && $medianCurrent !== null && $medianCurrent > $medianPrev * 1.05) {
            $scale *= 0.82;
        }
        $scale = $this->clamp($scale, 0.45, 1.0);

        Log::info('QuestionDifficultyCalibrationService: 在线健康监控快照', [
            'question_type' => $type,
            'events_7d' => $eventCount,
            'avg_brier_delta' => round($avgBrierDelta, 6),
            'avg_logloss_delta' => round($avgLoglossDelta, 6),
            'median_abs_residual_7d' => $medianCurrent !== null ? round($medianCurrent, 6) : null,
            'median_abs_residual_prev_7d' => $medianPrev !== null ? round($medianPrev, 6) : null,
            'health_scale' => round($scale, 3),
        ]);

        return $scale;
    });
}
/**
 * Computes the median of a list of numbers.
 *
 * For an even number of values the result is the mean of the two middle values.
 *
 * @param array<int,float> $values Values in any order; the caller's array is not modified.
 *
 * @return float|null The median, or null when the list is empty.
 */
private function median(array $values): ?float
{
    $count = count($values);
    if ($count === 0) {
        return null;
    }

    sort($values);

    $upperIndex = intdiv($count, 2);
    $upper = (float) $values[$upperIndex];

    // Odd count: the middle element is the median.
    if ($count % 2 !== 0) {
        return $upper;
    }

    $lower = (float) $values[$upperIndex - 1];

    return ($lower + $upper) / 2.0;
}
/**
 * Normalises a raw difficulty value into the range [self::MIN_DIFF, self::MAX_DIFF].
 *
 * Values greater than 1.0 are divided by 5.0 before clamping (presumably they come from
 * a 1-5 scale; confirm against the callers).
 *
 * @param mixed $value Raw difficulty: a number or a numeric string.
 *
 * @return float|null The normalised difficulty, or null when the value is missing or not a
 *                    usable number (null, '', non-numeric strings, arrays, booleans, NaN).
 */
private function normalizeDifficultyValue(mixed $value): ?float
{
    // is_numeric() is false for null and '' as well, so this also covers the "missing" case.
    // Without it, input such as 'abc' would be cast to 0.0 and reported as a real difficulty.
    if (! is_numeric($value)) {
        return null;
    }

    $raw = (float) $value;
    if (is_nan($raw)) {
        return null;
    }

    if ($raw > 1.0) {
        $raw = $raw / 5.0;
    }

    return $this->clamp($raw, self::MIN_DIFF, self::MAX_DIFF);
}
/**
 * Restricts a value to the closed interval [$min, $max].
 *
 * @param float $value Value to restrict.
 * @param float $min   Lower bound.
 * @param float $max   Upper bound.
 *
 * @return float $min when the value is below it, $max when above it, otherwise the value.
 */
private function clamp(float $value, float $min, float $max): float
{
    // Apply the upper bound first, then the lower bound.
    $cappedAbove = min($max, $value);

    return max($min, $cappedAbove);
}
/**
 * Reports whether every table this service relies on exists.
 *
 * The answer is computed once and memoised in $this->tableReady for later calls.
 *
 * @return bool True when `paper_questions`, `papers`, `questions` and self::TABLE all exist.
 */
private function isReady(): bool
{
    if ($this->tableReady === null) {
        $allPresent = true;
        // Stop at the first missing table; the remaining ones are not checked.
        foreach (['paper_questions', 'papers', 'questions', self::TABLE] as $table) {
            if (! Schema::hasTable($table)) {
                $allPresent = false;
                break;
            }
        }
        $this->tableReady = $allPresent;
    }

    return $this->tableReady;
}
- }
|