Parcourir la source

Optimize category 3/4 difficulty matching and harden answer floor

yemeishu il y a 2 jours
Parent
commit
62b1e1f91f

+ 37 - 20
app/Services/DifficultyDistributionService.php

@@ -23,14 +23,14 @@ class DifficultyDistributionService
                 $highPercentage = 25;   // >0.5
                 break;
             case 3:
-                $mediumPercentage = 50; // 0.5-0.75
-                $lowPercentage = 25;    // <0.5
-                $highPercentage = 25;   // >0.75
+                $mediumPercentage = 80; // 0.5-0.75
+                $lowPercentage = 10;    // 0.4-0.5
+                $highPercentage = 10;   // >0.75
                 break;
             case 4:
-                $mediumPercentage = 50; // 0.75-1
-                $lowPercentage = 25;    // <0.75
-                $highPercentage = 25;   // >0.75
+                $mediumPercentage = 50; // 0.75-0.9
+                $lowPercentage = 25;    // 0.5-0.75
+                $highPercentage = 25;   // 0.9-1.0
                 break;
             default:
                 $lowPercentage = 25;
@@ -85,16 +85,19 @@ class DifficultyDistributionService
                 ];
             case 3:
                 return [
-                    'primary' => ['min' => 0.5, 'max' => 0.75, 'percentage' => 50],
-                    'low' => ['min' => 0.0, 'max' => 0.5, 'percentage' => 25],
-                    'high' => ['min' => 0.75, 'max' => 1.0, 'percentage' => 25],
-                    'description' => '中等型:0.5-0.75占比50%,<0.5占比25%,>0.75占比25%'
+                    'primary' => ['min' => 0.5, 'max' => 0.75, 'percentage' => 80],
+                    'low' => ['min' => 0.4, 'max' => 0.5, 'percentage' => 10],
+                    'high' => ['min' => 0.75, 'max' => 1.0, 'percentage' => 10],
+                    'fallback_low' => ['min' => 0.35, 'max' => 0.4, 'percentage' => 0],
+                    'description' => '培优型:0.5-0.75占比80%,0.4-0.5占比10%,>0.75占比10%;0.35-0.4仅作为保题量兜底'
                 ];
             case 4:
                 return [
-                    'primary' => ['min' => 0.75, 'max' => 1.0, 'percentage' => 50],
-                    'secondary' => ['min' => 0.0, 'max' => 0.75, 'percentage' => 50],
-                    'description' => '拔高型:0.75-1占比50%,其他占比50%'
+                    'primary' => ['min' => 0.75, 'max' => 0.9, 'percentage' => 50],
+                    'low' => ['min' => 0.5, 'max' => 0.75, 'percentage' => 25],
+                    'high' => ['min' => 0.9, 'max' => 1.0, 'percentage' => 25],
+                    'secondary' => ['min' => 0.0, 'max' => 0.5, 'percentage' => 0],
+                    'description' => '竞赛型:0.75-0.9占比50%,0.5-0.75占比25%,0.9-1.0占比25%'
                 ];
             default:
                 return [
@@ -151,14 +154,26 @@ class DifficultyDistributionService
                 if ($difficulty >= 0.5 && $difficulty <= 0.75) {
                     return 'primary_medium';
                 }
-                if ($difficulty < 0.5) {
+                if ($difficulty >= 0.4 && $difficulty < 0.5) {
                     return 'primary_low';
                 }
+                if ($difficulty >= 0.35 && $difficulty < 0.4) {
+                    return 'secondary';
+                }
+                if ($difficulty < 0.35) {
+                    return 'other';
+                }
                 return 'primary_high';
             case 4:
-                if ($difficulty >= 0.75 && $difficulty <= 1.0) {
+                if ($difficulty >= 0.75 && $difficulty < 0.9) {
                     return 'primary_medium';
                 }
+                if ($difficulty >= 0.5 && $difficulty < 0.75) {
+                    return 'primary_low';
+                }
+                if ($difficulty >= 0.9 && $difficulty <= 1.0) {
+                    return 'primary_high';
+                }
                 return 'secondary';
             default:
                 return 'other';
@@ -198,10 +213,10 @@ class DifficultyDistributionService
                 };
             case 4:
                 return match($level) {
-                    'low' => 'secondary',
+                    'low' => 'primary_low',
                     'medium' => 'primary_medium',
-                    'high' => 'secondary',
-                    default => 'secondary'
+                    'high' => 'primary_high',
+                    default => 'other'
                 };
             default:
                 return 'other';
@@ -211,8 +226,10 @@ class DifficultyDistributionService
     public function getSupplementOrder(int $category): array
     {
         return match ($category) {
-            0, 1, 4 => ['secondary', 'other'],
-            2, 3 => ['primary_medium', 'primary_low', 'primary_high', 'other'],
+            // Each arm is an ordered list of difficulty-bucket names for one category.
+            0, 1 => ['secondary', 'other'],
+            2 => ['primary_medium', 'primary_low', 'primary_high', 'other'],
+            // Category 3: medium band first, then the harder band ahead of the easier one.
+            3 => ['primary_medium', 'primary_high', 'primary_low', 'secondary', 'other'],
+            // Category 4: hardest band first, stepping down through medium and low.
+            4 => ['primary_high', 'primary_medium', 'primary_low', 'secondary', 'other'],
             default => ['other']
         };
     }

+ 17 - 11
app/Services/ExamPdfExportService.php

@@ -1266,19 +1266,25 @@ class ExamPdfExportService
 
     private function difficultyRangeByLevel(?int $level): ?array
     {
-        $map = [
-            0 => ['min' => 0.00, 'max' => 0.10],
-            1 => ['min' => 0.10, 'max' => 0.25],
-            2 => ['min' => 0.25, 'max' => 0.50],
-            3 => ['min' => 0.50, 'max' => 0.75],
-            4 => ['min' => 0.75, 'max' => 1.00],
-        ];
+        // No level requested means there is no range to report.
+        if ($level === null) {
+            return null;
+        }
 
-        if ($level === null || ! isset($map[$level])) {
+        // Kept in line with the paper-assembly strategy: category 3 also accepts the
+        // 0.4-0.5 low target band, so its range starts at 0.40 rather than 0.50.
+        if ($level === 3) {
+            return ['min' => 0.40, 'max' => 0.75];
+        }
+
+        // Every other level reports the 'primary' band published by the distribution service.
+        // NOTE(review): for level 4 that band is 0.75-0.9 in getRanges(), so 0.9-1.0 is not
+        // covered by this range (it previously was, as 0.75-1.00) - confirm this is intended.
+        $ranges = app(DifficultyDistributionService::class)->getRanges($level);
+        $primary = $ranges['primary'] ?? null;
+        // A missing or malformed 'primary' entry is reported as "no range".
+        if (! is_array($primary) || ! isset($primary['min'], $primary['max'])) {
             return null;
         }
 
-        return $map[$level];
+        return [
+            'min' => (float) $primary['min'],
+            'max' => (float) $primary['max'],
+        ];
     }
 
     private function mapDifficultyValueToLevel(float $difficulty): int
@@ -1289,10 +1295,10 @@ class ExamPdfExportService
         if ($difficulty < 0.25) {
             return 1;
         }
-        if ($difficulty < 0.50) {
+        if ($difficulty < 0.40) {
             return 2;
         }
-        if ($difficulty < 0.75) {
+        if ($difficulty <= 0.75) {
             return 3;
         }
 

+ 258 - 46
app/Services/LearningAnalyticsService.php

@@ -1555,6 +1555,13 @@ class LearningAnalyticsService
                 }
             }
 
+            $selectedQuestions = $this->enforceAnswerDifficultyFloor(
+                $allQuestions,
+                $selectedQuestions,
+                (int) $difficultyCategory,
+                (int) $totalQuestions
+            );
+
             $requestedKpSelectionStats = $this->buildRequestedKpSelectionStats(
                 $selectedQuestions,
                 $params['kp_code_list_original'] ?? $params['kp_codes'] ?? []
@@ -1788,12 +1795,30 @@ class LearningAnalyticsService
             // 注意: 难度筛选由 QuestionLocalService 的难度分布系统处理
             // 不在这里进行难度筛选,让 QuestionLocalService 做精确的难度分布
 
-            // 【重要】移除数量限制,获取所有符合条件的题目
-            // 不使用limit()限制查询结果,让后续处理逻辑决定最终数量
-            $query->inRandomOrder();
+            $hasSpecificScope = !empty($kpCodes) || !empty($textbookCatalogNodeIds);
+            $queryPoolLimit = 0;
+
+            if ($hasSpecificScope) {
+                // 有明确知识点/章节范围时,保留随机抽样能力。
+                $query->inRandomOrder();
+            } else {
+                // 新学生常见场景:没有掌握度、没有知识点范围。
+                // 避免全表随机+全量加载导致队列任务超时。
+                $queryPoolLimit = max(500, $totalNeeded * 50);
+                $query->orderByDesc('id')->limit($queryPoolLimit);
+            }
 
             $questions = $query->get();
 
+            Log::info('getQuestionsFromBank: query_pool', [
+                'has_specific_scope' => $hasSpecificScope,
+                'kp_codes_count' => count($kpCodes),
+                'textbook_node_count' => is_array($textbookCatalogNodeIds) ? count($textbookCatalogNodeIds) : 0,
+                'total_needed' => $totalNeeded,
+                'query_pool_limit' => $queryPoolLimit,
+                'result_count' => $questions->count(),
+            ]);
+
             // 转换为标准格式
             $formattedQuestions = $questions->map(function ($q) {
                 return [
@@ -3239,7 +3264,22 @@ class LearningAnalyticsService
 
     private function determineQuestionType(array $q): string
     {
-        // 优先根据题目内容判断(而不是数据库字段)
+        // 优先信任结构化题型字段,避免解答题被题干模式误判。
+        $typeField = $q['question_type'] ?? $q['type'] ?? '';
+        if (is_string($typeField)) {
+            $t = strtolower($typeField);
+            if (in_array($t, ['choice', 'single_choice', 'multiple_choice', '选择题', 'choice', 'single_choice', 'multiple_choice'])) {
+                return 'choice';
+            }
+            if (in_array($t, ['fill', 'blank', 'fill_blank', 'fill_in_the_blank', '填空题'])) {
+                return 'fill';
+            }
+            if (in_array($t, ['answer', 'calculation', 'word_problem', 'proof', '解答题'])) {
+                return 'answer';
+            }
+        }
+
+        // 结构化字段不可用时,再根据题目内容启发式判断。
         $stem = $q['stem'] ?? $q['content'] ?? '';
         // 处理 stem 可能是数组的情况
         if (is_array($stem)) {
@@ -3285,20 +3325,7 @@ class LearningAnalyticsService
             }
         }
 
-        // 3. 根据题目已有类型字段判断(作为后备)
-        $typeField = $q['question_type'] ?? $q['type'] ?? '';
-        if (is_string($typeField)) {
-            $t = strtolower($typeField);
-            if (in_array($t, ['choice', 'single_choice', 'multiple_choice', '选择题', 'choice', 'single_choice', 'multiple_choice'])) {
-                return 'choice';
-            }
-            if (in_array($t, ['fill', 'blank', 'fill_blank', 'fill_in_the_blank', '填空题'])) {
-                return 'fill';
-            }
-            if (in_array($t, ['answer', 'calculation', 'word_problem', 'proof', '解答题'])) {
-                return 'answer';
-            }
-        }
+        // 3. (The structured type-field check that used to sit here now runs first, at the top of this method.)
 
         // 4. 根据标签判断
         if (is_string($tags)) {
@@ -3852,27 +3879,31 @@ class LearningAnalyticsService
      */
     private function getDifficultyRangesForCategory(int $difficultyCategory): array
     {
-        return match($difficultyCategory) {
-            1 => [
-                ['min' => 0.0, 'max' => 0.5],  // 基础型:偏向低难度
-                ['min' => 0.5, 'max' => 1.0],
-            ],
-            2 => [
-                ['min' => 0.0, 'max' => 0.5],  // 进阶型:均衡分布
-                ['min' => 0.5, 'max' => 1.0],
-            ],
-            3 => [
-                ['min' => 0.25, 'max' => 0.75], // 中等型:偏向中等难度
-                ['min' => 0.0, 'max' => 1.0],
-            ],
-            4 => [
-                ['min' => 0.5, 'max' => 1.0],  // 拔高型:偏向高难度
-                ['min' => 0.0, 'max' => 0.5],
-            ],
-            default => [
-                ['min' => 0.0, 'max' => 1.0],
-            ]
-        };
+        // The bands come from DifficultyDistributionService instead of a local table.
+        $ranges = app(DifficultyDistributionService::class)->getRanges($difficultyCategory);
+        $segments = [];
+
+        // Bands are emitted in this fixed key order; keys the category does not define are skipped.
+        foreach (['primary', 'high', 'low', 'secondary', 'fallback_low'] as $key) {
+            if (!isset($ranges[$key]) || !is_array($ranges[$key])) {
+                continue;
+            }
+            $min = $ranges[$key]['min'] ?? null;
+            $max = $ranges[$key]['max'] ?? null;
+            $percentage = $ranges[$key]['percentage'] ?? null;
+            // A band without numeric bounds cannot be turned into a query segment.
+            if (!is_numeric($min) || !is_numeric($max)) {
+                continue;
+            }
+            // Only bands whose target percentage is > 0 are used for the supplement query,
+            // so 0% fallback bands never enter the candidate pool.
+            if (!is_numeric($percentage) || (float) $percentage <= 0.0) {
+                continue;
+            }
+            $segments[] = ['min' => (float) $min, 'max' => (float) $max];
+        }
+
+        // With no usable band, fall back to the full 0.0-1.0 difficulty range.
+        if ($segments === []) {
+            return [['min' => 0.0, 'max' => 1.0]];
+        }
+
+        return $segments;
     }
 
     private function normalizeQuestionStageGrade(int $grade): ?int
@@ -4046,13 +4077,14 @@ class LearningAnalyticsService
 
             // 如果难度分布不满足目标数,从该题型剩余题目补充
             if (count($typeSelected) < $target) {
-                $allTypeBuckets = array_merge(
-                    $buckets['primary_medium'] ?? [],
-                    $buckets['primary_low'] ?? [],
-                    $buckets['primary_high'] ?? [],
-                    $buckets['secondary'] ?? [],
-                    $buckets['other'] ?? []
-                );
+                $supplementBucketOrder = array_values(array_unique(array_merge(
+                    $diffService->getSupplementOrder($difficultyCategory),
+                    ['primary_medium', 'primary_high', 'primary_low', 'secondary', 'other']
+                )));
+                $allTypeBuckets = [];
+                foreach ($supplementBucketOrder as $bucketKey) {
+                    $allTypeBuckets = array_merge($allTypeBuckets, $buckets[$bucketKey] ?? []);
+                }
                 shuffle($allTypeBuckets);
                 foreach ($allTypeBuckets as $q) {
                     if (count($typeSelected) >= $target) break;
@@ -4100,6 +4132,186 @@ class LearningAnalyticsService
         return array_slice($result, 0, $totalQuestions);
     }
 
+    /**
+     * Guard for papers whose category defines an answer-difficulty floor: "answer"-type
+     * questions below that floor must not be used as filler.
+     *
+     * Each violating question is swapped, in place, for the best-fitting unused "answer"
+     * candidate that is not itself below the floor. When no such candidate exists the
+     * question is dropped, and the paper is then topped up with any unused candidate that
+     * does not violate the floor, so the total count is kept whenever the pool allows it.
+     *
+     * Categories without a floor, and an empty selection, are returned unchanged.
+     *
+     * @param  array<int, array<string, mixed>>  $candidatePool  Questions available for replacement and top-up.
+     * @param  array<int, array<string, mixed>>  $selectedQuestions  The current selection.
+     * @param  int  $difficultyCategory  Difficulty category of the paper.
+     * @param  int  $totalQuestions  Target question count; the result is never longer than this.
+     * @return array<int, array<string, mixed>>  The adjusted selection.
+     */
+    private function enforceAnswerDifficultyFloor(
+        array $candidatePool,
+        array $selectedQuestions,
+        int $difficultyCategory,
+        int $totalQuestions
+    ): array {
+        $floor = $this->answerDifficultyFloor($difficultyCategory);
+        if ($floor === null || empty($selectedQuestions)) {
+            return $selectedQuestions;
+        }
+
+        // Locate the violations up front. Replacements only ever overwrite the slot being
+        // processed, so this list cannot change while it is being worked through.
+        $violatingIndexes = [];
+        foreach ($selectedQuestions as $idx => $question) {
+            if ($this->isLowAnswerQuestion($question, $floor)) {
+                $violatingIndexes[] = $idx;
+            }
+        }
+
+        // Fast path: nothing to replace and nothing to top up, so skip merging,
+        // de-duplicating and sorting the whole candidate pool.
+        if ($violatingIndexes === [] && count($selectedQuestions) >= $totalQuestions) {
+            return array_slice(array_values($selectedQuestions), 0, $totalQuestions);
+        }
+
+        $selectedIds = [];
+        foreach ($selectedQuestions as $question) {
+            $id = $this->questionIdentity($question);
+            if ($id !== '') {
+                $selectedIds[$id] = true;
+            }
+        }
+
+        // De-duplicate by identity; questions without any identity cannot be tracked and are skipped.
+        $candidates = [];
+        $seen = [];
+        foreach (array_merge($candidatePool, $selectedQuestions) as $question) {
+            $id = $this->questionIdentity($question);
+            if ($id === '' || isset($seen[$id])) {
+                continue;
+            }
+            $seen[$id] = true;
+            $candidates[] = $question;
+        }
+
+        // Best difficulty fit first, so both replacement and top-up prefer on-target questions.
+        usort($candidates, function (array $a, array $b) use ($difficultyCategory): int {
+            return $this->compareCandidateDifficultyFit($a, $b, $difficultyCategory);
+        });
+
+        $replacementUsed = [];
+        $removed = 0;
+        foreach ($violatingIndexes as $idx) {
+            $replacement = $this->takeReplacementQuestion(
+                $candidates,
+                $selectedIds,
+                $replacementUsed,
+                $floor,
+                true
+            );
+
+            // No same-type substitute: drop the question and let the top-up below fill the gap.
+            if ($replacement === null) {
+                unset($selectedQuestions[$idx]);
+                $removed++;
+                continue;
+            }
+
+            $oldId = $this->questionIdentity($selectedQuestions[$idx]);
+            if ($oldId !== '') {
+                unset($selectedIds[$oldId]);
+            }
+            $newId = $this->questionIdentity($replacement);
+            if ($newId !== '') {
+                $selectedIds[$newId] = true;
+                $replacementUsed[$newId] = true;
+            }
+            $selectedQuestions[$idx] = $replacement;
+        }
+
+        $selectedQuestions = array_values($selectedQuestions);
+        if (count($selectedQuestions) < $totalQuestions) {
+            foreach ($candidates as $candidate) {
+                if (count($selectedQuestions) >= $totalQuestions) {
+                    break;
+                }
+
+                $id = $this->questionIdentity($candidate);
+                if ($id === '' || isset($selectedIds[$id]) || isset($replacementUsed[$id])) {
+                    continue;
+                }
+                // Top-up accepts any question type, but never an answer question below the floor.
+                if ($this->isLowAnswerQuestion($candidate, $floor)) {
+                    continue;
+                }
+
+                $selectedQuestions[] = $candidate;
+                $selectedIds[$id] = true;
+            }
+        }
+
+        // Surface the cases where the guard had to drop questions or could not reach the target.
+        if ($removed > 0 || count($selectedQuestions) < $totalQuestions) {
+            Log::warning('LearningAnalyticsService: 培优解答题难度护栏触发', [
+                'difficulty_category' => $difficultyCategory,
+                'answer_floor' => $floor,
+                'removed_low_answer_count' => $removed,
+                'final_count' => count($selectedQuestions),
+                'target_count' => $totalQuestions,
+            ]);
+        }
+
+        return array_slice($selectedQuestions, 0, $totalQuestions);
+    }
+
+    /**
+     * Minimum difficulty an "answer"-type question must have for the given category.
+     *
+     * @return float|null  0.4 for category 3, 0.5 for category 4, null when no floor applies.
+     */
+    private function answerDifficultyFloor(int $difficultyCategory): ?float
+    {
+        $floorByCategory = [
+            3 => 0.4,
+            4 => 0.5,
+        ];
+
+        return $floorByCategory[$difficultyCategory] ?? null;
+    }
+
+    /**
+     * Whether the question is an "answer"-type question whose difficulty is below the floor.
+     * A question with no 'difficulty' entry is treated as difficulty 0.0.
+     */
+    private function isLowAnswerQuestion(array $question, float $floor): bool
+    {
+        if ($this->determineQuestionType($question) !== 'answer') {
+            return false;
+        }
+
+        $difficulty = (float) ($question['difficulty'] ?? 0.0);
+
+        return $difficulty < $floor;
+    }
+
+    /**
+     * Return the first candidate, in the given order, that can stand in for a rejected question.
+     *
+     * A candidate is passed over when it has no identity, is already selected or already used
+     * as a replacement, is not an "answer" question while $sameTypeOnly is set, or is itself
+     * an answer question below the floor.
+     *
+     * @param  array<int, array<string, mixed>>  $candidates
+     * @param  array<string, bool>  $selectedIds
+     * @param  array<string, bool>  $replacementUsed
+     * @return array<string, mixed>|null  The chosen candidate, or null when none qualifies.
+     */
+    private function takeReplacementQuestion(
+        array $candidates,
+        array $selectedIds,
+        array $replacementUsed,
+        float $floor,
+        bool $sameTypeOnly
+    ): ?array {
+        foreach ($candidates as $candidate) {
+            $candidateId = $this->questionIdentity($candidate);
+            $unavailable = $candidateId === ''
+                || isset($selectedIds[$candidateId])
+                || isset($replacementUsed[$candidateId]);
+            if ($unavailable) {
+                continue;
+            }
+
+            $typeMatches = ! $sameTypeOnly || $this->determineQuestionType($candidate) === 'answer';
+            if ($typeMatches && ! $this->isLowAnswerQuestion($candidate, $floor)) {
+                return $candidate;
+            }
+        }
+
+        return null;
+    }
+
+    /**
+     * Sort comparator: orders questions by how close their difficulty is to the category's
+     * target value (0.62 for category 3, 0.82 for category 4, 0.5 otherwise), closest first.
+     * A missing difficulty counts as 0.5; ties are broken by question identity.
+     */
+    private function compareCandidateDifficultyFit(array $a, array $b, int $difficultyCategory): int
+    {
+        if ($difficultyCategory === 3) {
+            $ideal = 0.62;
+        } elseif ($difficultyCategory === 4) {
+            $ideal = 0.82;
+        } else {
+            $ideal = 0.5;
+        }
+
+        $distanceA = abs($ideal - (float) ($a['difficulty'] ?? 0.5));
+        $distanceB = abs($ideal - (float) ($b['difficulty'] ?? 0.5));
+
+        if ($distanceA === $distanceB) {
+            return $this->questionIdentity($a) <=> $this->questionIdentity($b);
+        }
+
+        return $distanceA <=> $distanceB;
+    }
+
+    /**
+     * Identity string for a question: the first non-null value among 'id', 'question_id'
+     * and 'question_bank_id', or '' when none of them is set.
+     */
+    private function questionIdentity(array $question): string
+    {
+        foreach (['id', 'question_id', 'question_bank_id'] as $key) {
+            if (isset($question[$key])) {
+                return (string) $question[$key];
+            }
+        }
+
+        return '';
+    }
+
     /**
      * 统计题目列表中各题型的数量
      */

+ 105 - 0
tests/Unit/DifficultyDistributionServiceTest.php

@@ -0,0 +1,105 @@
+<?php
+
+namespace Tests\Unit;
+
+use App\Services\DifficultyDistributionService;
+use App\Services\LearningAnalyticsService;
+use ReflectionClass;
+use Tests\TestCase;
+
+class DifficultyDistributionServiceTest extends TestCase
+{
+    /** Ten category-3 questions split into 1 low, 8 medium and 1 high. */
+    public function test_category_three_distribution_keeps_only_ten_percent_low_band(): void
+    {
+        $split = (new DifficultyDistributionService())->calculateDistribution(3, 10);
+
+        self::assertSame(1, $split['low']['count']);
+        self::assertSame(8, $split['medium']['count']);
+        self::assertSame(1, $split['high']['count']);
+        self::assertSame(10, array_sum(array_column($split, 'count')));
+    }
+
+    /** Category 3 keeps very easy questions out of the 'primary_low' bucket. */
+    public function test_category_three_difficulty_buckets_keep_extreme_low_out_of_target_low_band(): void
+    {
+        $classifier = new DifficultyDistributionService();
+        $expectations = [
+            [0.06, 'other'],
+            [0.36, 'secondary'],
+            [0.42, 'primary_low'],
+            [0.62, 'primary_medium'],
+            [0.80, 'primary_high'],
+        ];
+
+        foreach ($expectations as [$difficulty, $bucket]) {
+            self::assertSame($bucket, $classifier->classifyQuestionByDifficulty($difficulty, 3));
+        }
+    }
+
+    /** Category 3 supplements from harder buckets before falling back to easier ones. */
+    public function test_category_three_supplement_order_prefers_higher_difficulty_before_low_fallback(): void
+    {
+        $expectedOrder = ['primary_medium', 'primary_high', 'primary_low', 'secondary', 'other'];
+
+        self::assertSame($expectedOrder, (new DifficultyDistributionService())->getSupplementOrder(3));
+    }
+
+    /** Category 4 splits its target range into low, medium and high buckets. */
+    public function test_category_four_difficulty_buckets_use_split_high_mid_low_ranges(): void
+    {
+        $classifier = new DifficultyDistributionService();
+        $expectations = [
+            [0.49, 'secondary'],
+            [0.50, 'primary_low'],
+            [0.80, 'primary_medium'],
+            [0.92, 'primary_high'],
+        ];
+
+        foreach ($expectations as [$difficulty, $bucket]) {
+            self::assertSame($bucket, $classifier->classifyQuestionByDifficulty($difficulty, 4));
+        }
+    }
+
+    /** Category 4 supplements from the hardest bucket downwards. */
+    public function test_category_four_supplement_order_prefers_higher_difficulty_first(): void
+    {
+        $expectedOrder = ['primary_high', 'primary_medium', 'primary_low', 'secondary', 'other'];
+
+        self::assertSame($expectedOrder, (new DifficultyDistributionService())->getSupplementOrder(4));
+    }
+
+    /** A too-easy answer question is swapped for a qualifying one and the count is kept. */
+    public function test_category_three_answer_floor_replaces_low_answer_without_losing_total_count(): void
+    {
+        $analytics = app(LearningAnalyticsService::class);
+        $guard = (new ReflectionClass($analytics))->getMethod('enforceAnswerDifficultyFloor');
+        $guard->setAccessible(true);
+
+        $selected = [
+            $this->makeQuestion(1, 'choice', 0.3),
+            $this->makeQuestion(2, 'fill', 0.4),
+            $this->makeQuestion(3, 'answer', 0.06),
+            $this->makeQuestion(4, 'answer', 0.5),
+        ];
+        $pool = [
+            ...$selected,
+            $this->makeQuestion(5, 'answer', 0.62),
+            $this->makeQuestion(6, 'fill', 0.58),
+        ];
+
+        $result = $guard->invokeArgs($analytics, [$pool, $selected, 3, 4]);
+
+        self::assertCount(4, $result);
+
+        $lowAnswers = array_filter(
+            $result,
+            static fn (array $question): bool => $question['question_type'] === 'answer'
+                && $question['difficulty'] < 0.4
+        );
+        self::assertCount(0, $lowAnswers);
+
+        self::assertContains(5, array_column($result, 'id'));
+    }
+
+    /** Build a minimal question fixture with the given id, type and difficulty. */
+    private function makeQuestion(int $id, string $type, float $difficulty): array
+    {
+        return [
+            'id' => $id,
+            'question_type' => $type,
+            'difficulty' => $difficulty,
+            'stem' => "Question {$id}",
+            'kp_code' => 'KP001',
+            'solution' => 'solution',
+        ];
+    }
+}