|
|
@@ -0,0 +1,88 @@
|
|
|
<?php

declare(strict_types=1);

use Illuminate\Support\Facades\DB;

// Boot the framework so the DB facade and its configured connections are usable from this
// standalone CLI script.
require __DIR__.'/../vendor/autoload.php';
$app = require __DIR__.'/../bootstrap/app.php';
$kernel = $app->make(Illuminate\Contracts\Console\Kernel::class);
$kernel->bootstrap();

// CLI arguments (all optional):
//   1. maximum number of rows to scan (values below 1 are raised to 1; default 50000)
//   2. output directory, trailing slashes removed (default /tmp)
//   3. table to read (default "questions")
$limit = isset($argv[1]) ? max(1, (int) $argv[1]) : 50000;
$outDir = isset($argv[2]) ? rtrim($argv[2], '/') : '/tmp';
$table = isset($argv[3]) ? trim((string) $argv[3]) : 'questions';

// rtrim() reduces "/" to an empty string; treat that as the filesystem root when checking.
$outDirToCreate = $outDir === '' ? '/' : $outDir;

// Creation stays best-effort (the scan still runs if it fails), but a failure is no longer
// silent. The mkdir warning itself remains suppressed so that, depending on the PHP error
// display settings, it cannot end up mixed into the JSON printed on stdout; the problem is
// reported on STDERR instead. The second is_dir() covers a directory created concurrently.
if (!is_dir($outDirToCreate) && !@mkdir($outDirToCreate, 0777, true) && !is_dir($outDirToCreate)) {
    fwrite(
        STDERR,
        "warning: could not create output directory '{$outDirToCreate}'; report files may not be written\n"
    );
}

// Newest rows first (highest id), skipping rows whose stem is NULL.
$rows = DB::connection('remote_mysql')
    ->table($table)
    ->select('id', 'question_type', 'stem')
    ->whereNotNull('stem')
    ->orderByDesc('id')
    ->limit($limit)
    ->get();
|
|
|
+
|
|
|
// One (initially empty) bucket of findings per issue category, keyed by category name.
$issues = array_fill_keys(
    [
        'unbalanced_dollar',
        'suspicious_latex_env',
        'mixed_placeholder_inside_math',
        'compare_blank_between_math_tokens',
        'tail_backslash_dollar_marker',
    ],
    []
);
|
|
|
+
|
|
|
/**
 * Appends a compact finding for $row to $bucket.
 *
 * A bucket never grows beyond 2000 entries: once it is full, further calls leave it untouched.
 * Each entry holds the row id (as int), the question type (as string), the given reason and
 * the first 220 characters of the stem.
 */
$push = static function (array &$bucket, object $row, string $reason): void {
    $bucketIsFull = count($bucket) >= 2000;

    if (!$bucketIsFull) {
        $entry = [
            'id' => (int) $row->id,
            'question_type' => (string) $row->question_type,
            'reason' => $reason,
            'stem_preview' => mb_substr((string) $row->stem, 0, 220),
        ];

        array_push($bucket, $entry);
    }
};
|
|
|
+
|
|
|
// ---------------------------------------------------------------------------------------------
// Scan every fetched stem for markup problems, then write a summary file and a detail file
// and print the summary as JSON on stdout.
// ---------------------------------------------------------------------------------------------

// A "blank" written as an opening parenthesis, optional whitespace and a closing parenthesis.
// The previous character classes listed the same parenthesis twice and the whitespace
// alternation repeated a plain space, which looks like full-width characters lost in
// transcoding. They are spelled as explicit code points here so the pattern no longer depends
// on the file encoding.
// NOTE(review): presumably U+FF08/U+FF09 (full-width parentheses), U+00A0 and U+3000 were the
// intended extra characters — confirm against real stems.
$blankParens = '[(\x{FF08}][\s\x{00A0}\x{3000}]*[)\x{FF09}]';

// Text that may appear between two "$" delimiters: any run of characters in which every "$"
// is directly preceded by a backslash. This accepts exactly the same strings as the former
// (?:[^$]|\\.)* sub-pattern, but each character can be consumed in only one way. The former
// version could read a backslash pair two ways, which makes backtracking explode on stems
// with many LaTeX commands; preg_match() then gives up and returns false, and the stem was
// silently treated as clean.
$mathBody = '(?:[^$\\\\]|\\\\\$?)*';

// Regex-based checks, keyed by issue category.
$patterns = [
    // "\begin{" or "\end{" whose brace is never closed before the end of the stem.
    'suspicious_latex_env' => '/\\\\(?:begin|end)\{[^}]*$/u',
    // \underline{...}, a run of underscores or an empty pair of parentheses between two "$".
    // NOTE(review): the two "$" are not paired up, so this also matches a blank that sits
    // between the closing "$" of one segment and the opening "$" of the next.
    'mixed_placeholder_inside_math' => '/\$'.$mathBody
        .'(?:\\\\underline\{[^}]*\}|_{2,}|'.$blankParens.')'
        .$mathBody.'\$/u',
    // "$...$", then a blank (underscores or empty parentheses), then another "$...$".
    'compare_blank_between_math_tokens' => '/\$[^$]*\$\s*(?:_{2,}|'.$blankParens.')\s*\$[^$]*\$/u',
    // Backslash(es) followed by "$" with only whitespace and tags after it until the end.
    'tail_backslash_dollar_marker' => '/\\\\+\$(?=\s*(?:<[^>]+>\s*)*$)/u',
];

$reasons = [
    'suspicious_latex_env' => 'truncated \\begin/\\end block',
    'mixed_placeholder_inside_math' => 'placeholder token appears inside $...$',
    'compare_blank_between_math_tokens' => 'blank token inserted between two math segments',
    'tail_backslash_dollar_marker' => 'tail backslash-dollar marker used as blank placeholder',
];

// True number of hits per category. The detail buckets filled through $push stop growing once
// they are full, so counting their entries under-reports categories with many hits.
$totals = array_fill_keys(array_keys($issues), 0);

// Number of preg_match() calls that failed (for example on malformed UTF-8 or when an engine
// limit was hit); such stems cannot be classified by the affected check.
$regexFailures = 0;

// Runs one pattern against a stem; a regex engine failure is counted and treated as "no match".
$matches = static function (string $pattern, string $subject) use (&$regexFailures): bool {
    $result = preg_match($pattern, $subject);
    if ($result === false) {
        ++$regexFailures;

        return false;
    }

    return $result === 1;
};

// Counts a hit and hands the row to $push for the (size-limited) detail bucket.
$record = static function (string $issueKey, object $row, string $reason) use (&$issues, &$totals, $push): void {
    $totals[$issueKey] = ($totals[$issueKey] ?? 0) + 1;
    $push($issues[$issueKey], $row, $reason);
};

foreach ($rows as $row) {
    $stem = (string) $row->stem;

    // Every "$" counts here, including one preceded by a backslash.
    if (substr_count($stem, '$') % 2 !== 0) {
        $record('unbalanced_dollar', $row, 'odd number of $ delimiters');
    }

    foreach ($patterns as $issueKey => $pattern) {
        if ($matches($pattern, $stem)) {
            $record($issueKey, $row, $reasons[$issueKey]);
        }
    }
}

// Use a single instant for both the timestamp in the summary and the file-name stamp.
$now = time();

$summary = [
    'table' => $table,
    'scan_limit' => $limit,
    'scanned_rows' => count($rows),
    'counts' => $totals,
    'generated_at' => date('c', $now),
    // Entries actually present in the detail file; lower than "counts" when a bucket filled up.
    'detail_counts' => array_map('count', $issues),
    'regex_failures' => $regexFailures,
];

// JSON_INVALID_UTF8_SUBSTITUTE keeps one malformed stem preview from making json_encode()
// return false, which would otherwise leave an empty report file behind.
$jsonFlags = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE;

// Encodes $payload and writes it to $path; reports any failure on STDERR and returns false.
$writeJson = static function (string $path, array $payload) use ($jsonFlags): bool {
    $json = json_encode($payload, $jsonFlags);
    if ($json === false) {
        fwrite(STDERR, sprintf("error: could not encode %s: %s\n", $path, json_last_error_msg()));

        return false;
    }

    if (file_put_contents($path, $json) === false) {
        fwrite(STDERR, sprintf("error: could not write %s\n", $path));

        return false;
    }

    return true;
};

$stamp = date('Ymd_His', $now);
$summaryPath = "$outDir/question_stem_quality_summary_$stamp.json";
$detailPath = "$outDir/question_stem_quality_details_$stamp.json";

$summaryWritten = $writeJson($summaryPath, $summary);
$detailWritten = $writeJson($detailPath, $issues);

echo json_encode([
    'summary_path' => $summaryPath,
    'detail_path' => $detailPath,
    'summary' => $summary,
], $jsonFlags), "\n";

// The summary has already been printed; signal missing report files through the exit status.
if (!$summaryWritten || !$detailWritten) {
    exit(1);
}
|