make(Illuminate\Contracts\Console\Kernel::class);
$kernel->bootstrap();

use Illuminate\Support\Facades\DB;

/*
 * Question stem quality scan.
 *
 * Reads the most recent rows (highest ids first) that have a non-null `stem`
 * from a table on the `remote_mysql` connection, runs a set of regex
 * heuristics over each stem, and writes two JSON reports (summary + details)
 * into the output directory. The report paths and the summary are also echoed
 * to stdout as JSON.
 *
 * CLI arguments:
 *   $argv[1]  maximum number of rows to scan (minimum 1, default 50000)
 *   $argv[2]  output directory, trailing slashes stripped (default /tmp)
 *   $argv[3]  table name (default "questions")
 *
 * Exits with status 1 when the output directory cannot be created, a report
 * cannot be JSON-encoded, or a report file cannot be written.
 */

$limit = isset($argv[1]) ? max(1, (int) $argv[1]) : 50000;
$outDir = isset($argv[2]) ? rtrim($argv[2], '/') : '/tmp';
$table = isset($argv[3]) ? trim((string) $argv[3]) : 'questions';

// rtrim() turns "/" into "", in which case the report paths built below
// resolve to the filesystem root; check that directory instead of "".
$targetDir = $outDir === '' ? '/' : $outDir;

// Fail before running the remote query when the output location is unusable.
// The trailing is_dir() tolerates another process creating the directory
// between the first check and mkdir().
if (!is_dir($targetDir) && !mkdir($targetDir, 0777, true) && !is_dir($targetDir)) {
    fwrite(STDERR, "Cannot create output directory: {$targetDir}\n");
    exit(1);
}

$rows = DB::connection('remote_mysql')
    ->table($table)
    ->select('id', 'question_type', 'stem')
    ->whereNotNull('stem')
    ->orderByDesc('id')
    ->limit($limit)
    ->get();

// Maximum number of detail entries stored per issue bucket.
$detailCap = 2000;

$issues = [
    'unbalanced_dollar' => [],
    'suspicious_latex_env' => [],
    'mixed_placeholder_inside_math' => [],
    'compare_blank_between_math_tokens' => [],
    'tail_backslash_dollar_marker' => [],
];

// Uncapped number of matches per bucket; the detail lists stop growing at
// $detailCap, so their sizes alone would under-report heavily affected tables.
$totals = array_fill_keys(array_keys($issues), 0);

// Counts a match for $bucket and, while the bucket is below the cap, stores a
// detail entry containing the row id, question type, reason and the first
// 220 characters of the stem.
$record = static function (string $bucket, object $row, string $reason) use (&$issues, &$totals, $detailCap): void {
    $totals[$bucket]++;

    if (count($issues[$bucket]) >= $detailCap) {
        return;
    }

    $issues[$bucket][] = [
        'id' => (int) $row->id,
        'question_type' => (string) $row->question_type,
        'reason' => $reason,
        'stem_preview' => mb_substr((string) $row->stem, 0, 220),
    ];
};

foreach ($rows as $row) {
    $stem = (string) $row->stem;

    // An odd number of "$" characters means some math delimiter is unpaired.
    // Every "$" is counted, including ones preceded by a backslash.
    $dollarCount = substr_count($stem, '$');
    if (($dollarCount % 2) !== 0) {
        $record('unbalanced_dollar', $row, 'odd number of $ delimiters');
    }

    // "\begin{" or "\end{" whose brace is still open at the end of the stem.
    if (preg_match('/\\\\begin\{[^}]*$/u', $stem) || preg_match('/\\\\end\{[^}]*$/u', $stem)) {
        $record('suspicious_latex_env', $row, 'truncated \\begin/\\end block');
    }

    // Inside a single $...$ span: "\underline{...}", a run of two or more
    // underscores, or a pair of parentheses containing only blanks.
    // NOTE(review): the bracket classes and the blank alternatives contain
    // repeated characters; they may originally have listed distinct Unicode
    // variants (e.g. full-width parentheses, non-breaking spaces) - confirm
    // against the committed file.
    if (preg_match('/\$(?:[^$]|\\\\.)*(?:\\\\underline\{[^}]*\}|_{2,}|[((](?:\s| | | )*[))])(?:[^$]|\\\\.)*\$/u', $stem)) {
        $record('mixed_placeholder_inside_math', $row, 'placeholder token appears inside $...$');
    }

    // Two $...$ segments separated only by an underscore run or by an empty
    // pair of parentheses.
    if (
        preg_match('/\$[^$]*\$\s*_{2,}\s*\$[^$]*\$/u', $stem)
        || preg_match('/\$[^$]*\$\s*[((](?:\s| | | )*[))]\s*\$[^$]*\$/u', $stem)
    ) {
        $record('compare_blank_between_math_tokens', $row, 'blank token inserted between two math segments');
    }

    // One or more backslashes followed by "$" at the end of the stem, where
    // only whitespace and <...> tags may follow.
    if (preg_match('/\\\\+\$(?=\s*(?:<[^>]+>\s*)*$)/u', $stem)) {
        $record('tail_backslash_dollar_marker', $row, 'tail backslash-dollar marker used as blank placeholder');
    }
}

$summary = [
    'table' => $table,
    'scan_limit' => $limit,
    'scanned_rows' => count($rows),
    // Number of detail entries stored per bucket (never more than detail_cap).
    'counts' => array_map('count', $issues),
    // Number of rows that matched each check, regardless of the cap.
    'total_counts' => $totals,
    'detail_cap' => $detailCap,
    'generated_at' => date('c'),
];

$stamp = date('Ymd_His');
$summaryPath = "$outDir/question_stem_quality_summary_$stamp.json";
$detailPath = "$outDir/question_stem_quality_details_$stamp.json";

// Stems come from data that is being checked for quality problems, so a
// preview may contain invalid UTF-8. Substitute such bytes instead of letting
// json_encode() return false and produce an empty report.
$jsonFlags = JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE;

// Encodes $payload as JSON, or reports the encoder error and exits.
$encode = static function ($payload) use ($jsonFlags): string {
    $json = json_encode($payload, $jsonFlags);
    if ($json === false) {
        fwrite(STDERR, 'JSON encoding failed: ' . json_last_error_msg() . "\n");
        exit(1);
    }

    return $json;
};

// Writes $contents to $path, or reports the failure and exits.
$write = static function (string $path, string $contents): void {
    if (file_put_contents($path, $contents) === false) {
        fwrite(STDERR, "Failed to write report file: {$path}\n");
        exit(1);
    }
};

$write($summaryPath, $encode($summary));
$write($detailPath, $encode($issues));

echo $encode([
    'summary_path' => $summaryPath,
    'detail_path' => $detailPath,
    'summary' => $summary,
]), "\n";