| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446 |
- <?php
- namespace App\Services;
- use App\Models\Question;
- use DateTimeInterface;
- use Illuminate\Support\Facades\DB;
- use Illuminate\Support\Facades\File;
- use Illuminate\Support\Facades\Schema;
- use Illuminate\Support\Str;
- use Throwable;
- /**
- * 从 JSON 批量导入 questions,并生成可在 MySQL 客户端执行的 INSERT…ON DUPLICATE KEY UPDATE 脚本。
- */
- class QuestionBulkImportService
- {
- /** @var array<int, string>|null */
- private ?array $questionsColumns = null;
- /**
- * @return array<int, array<string, mixed>>
- */
- public function loadRowsFromFile(string $path): array
- {
- if (! is_readable($path)) {
- throw new \InvalidArgumentException('文件不可读:'.$path);
- }
- $raw = file_get_contents($path);
- if ($raw === false || trim($raw) === '') {
- throw new \InvalidArgumentException('文件为空:'.$path);
- }
- $trimmed = ltrim($raw);
- if ($trimmed === '') {
- return [];
- }
- if ($trimmed[0] === '[') {
- $decoded = json_decode($raw, true);
- if (! is_array($decoded)) {
- throw new \InvalidArgumentException('JSON 解析失败(应为对象数组):'.json_last_error_msg());
- }
- return array_values(array_filter($decoded, static fn ($row) => is_array($row)));
- }
- // NDJSON:每行一个 JSON 对象
- $lines = preg_split('/\r\n|\n|\r/', $raw) ?: [];
- $rows = [];
- foreach ($lines as $line) {
- $line = trim($line);
- if ($line === '' || str_starts_with($line, '#')) {
- continue;
- }
- $one = json_decode($line, true);
- if (! is_array($one)) {
- throw new \InvalidArgumentException('NDJSON 解析失败:'.$line);
- }
- $rows[] = $one;
- }
- return $rows;
- }
- /**
- * @param array<string, mixed> $row
- * @return array<string, mixed>
- */
- public function normalizeImportRow(array $row, int $lineIndex): array
- {
- $stem = (string) ($row['stem'] ?? $row['content'] ?? '');
- $kp = isset($row['kp_code']) ? (string) $row['kp_code'] : '';
- $code = isset($row['question_code']) ? trim((string) $row['question_code']) : '';
- if ($code === '') {
- $code = 'QI'.strtoupper(Str::random(12));
- }
- $options = $row['options'] ?? null;
- if (is_string($options) && trim($options) !== '') {
- $decoded = json_decode($options, true);
- $options = is_array($decoded) ? $decoded : null;
- }
- $meta = $row['meta'] ?? null;
- if (is_string($meta) && trim($meta) !== '') {
- $decoded = json_decode($meta, true);
- $meta = is_array($decoded) ? $decoded : null;
- }
- $out = [
- 'question_code' => $code,
- 'kp_code' => $kp !== '' ? $kp : null,
- 'stem' => $stem,
- 'options' => is_array($options) ? $options : null,
- 'answer' => isset($row['answer']) ? (string) $row['answer'] : null,
- 'solution' => isset($row['solution']) ? (string) $row['solution'] : null,
- 'difficulty' => isset($row['difficulty']) ? (float) $row['difficulty'] : null,
- 'question_type' => isset($row['question_type']) ? (string) $row['question_type'] : (isset($row['type']) ? (string) $row['type'] : null),
- 'source' => isset($row['source']) ? (string) $row['source'] : 'json_bulk_import',
- 'tags' => isset($row['tags']) ? (string) $row['tags'] : null,
- 'meta' => is_array($meta) ? $meta : null,
- 'textbook_id' => isset($row['textbook_id']) ? (int) $row['textbook_id'] : null,
- 'source_file_id' => isset($row['source_file_id']) ? (int) $row['source_file_id'] : null,
- 'source_paper_id' => isset($row['source_paper_id']) ? (int) $row['source_paper_id'] : null,
- 'paper_part_id' => isset($row['paper_part_id']) ? (int) $row['paper_part_id'] : null,
- 'kp_id' => isset($row['kp_id']) ? (string) $row['kp_id'] : null,
- 'kp_name' => isset($row['kp_name']) ? (string) $row['kp_name'] : null,
- 'kp_reference' => isset($row['kp_reference']) ? (string) $row['kp_reference'] : null,
- 'grade' => isset($row['grade']) ? (int) $row['grade'] : null,
- 'audit_status' => isset($row['audit_status']) ? (int) $row['audit_status'] : null,
- 'audit_reason' => isset($row['audit_reason']) ? (string) $row['audit_reason'] : null,
- 'title_1' => isset($row['title_1']) ? (string) $row['title_1'] : null,
- 'title_2' => isset($row['title_2']) ? (string) $row['title_2'] : null,
- 'title_3' => isset($row['title_3']) ? (string) $row['title_3'] : null,
- 'create_by' => isset($row['create_by']) ? (string) $row['create_by'] : null,
- 'textbook_catalog_nodes_id' => isset($row['textbook_catalog_nodes_id']) ? (int) $row['textbook_catalog_nodes_id'] : null,
- 'question_category' => isset($row['question_category']) ? (int) $row['question_category'] : null,
- 'step_num' => isset($row['step_num']) ? (int) $row['step_num'] : null,
- 'solution_temp' => isset($row['solution_temp']) ? (string) $row['solution_temp'] : null,
- 'solution_temp2' => isset($row['solution_temp2']) ? (string) $row['solution_temp2'] : null,
- ];
- if (isset($row['id'])) {
- $out['_import_id'] = (int) $row['id'];
- }
- $out['_line'] = $lineIndex;
- return $out;
- }
- /**
- * @param array<int, array<string, mixed>> $rows normalizeImportRow 的输出
- * @return array{created: int, updated: int, skipped: int, errors: array<int, string>}
- */
- public function importToDatabase(array $rows, bool $dryRun = false): array
- {
- $created = 0;
- $updated = 0;
- $skipped = 0;
- $errors = [];
- if (! Schema::hasTable('questions')) {
- return ['created' => 0, 'updated' => 0, 'skipped' => 0, 'errors' => ['questions 表不存在']];
- }
- foreach ($rows as $row) {
- $line = (int) ($row['_line'] ?? 0);
- try {
- $stem = (string) ($row['stem'] ?? '');
- $kp = (string) ($row['kp_code'] ?? '');
- if ($stem === '' || $kp === '') {
- $skipped++;
- $errors[] = "第 {$line} 行:题干或 kp_code 为空,已跳过";
- continue;
- }
- $code = (string) ($row['question_code'] ?? '');
- $payload = $this->filterToTableColumns($row);
- unset($payload['_line'], $payload['_import_id']);
- if ($dryRun) {
- $exists = Question::query()->where('question_code', $code)->exists();
- $exists ? $updated++ : $created++;
- continue;
- }
- $existing = Question::query()->where('question_code', $code)->first();
- $question = Question::query()->firstOrNew(['question_code' => $code]);
- $question->forceFill($payload);
- $question->save();
- if ($existing) {
- $updated++;
- } else {
- $created++;
- }
- } catch (Throwable $e) {
- $skipped++;
- $errors[] = "第 {$line} 行:".$e->getMessage();
- }
- }
- return compact('created', 'updated', 'skipped', 'errors');
- }
- /**
- * @param array<int, array<string, mixed>> $rows
- */
- public function renderMysqlScript(array $rows, bool $includeId = false): string
- {
- $lines = [];
- $lines[] = '-- 由 math_cms QuestionBulkImportService 生成';
- $lines[] = '-- 在目标库执行前请确认 `questions` 表结构一致,并已备份。';
- $lines[] = '-- 冲突键:`question_code`(UNIQUE),使用 ON DUPLICATE KEY UPDATE 合并更新。';
- $lines[] = 'SET NAMES utf8mb4;';
- $lines[] = '';
- $cols = $this->questionsColumnList();
- foreach ($rows as $row) {
- $stem = (string) ($row['stem'] ?? '');
- if ($stem === '') {
- continue;
- }
- $filtered = $this->filterToTableColumns($row);
- if ($includeId && isset($row['_import_id'])) {
- $filtered['id'] = (int) $row['_import_id'];
- }
- $sql = $this->buildSingleUpsertSql($filtered, $cols, $includeId);
- if ($sql !== '') {
- $lines[] = $sql;
- $lines[] = '';
- }
- }
- return rtrim(implode("\n", $lines))."\n";
- }
- /**
- * @param array<int> $ids
- */
- public function exportIdsToMysqlScript(array $ids, bool $includeId = false): string
- {
- if ($ids === []) {
- return "-- 无 id\n";
- }
- $questions = Question::query()->whereIn('id', $ids)->orderBy('id')->get();
- $rows = [];
- foreach ($questions as $q) {
- $arr = $q->toArray();
- $arr['_line'] = 0;
- $rows[] = $this->normalizeImportRow($arr, 0);
- }
- return $this->renderMysqlScript($rows, $includeId);
- }
- /**
- * @param array<string, mixed> $row
- * @param array<int, string> $tableColumns
- */
- private function buildSingleUpsertSql(array $row, array $tableColumns, bool $includeId): string
- {
- $assign = [];
- foreach ($row as $key => $value) {
- if (str_starts_with($key, '_')) {
- continue;
- }
- if (! in_array($key, $tableColumns, true)) {
- continue;
- }
- if ($key === 'id' && ! $includeId) {
- continue;
- }
- $assign[$key] = $value;
- }
- if (! isset($assign['question_code']) || $assign['question_code'] === '') {
- return '';
- }
- $assign = array_filter($assign, static fn ($v) => $v !== null);
- $now = date('Y-m-d H:i:s');
- if (! array_key_exists('created_at', $assign)) {
- $assign['created_at'] = $now;
- }
- if (! array_key_exists('updated_at', $assign)) {
- $assign['updated_at'] = $now;
- }
- $columns = array_keys($assign);
- $values = [];
- foreach ($columns as $col) {
- $values[] = $this->quoteSqlValue($assign[$col], $col);
- }
- $colList = implode('`, `', $columns);
- $valList = implode(', ', $values);
- $updates = [];
- foreach ($columns as $col) {
- if ($col === 'question_code' || $col === 'id' || $col === 'created_at') {
- continue;
- }
- $updates[] = '`'.$col.'` = VALUES(`'.$col.'`)';
- }
- if ($updates === []) {
- $updates[] = '`updated_at` = VALUES(`updated_at`)';
- }
- return 'INSERT INTO `questions` (`'.$colList.'`) VALUES ('.$valList.') ON DUPLICATE KEY UPDATE '.implode(', ', $updates).';';
- }
- /**
- * @param mixed $value
- */
- private function quoteSqlValue($value, string $column): string
- {
- if ($value === null) {
- return 'NULL';
- }
- if ($value instanceof DateTimeInterface) {
- return $this->pdoQuote($value->format('Y-m-d H:i:s'));
- }
- $jsonCols = ['options', 'meta'];
- if (in_array($column, $jsonCols, true)) {
- if (is_array($value)) {
- $value = json_encode($value, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
- }
- return $this->pdoQuote((string) $value);
- }
- if (is_bool($value)) {
- return $value ? '1' : '0';
- }
- if (is_int($value) || is_float($value)) {
- return (string) $value;
- }
- return $this->pdoQuote((string) $value);
- }
- private function pdoQuote(string $value): string
- {
- $pdo = DB::connection()->getPdo();
- return $pdo->quote($value);
- }
- /**
- * @param array<string, mixed> $row
- * @return array<string, mixed>
- */
- private function filterToTableColumns(array $row): array
- {
- $cols = array_flip($this->questionsColumnList());
- $out = [];
- foreach ($row as $k => $v) {
- if ($k === '_line' || $k === '_import_id') {
- continue;
- }
- if (! isset($cols[$k])) {
- continue;
- }
- if ($k === 'id') {
- continue;
- }
- $out[$k] = $v;
- }
- return $out;
- }
- /**
- * @return array<int, string>
- */
- private function questionsColumnList(): array
- {
- if ($this->questionsColumns !== null) {
- return $this->questionsColumns;
- }
- if (! Schema::hasTable('questions')) {
- $this->questionsColumns = [];
- return $this->questionsColumns;
- }
- $this->questionsColumns = Schema::getColumnListing('questions');
- return $this->questionsColumns;
- }
- /**
- * 从本地可读文件路径执行:解析 JSON → 写 SQL → 可选写入 questions。
- *
- * @return array{ok: bool, sql_path?: string, message?: string, stats?: array{created: int, updated: int, skipped: int, errors: array<int, string>}, error?: string}
- */
- public function runImportPipeline(
- string $absolutePath,
- bool $dryRun,
- bool $sqlOnly,
- bool $withId
- ): array {
- try {
- $rawRows = $this->loadRowsFromFile($absolutePath);
- } catch (\Throwable $e) {
- return ['ok' => false, 'error' => $e->getMessage()];
- }
- $rows = [];
- foreach ($rawRows as $i => $raw) {
- if (! is_array($raw)) {
- continue;
- }
- $rows[] = $this->normalizeImportRow($raw, $i + 1);
- }
- if ($rows === []) {
- return ['ok' => false, 'error' => '未解析到题目'];
- }
- $outPath = storage_path('app/exports/questions_import_'.date('YmdHis').'.sql');
- File::ensureDirectoryExists(dirname($outPath));
- File::put($outPath, $this->renderMysqlScript($rows, $withId));
- if ($sqlOnly) {
- return [
- 'ok' => true,
- 'sql_path' => $outPath,
- 'message' => '已生成 SQL(未写本地库)。',
- ];
- }
- $stats = $this->importToDatabase($rows, $dryRun);
- $message = sprintf(
- '新建 %d,更新 %d,跳过 %d。SQL:%s',
- $stats['created'],
- $stats['updated'],
- $stats['skipped'],
- $outPath
- );
- return [
- 'ok' => true,
- 'sql_path' => $outPath,
- 'message' => $message,
- 'stats' => $stats,
- ];
- }
- }
|