QuestionBulkImportService.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446
  1. <?php
  2. namespace App\Services;
  3. use App\Models\Question;
  4. use DateTimeInterface;
  5. use Illuminate\Support\Facades\DB;
  6. use Illuminate\Support\Facades\File;
  7. use Illuminate\Support\Facades\Schema;
  8. use Illuminate\Support\Str;
  9. use Throwable;
  10. /**
  11. * 从 JSON 批量导入 questions,并生成可在 MySQL 客户端执行的 INSERT…ON DUPLICATE KEY UPDATE 脚本。
  12. */
  13. class QuestionBulkImportService
  14. {
  15. /** @var array<int, string>|null */
  16. private ?array $questionsColumns = null;
  17. /**
  18. * @return array<int, array<string, mixed>>
  19. */
  20. public function loadRowsFromFile(string $path): array
  21. {
  22. if (! is_readable($path)) {
  23. throw new \InvalidArgumentException('文件不可读:'.$path);
  24. }
  25. $raw = file_get_contents($path);
  26. if ($raw === false || trim($raw) === '') {
  27. throw new \InvalidArgumentException('文件为空:'.$path);
  28. }
  29. $trimmed = ltrim($raw);
  30. if ($trimmed === '') {
  31. return [];
  32. }
  33. if ($trimmed[0] === '[') {
  34. $decoded = json_decode($raw, true);
  35. if (! is_array($decoded)) {
  36. throw new \InvalidArgumentException('JSON 解析失败(应为对象数组):'.json_last_error_msg());
  37. }
  38. return array_values(array_filter($decoded, static fn ($row) => is_array($row)));
  39. }
  40. // NDJSON:每行一个 JSON 对象
  41. $lines = preg_split('/\r\n|\n|\r/', $raw) ?: [];
  42. $rows = [];
  43. foreach ($lines as $line) {
  44. $line = trim($line);
  45. if ($line === '' || str_starts_with($line, '#')) {
  46. continue;
  47. }
  48. $one = json_decode($line, true);
  49. if (! is_array($one)) {
  50. throw new \InvalidArgumentException('NDJSON 解析失败:'.$line);
  51. }
  52. $rows[] = $one;
  53. }
  54. return $rows;
  55. }
  56. /**
  57. * @param array<string, mixed> $row
  58. * @return array<string, mixed>
  59. */
  60. public function normalizeImportRow(array $row, int $lineIndex): array
  61. {
  62. $stem = (string) ($row['stem'] ?? $row['content'] ?? '');
  63. $kp = isset($row['kp_code']) ? (string) $row['kp_code'] : '';
  64. $code = isset($row['question_code']) ? trim((string) $row['question_code']) : '';
  65. if ($code === '') {
  66. $code = 'QI'.strtoupper(Str::random(12));
  67. }
  68. $options = $row['options'] ?? null;
  69. if (is_string($options) && trim($options) !== '') {
  70. $decoded = json_decode($options, true);
  71. $options = is_array($decoded) ? $decoded : null;
  72. }
  73. $meta = $row['meta'] ?? null;
  74. if (is_string($meta) && trim($meta) !== '') {
  75. $decoded = json_decode($meta, true);
  76. $meta = is_array($decoded) ? $decoded : null;
  77. }
  78. $out = [
  79. 'question_code' => $code,
  80. 'kp_code' => $kp !== '' ? $kp : null,
  81. 'stem' => $stem,
  82. 'options' => is_array($options) ? $options : null,
  83. 'answer' => isset($row['answer']) ? (string) $row['answer'] : null,
  84. 'solution' => isset($row['solution']) ? (string) $row['solution'] : null,
  85. 'difficulty' => isset($row['difficulty']) ? (float) $row['difficulty'] : null,
  86. 'question_type' => isset($row['question_type']) ? (string) $row['question_type'] : (isset($row['type']) ? (string) $row['type'] : null),
  87. 'source' => isset($row['source']) ? (string) $row['source'] : 'json_bulk_import',
  88. 'tags' => isset($row['tags']) ? (string) $row['tags'] : null,
  89. 'meta' => is_array($meta) ? $meta : null,
  90. 'textbook_id' => isset($row['textbook_id']) ? (int) $row['textbook_id'] : null,
  91. 'source_file_id' => isset($row['source_file_id']) ? (int) $row['source_file_id'] : null,
  92. 'source_paper_id' => isset($row['source_paper_id']) ? (int) $row['source_paper_id'] : null,
  93. 'paper_part_id' => isset($row['paper_part_id']) ? (int) $row['paper_part_id'] : null,
  94. 'kp_id' => isset($row['kp_id']) ? (string) $row['kp_id'] : null,
  95. 'kp_name' => isset($row['kp_name']) ? (string) $row['kp_name'] : null,
  96. 'kp_reference' => isset($row['kp_reference']) ? (string) $row['kp_reference'] : null,
  97. 'grade' => isset($row['grade']) ? (int) $row['grade'] : null,
  98. 'audit_status' => isset($row['audit_status']) ? (int) $row['audit_status'] : null,
  99. 'audit_reason' => isset($row['audit_reason']) ? (string) $row['audit_reason'] : null,
  100. 'title_1' => isset($row['title_1']) ? (string) $row['title_1'] : null,
  101. 'title_2' => isset($row['title_2']) ? (string) $row['title_2'] : null,
  102. 'title_3' => isset($row['title_3']) ? (string) $row['title_3'] : null,
  103. 'create_by' => isset($row['create_by']) ? (string) $row['create_by'] : null,
  104. 'textbook_catalog_nodes_id' => isset($row['textbook_catalog_nodes_id']) ? (int) $row['textbook_catalog_nodes_id'] : null,
  105. 'question_category' => isset($row['question_category']) ? (int) $row['question_category'] : null,
  106. 'step_num' => isset($row['step_num']) ? (int) $row['step_num'] : null,
  107. 'solution_temp' => isset($row['solution_temp']) ? (string) $row['solution_temp'] : null,
  108. 'solution_temp2' => isset($row['solution_temp2']) ? (string) $row['solution_temp2'] : null,
  109. ];
  110. if (isset($row['id'])) {
  111. $out['_import_id'] = (int) $row['id'];
  112. }
  113. $out['_line'] = $lineIndex;
  114. return $out;
  115. }
  116. /**
  117. * @param array<int, array<string, mixed>> $rows normalizeImportRow 的输出
  118. * @return array{created: int, updated: int, skipped: int, errors: array<int, string>}
  119. */
  120. public function importToDatabase(array $rows, bool $dryRun = false): array
  121. {
  122. $created = 0;
  123. $updated = 0;
  124. $skipped = 0;
  125. $errors = [];
  126. if (! Schema::hasTable('questions')) {
  127. return ['created' => 0, 'updated' => 0, 'skipped' => 0, 'errors' => ['questions 表不存在']];
  128. }
  129. foreach ($rows as $row) {
  130. $line = (int) ($row['_line'] ?? 0);
  131. try {
  132. $stem = (string) ($row['stem'] ?? '');
  133. $kp = (string) ($row['kp_code'] ?? '');
  134. if ($stem === '' || $kp === '') {
  135. $skipped++;
  136. $errors[] = "第 {$line} 行:题干或 kp_code 为空,已跳过";
  137. continue;
  138. }
  139. $code = (string) ($row['question_code'] ?? '');
  140. $payload = $this->filterToTableColumns($row);
  141. unset($payload['_line'], $payload['_import_id']);
  142. if ($dryRun) {
  143. $exists = Question::query()->where('question_code', $code)->exists();
  144. $exists ? $updated++ : $created++;
  145. continue;
  146. }
  147. $existing = Question::query()->where('question_code', $code)->first();
  148. $question = Question::query()->firstOrNew(['question_code' => $code]);
  149. $question->forceFill($payload);
  150. $question->save();
  151. if ($existing) {
  152. $updated++;
  153. } else {
  154. $created++;
  155. }
  156. } catch (Throwable $e) {
  157. $skipped++;
  158. $errors[] = "第 {$line} 行:".$e->getMessage();
  159. }
  160. }
  161. return compact('created', 'updated', 'skipped', 'errors');
  162. }
  163. /**
  164. * @param array<int, array<string, mixed>> $rows
  165. */
  166. public function renderMysqlScript(array $rows, bool $includeId = false): string
  167. {
  168. $lines = [];
  169. $lines[] = '-- 由 math_cms QuestionBulkImportService 生成';
  170. $lines[] = '-- 在目标库执行前请确认 `questions` 表结构一致,并已备份。';
  171. $lines[] = '-- 冲突键:`question_code`(UNIQUE),使用 ON DUPLICATE KEY UPDATE 合并更新。';
  172. $lines[] = 'SET NAMES utf8mb4;';
  173. $lines[] = '';
  174. $cols = $this->questionsColumnList();
  175. foreach ($rows as $row) {
  176. $stem = (string) ($row['stem'] ?? '');
  177. if ($stem === '') {
  178. continue;
  179. }
  180. $filtered = $this->filterToTableColumns($row);
  181. if ($includeId && isset($row['_import_id'])) {
  182. $filtered['id'] = (int) $row['_import_id'];
  183. }
  184. $sql = $this->buildSingleUpsertSql($filtered, $cols, $includeId);
  185. if ($sql !== '') {
  186. $lines[] = $sql;
  187. $lines[] = '';
  188. }
  189. }
  190. return rtrim(implode("\n", $lines))."\n";
  191. }
  192. /**
  193. * @param array<int> $ids
  194. */
  195. public function exportIdsToMysqlScript(array $ids, bool $includeId = false): string
  196. {
  197. if ($ids === []) {
  198. return "-- 无 id\n";
  199. }
  200. $questions = Question::query()->whereIn('id', $ids)->orderBy('id')->get();
  201. $rows = [];
  202. foreach ($questions as $q) {
  203. $arr = $q->toArray();
  204. $arr['_line'] = 0;
  205. $rows[] = $this->normalizeImportRow($arr, 0);
  206. }
  207. return $this->renderMysqlScript($rows, $includeId);
  208. }
  209. /**
  210. * @param array<string, mixed> $row
  211. * @param array<int, string> $tableColumns
  212. */
  213. private function buildSingleUpsertSql(array $row, array $tableColumns, bool $includeId): string
  214. {
  215. $assign = [];
  216. foreach ($row as $key => $value) {
  217. if (str_starts_with($key, '_')) {
  218. continue;
  219. }
  220. if (! in_array($key, $tableColumns, true)) {
  221. continue;
  222. }
  223. if ($key === 'id' && ! $includeId) {
  224. continue;
  225. }
  226. $assign[$key] = $value;
  227. }
  228. if (! isset($assign['question_code']) || $assign['question_code'] === '') {
  229. return '';
  230. }
  231. $assign = array_filter($assign, static fn ($v) => $v !== null);
  232. $now = date('Y-m-d H:i:s');
  233. if (! array_key_exists('created_at', $assign)) {
  234. $assign['created_at'] = $now;
  235. }
  236. if (! array_key_exists('updated_at', $assign)) {
  237. $assign['updated_at'] = $now;
  238. }
  239. $columns = array_keys($assign);
  240. $values = [];
  241. foreach ($columns as $col) {
  242. $values[] = $this->quoteSqlValue($assign[$col], $col);
  243. }
  244. $colList = implode('`, `', $columns);
  245. $valList = implode(', ', $values);
  246. $updates = [];
  247. foreach ($columns as $col) {
  248. if ($col === 'question_code' || $col === 'id' || $col === 'created_at') {
  249. continue;
  250. }
  251. $updates[] = '`'.$col.'` = VALUES(`'.$col.'`)';
  252. }
  253. if ($updates === []) {
  254. $updates[] = '`updated_at` = VALUES(`updated_at`)';
  255. }
  256. return 'INSERT INTO `questions` (`'.$colList.'`) VALUES ('.$valList.') ON DUPLICATE KEY UPDATE '.implode(', ', $updates).';';
  257. }
  258. /**
  259. * @param mixed $value
  260. */
  261. private function quoteSqlValue($value, string $column): string
  262. {
  263. if ($value === null) {
  264. return 'NULL';
  265. }
  266. if ($value instanceof DateTimeInterface) {
  267. return $this->pdoQuote($value->format('Y-m-d H:i:s'));
  268. }
  269. $jsonCols = ['options', 'meta'];
  270. if (in_array($column, $jsonCols, true)) {
  271. if (is_array($value)) {
  272. $value = json_encode($value, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
  273. }
  274. return $this->pdoQuote((string) $value);
  275. }
  276. if (is_bool($value)) {
  277. return $value ? '1' : '0';
  278. }
  279. if (is_int($value) || is_float($value)) {
  280. return (string) $value;
  281. }
  282. return $this->pdoQuote((string) $value);
  283. }
  284. private function pdoQuote(string $value): string
  285. {
  286. $pdo = DB::connection()->getPdo();
  287. return $pdo->quote($value);
  288. }
  289. /**
  290. * @param array<string, mixed> $row
  291. * @return array<string, mixed>
  292. */
  293. private function filterToTableColumns(array $row): array
  294. {
  295. $cols = array_flip($this->questionsColumnList());
  296. $out = [];
  297. foreach ($row as $k => $v) {
  298. if ($k === '_line' || $k === '_import_id') {
  299. continue;
  300. }
  301. if (! isset($cols[$k])) {
  302. continue;
  303. }
  304. if ($k === 'id') {
  305. continue;
  306. }
  307. $out[$k] = $v;
  308. }
  309. return $out;
  310. }
  311. /**
  312. * @return array<int, string>
  313. */
  314. private function questionsColumnList(): array
  315. {
  316. if ($this->questionsColumns !== null) {
  317. return $this->questionsColumns;
  318. }
  319. if (! Schema::hasTable('questions')) {
  320. $this->questionsColumns = [];
  321. return $this->questionsColumns;
  322. }
  323. $this->questionsColumns = Schema::getColumnListing('questions');
  324. return $this->questionsColumns;
  325. }
  326. /**
  327. * 从本地可读文件路径执行:解析 JSON → 写 SQL → 可选写入 questions。
  328. *
  329. * @return array{ok: bool, sql_path?: string, message?: string, stats?: array{created: int, updated: int, skipped: int, errors: array<int, string>}, error?: string}
  330. */
  331. public function runImportPipeline(
  332. string $absolutePath,
  333. bool $dryRun,
  334. bool $sqlOnly,
  335. bool $withId
  336. ): array {
  337. try {
  338. $rawRows = $this->loadRowsFromFile($absolutePath);
  339. } catch (\Throwable $e) {
  340. return ['ok' => false, 'error' => $e->getMessage()];
  341. }
  342. $rows = [];
  343. foreach ($rawRows as $i => $raw) {
  344. if (! is_array($raw)) {
  345. continue;
  346. }
  347. $rows[] = $this->normalizeImportRow($raw, $i + 1);
  348. }
  349. if ($rows === []) {
  350. return ['ok' => false, 'error' => '未解析到题目'];
  351. }
  352. $outPath = storage_path('app/exports/questions_import_'.date('YmdHis').'.sql');
  353. File::ensureDirectoryExists(dirname($outPath));
  354. File::put($outPath, $this->renderMysqlScript($rows, $withId));
  355. if ($sqlOnly) {
  356. return [
  357. 'ok' => true,
  358. 'sql_path' => $outPath,
  359. 'message' => '已生成 SQL(未写本地库)。',
  360. ];
  361. }
  362. $stats = $this->importToDatabase($rows, $dryRun);
  363. $message = sprintf(
  364. '新建 %d,更新 %d,跳过 %d。SQL:%s',
  365. $stats['created'],
  366. $stats['updated'],
  367. $stats['skipped'],
  368. $outPath
  369. );
  370. return [
  371. 'ok' => true,
  372. 'sql_path' => $outPath,
  373. 'message' => $message,
  374. 'stats' => $stats,
  375. ];
  376. }
  377. }