QuestionImportService.php 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. <?php
  2. namespace App\Services;
  3. use App\Domain\Import\ImportPipeline;
  4. use App\Domain\Import\ImportResult;
  5. use App\Models\MarkdownImport;
  6. use App\Models\PreQuestionCandidate;
  7. use App\Models\SourceFile;
  8. use Illuminate\Support\Facades\DB;
  9. use Illuminate\Support\Facades\Log;
  10. use Illuminate\Support\Str;
  11. class QuestionImportService
  12. {
  13. public function __construct(private readonly ImportPipeline $pipeline)
  14. {
  15. }
  16. public function importMarkdown(string $path, array $meta = []): ImportResult
  17. {
  18. $markdown = (string) file_get_contents($path);
  19. return $this->importMarkdownContent($markdown, [
  20. 'path' => $path,
  21. 'meta' => $meta,
  22. ]);
  23. }
  24. public function importMarkdownContent(string $markdown, array $context = []): ImportResult
  25. {
  26. return DB::transaction(function () use ($markdown, $context) {
  27. $sourceFile = SourceFile::create([
  28. 'uuid' => (string) Str::uuid(),
  29. 'original_filename' => basename((string) ($context['path'] ?? '')),
  30. 'normalized_filename' => basename((string) ($context['path'] ?? '')),
  31. 'extension' => 'md',
  32. 'storage_path' => $context['path'] ?? null,
  33. 'raw_markdown' => $markdown,
  34. 'file_metadata' => $context['meta'] ?? [],
  35. 'source_type' => 'markdown',
  36. 'filename' => basename((string) ($context['path'] ?? '')),
  37. 'path' => $context['path'] ?? null,
  38. 'meta' => $context['meta'] ?? [],
  39. 'status' => 'pending',
  40. ]);
  41. $import = MarkdownImport::create([
  42. 'file_name' => $sourceFile->original_filename ?: $sourceFile->filename ?: 'markdown',
  43. 'original_markdown' => $markdown,
  44. 'source_type' => 'markdown',
  45. 'source_name' => $context['path'] ?? 'markdown',
  46. 'status' => MarkdownImport::STATUS_PROCESSING,
  47. 'progress_stage' => MarkdownImport::STAGE_SPLITTING,
  48. ]);
  49. return new ImportResult($sourceFile->id, $import->id, 0);
  50. });
  51. }
  52. public function processMarkdownImport(int $importId, int $sourceFileId, string $markdown): int
  53. {
  54. $payload = $this->pipeline->run('markdown', [
  55. 'markdown' => $markdown,
  56. ]);
  57. $blocks = $payload['blocks'] ?? [];
  58. MarkdownImport::where('id', $importId)->update([
  59. 'progress_stage' => MarkdownImport::STAGE_AI_PARSING,
  60. 'progress_total' => count($blocks),
  61. 'progress_current' => 0,
  62. ]);
  63. $parser = app(MarkdownQuestionParser::class);
  64. $created = 0;
  65. foreach ($blocks as $block) {
  66. $candidate = $parser->parseRawMarkdown((string) ($block['raw_markdown'] ?? ''), (int) ($block['index'] ?? 0));
  67. $raw = (string) ($candidate['raw_markdown'] ?? '');
  68. $clean = trim(str_replace("\r", '', $raw));
  69. PreQuestionCandidate::create([
  70. 'import_id' => $importId,
  71. 'source_file_id' => $sourceFileId,
  72. 'order_index' => (int) ($block['sequence'] ?? 0),
  73. 'index' => (int) ($block['index'] ?? 0),
  74. 'raw_markdown' => $raw,
  75. 'raw_hash' => $this->hashContent($raw),
  76. 'raw_text' => strip_tags($raw),
  77. 'clean_markdown' => $clean,
  78. 'clean_hash' => $this->hashContent($clean),
  79. 'stem' => $candidate['stem'] ?? null,
  80. 'options' => $candidate['options'] ?? null,
  81. 'images' => $candidate['images'] ?? [],
  82. 'tables' => $candidate['tables'] ?? [],
  83. 'is_question_candidate' => (bool) ($candidate['is_question_candidate'] ?? false),
  84. 'ai_confidence' => $candidate['ai_confidence'] ?? null,
  85. 'status' => PreQuestionCandidate::STATUS_PENDING,
  86. 'meta' => [
  87. 'split_meta' => $block,
  88. ],
  89. ]);
  90. $created++;
  91. MarkdownImport::where('id', $importId)->update([
  92. 'progress_current' => $created,
  93. ]);
  94. }
  95. MarkdownImport::where('id', $importId)->update([
  96. 'status' => MarkdownImport::STATUS_PARSED,
  97. 'progress_stage' => MarkdownImport::STAGE_PARSED,
  98. ]);
  99. Log::info('Markdown import parsed', [
  100. 'import_id' => $importId,
  101. 'source_file_id' => $sourceFileId,
  102. 'candidates' => $created,
  103. ]);
  104. return $created;
  105. }
  106. private function hashContent(?string $content): ?string
  107. {
  108. $content = trim((string) $content);
  109. if ($content === '') {
  110. return null;
  111. }
  112. return hash('sha1', $content);
  113. }
  114. }