| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- <?php
- namespace App\Services;
- use App\Domain\Import\ImportPipeline;
- use App\Domain\Import\ImportResult;
- use App\Models\MarkdownImport;
- use App\Models\PreQuestionCandidate;
- use App\Models\SourceFile;
- use Illuminate\Support\Facades\DB;
- use Illuminate\Support\Facades\Log;
- use Illuminate\Support\Str;
/**
 * Registers markdown documents as imports and turns their split blocks into
 * pre-question candidates.
 *
 * Registration (importMarkdown / importMarkdownContent) only persists the
 * source file and the import record; the actual splitting and parsing happens
 * in processMarkdownImport().
 */
class QuestionImportService
{
    public function __construct(private readonly ImportPipeline $pipeline)
    {
    }

    /**
     * Read a markdown file from disk and register it as a new import.
     *
     * @param string $path Location of the markdown file to read.
     * @param array  $meta Free-form metadata stored alongside the source file.
     *
     * @throws \RuntimeException When the file cannot be read.
     */
    public function importMarkdown(string $path, array $meta = []): ImportResult
    {
        $markdown = file_get_contents($path);

        // A failed read returns false; casting it to '' would silently
        // register an empty import for a file that was never actually read.
        if ($markdown === false) {
            throw new \RuntimeException(sprintf('Unable to read markdown file: %s', $path));
        }

        return $this->importMarkdownContent($markdown, [
            'path' => $path,
            'meta' => $meta,
        ]);
    }

    /**
     * Persist a SourceFile and a MarkdownImport for the given markdown text.
     *
     * Both records are created inside a single database transaction. The
     * import is created with status STATUS_PROCESSING and stage
     * STAGE_SPLITTING; no blocks are split or parsed here.
     *
     * @param string $markdown Raw markdown content.
     * @param array  $context  Optional keys read here: 'path' (origin of the
     *                         content) and 'meta' (metadata array).
     *
     * @return ImportResult Built from the source file id, the import id and 0.
     *                      NOTE(review): the third argument is presumably the
     *                      candidate count (none exist yet) - confirm against
     *                      ImportResult.
     */
    public function importMarkdownContent(string $markdown, array $context = []): ImportResult
    {
        $path = $context['path'] ?? null;
        $meta = $context['meta'] ?? [];
        // Empty string when no path was supplied.
        $filename = basename((string) $path);

        return DB::transaction(static function () use ($markdown, $path, $meta, $filename) {
            $sourceFile = SourceFile::create([
                'uuid' => (string) Str::uuid(),
                'original_filename' => $filename,
                'normalized_filename' => $filename,
                'extension' => 'md',
                'storage_path' => $path,
                'raw_markdown' => $markdown,
                'file_metadata' => $meta,
                'source_type' => 'markdown',
                'filename' => $filename,
                'path' => $path,
                'meta' => $meta,
                'status' => 'pending',
            ]);

            $import = MarkdownImport::create([
                // Falls back through the stored names to a fixed label when
                // the content did not come from a named file.
                'file_name' => $sourceFile->original_filename ?: $sourceFile->filename ?: 'markdown',
                'original_markdown' => $markdown,
                'source_type' => 'markdown',
                'source_name' => $path ?? 'markdown',
                'status' => MarkdownImport::STATUS_PROCESSING,
                'progress_stage' => MarkdownImport::STAGE_SPLITTING,
            ]);

            return new ImportResult($sourceFile->id, $import->id, 0);
        });
    }

    /**
     * Split the markdown through the import pipeline, parse every block and
     * store one PreQuestionCandidate per block.
     *
     * Progress is written to the MarkdownImport row as it goes: the total is
     * set before parsing starts, progress_current is bumped after each
     * candidate, and the import is marked STATUS_PARSED at the end.
     *
     * NOTE(review): the writes in this method are not wrapped in a
     * transaction, so an exception mid-loop leaves the already-created
     * candidates in place and the import status untouched - confirm this is
     * intended.
     *
     * @param int    $importId     Id of the MarkdownImport being processed.
     * @param int    $sourceFileId Id of the SourceFile the markdown came from.
     * @param string $markdown     Raw markdown handed to the pipeline.
     *
     * @return int Number of candidates created.
     */
    public function processMarkdownImport(int $importId, int $sourceFileId, string $markdown): int
    {
        $payload = $this->pipeline->run('markdown', [
            'markdown' => $markdown,
        ]);
        $blocks = $payload['blocks'] ?? [];

        MarkdownImport::where('id', $importId)->update([
            'progress_stage' => MarkdownImport::STAGE_AI_PARSING,
            'progress_total' => count($blocks),
            'progress_current' => 0,
        ]);

        $parser = app(MarkdownQuestionParser::class);
        $created = 0;

        foreach ($blocks as $block) {
            $candidate = $parser->parseRawMarkdown(
                (string) ($block['raw_markdown'] ?? ''),
                (int) ($block['index'] ?? 0)
            );

            // The stored raw text is whatever the parser reports back, not
            // the block's own raw_markdown.
            $raw = (string) ($candidate['raw_markdown'] ?? '');
            // "Clean" markdown: carriage returns removed, outer whitespace trimmed.
            $clean = trim(str_replace("\r", '', $raw));

            PreQuestionCandidate::create([
                'import_id' => $importId,
                'source_file_id' => $sourceFileId,
                'order_index' => (int) ($block['sequence'] ?? 0),
                'index' => (int) ($block['index'] ?? 0),
                'raw_markdown' => $raw,
                'raw_hash' => $this->hashContent($raw),
                'raw_text' => strip_tags($raw),
                'clean_markdown' => $clean,
                'clean_hash' => $this->hashContent($clean),
                'stem' => $candidate['stem'] ?? null,
                'options' => $candidate['options'] ?? null,
                'images' => $candidate['images'] ?? [],
                'tables' => $candidate['tables'] ?? [],
                'is_question_candidate' => (bool) ($candidate['is_question_candidate'] ?? false),
                'ai_confidence' => $candidate['ai_confidence'] ?? null,
                'status' => PreQuestionCandidate::STATUS_PENDING,
                'meta' => [
                    // Keep the splitter's block as-is alongside the candidate.
                    'split_meta' => $block,
                ],
            ]);

            $created++;

            MarkdownImport::where('id', $importId)->update([
                'progress_current' => $created,
            ]);
        }

        MarkdownImport::where('id', $importId)->update([
            'status' => MarkdownImport::STATUS_PARSED,
            'progress_stage' => MarkdownImport::STAGE_PARSED,
        ]);

        Log::info('Markdown import parsed', [
            'import_id' => $importId,
            'source_file_id' => $sourceFileId,
            'candidates' => $created,
        ]);

        return $created;
    }

    /**
     * SHA-1 hex digest of the trimmed content.
     *
     * @return string|null Null when the content is null or blank after trimming.
     */
    private function hashContent(?string $content): ?string
    {
        $content = trim((string) $content);

        if ($content === '') {
            return null;
        }

        return hash('sha1', $content);
    }
}
|