splitIntoPapers($sourceFile->raw_markdown); return DB::transaction(function () use ($sourceFile, $segments) { $sourceFile->papers()->delete(); $papers = collect(); foreach ($segments as $idx => $segment) { $papers->push( SourcePaper::create([ 'uuid' => (string) Str::uuid(), 'source_file_id' => $sourceFile->id, 'order' => $idx + 1, 'title' => $segment['title'] ?? null, 'full_title' => $segment['full_title'] ?? null, 'chapter' => $segment['chapter'] ?? $sourceFile->extracted_metadata['chapter'] ?? null, 'grade' => $segment['grade'] ?? $sourceFile->extracted_metadata['grade'] ?? null, 'term' => $segment['term'] ?? $sourceFile->extracted_metadata['term'] ?? null, 'edition' => $segment['edition'] ?? $sourceFile->extracted_metadata['edition'] ?? null, 'textbook_series' => $segment['textbook_series'] ?? $sourceFile->extracted_metadata['textbook_series'] ?? null, 'source_type' => $segment['source_type'] ?? null, 'source_year' => $segment['source_year'] ?? $sourceFile->extracted_metadata['year'] ?? null, 'raw_markdown' => $segment['raw'], 'detected_metadata' => $segment['meta'] ?? 
[], ]) ); } return $papers; }); }

    /**
     * Split a Markdown document into paper segments, using headings as boundaries.
     *
     * A line starts a new paper when it is a level-1 or level-2 ATX heading whose
     * text either contains "卷" preceded by at least one character, or begins with
     * one of "期中", "期末", "专项", "模拟". Everything up to the next such heading
     * (the heading line included) forms the segment's raw Markdown. Non-blank text
     * before the first paper heading becomes an untitled segment.
     *
     * @param string $markdown Raw Markdown to split.
     *
     * @return array<int, array{title: ?string, full_title: ?string, raw: string, meta: array<string, string>}>
     *         Segments in document order; always contains at least one element.
     */
    public function splitIntoPapers(string $markdown): array
    {
        // `(?!#)` rejects level-3+ headings: without it `#{1,2}` would match the
        // first two hashes of "### ..." and the rest would leak into the title.
        // The trailing `.*` captures the whole heading text, so details that
        // follow the keyword (year, chapter, "A/B" suffixes) stay in the title
        // and remain visible to detectMetaFromTitle().
        $paperPattern = '/^#{1,2}(?!#)\s*((?:.+卷|期中|期末|专项|模拟).*)$/u';

        $lines = preg_split('/\r\n|\r|\n/', $markdown);
        if ($lines === false) {
            // preg_split() only fails on an internal PCRE error; treat the input as one line.
            $lines = [$markdown];
        }

        $segments = [];
        $title = null;
        $buffer = [];

        foreach ($lines as $line) {
            if (preg_match($paperPattern, $line, $m) !== 1) {
                $buffer[] = $line;
                continue;
            }

            // A new paper heading closes whatever was collected so far.
            $segment = $this->makePaperSegment($title, $buffer);
            if ($segment !== null) {
                $segments[] = $segment;
            }

            $title = trim($m[1]);
            $buffer = [$line];
        }

        $segment = $this->makePaperSegment($title, $buffer);
        if ($segment !== null) {
            $segments[] = $segment;
        }

        if ($segments === []) {
            // Nothing usable was collected (e.g. empty input): return the whole
            // document as a single untitled segment so callers always get one.
            return [[
                'title' => null,
                'full_title' => null,
                'raw' => trim($markdown),
                'meta' => [],
            ]];
        }

        return $segments;
    }

    /**
     * Build one segment from the collected lines.
     *
     * @param string|null       $title  Heading text of the paper, or null for text before the first paper heading.
     * @param array<int,string> $buffer Lines belonging to the segment, heading line included.
     *
     * @return array{title: ?string, full_title: ?string, raw: string, meta: array<string, string>}|null
     *         Null when the collected lines contain only whitespace, so that a
     *         blank preamble does not turn into an empty paper.
     */
    private function makePaperSegment(?string $title, array $buffer): ?array
    {
        $raw = trim(implode("\n", $buffer));
        if ($raw === '') {
            return null;
        }

        return [
            'title' => $title,
            'full_title' => $title,
            'raw' => $raw,
            'meta' => $this->detectMetaFromTitle($title),
        ];
    }

    /**
     * Derive metadata from a paper title.
     *
     * Recognised keys, each set only when found in the title:
     *  - `chapter`:     the first "第…章" occurrence (Chinese numerals 一–十 or digits);
     *  - `source_year`: the first "20" followed by two digits, as a string;
     *  - `source_type`: "midterm" for "期中", else "final" for "期末", else "mock" for "模拟".
     *
     * @param string|null $title Paper title; null or an empty string yields no metadata.
     *
     * @return array<string, string>
     */
    protected function detectMetaFromTitle(?string $title): array
    {
        if ($title === null || $title === '') {
            return [];
        }

        $meta = [];

        if (preg_match('/第[一二三四五六七八九十0-9]+章/u', $title, $m)) {
            $meta['chapter'] = $m[0];
        }

        if (preg_match('/20[0-9]{2}/', $title, $m)) {
            $meta['source_year'] = $m[0];
        }

        // Order matters: the first keyword found wins.
        $typesByKeyword = [
            '期中' => 'midterm',
            '期末' => 'final',
            '模拟' => 'mock',
        ];
        foreach ($typesByKeyword as $keyword => $type) {
            if (Str::contains($title, $keyword)) {
                $meta['source_type'] = $type;
                break;
            }
        }

        return $meta;
    }
}