| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- <?php
- namespace App\Services;
- use App\Models\PaperPart;
- use App\Models\SourcePaper;
- use Illuminate\Support\Collection;
- use Illuminate\Support\Facades\DB;
- use Illuminate\Support\Str;
class PaperPartExtractorService
{
    /**
     * Heading pattern that starts a new part: a level-2 or level-3 Markdown
     * heading followed by either a numbered part/volume marker or one of the
     * known question-type names.
     */
    private const PART_HEADING_PATTERN = '/^(#{2,3})\s*(第? ?[一二三四五六七八九十0-9IVX]+[部分卷]|选择题|填空题|解答题|综合题|计算题|应用题)/u';

    /**
     * Ordered keyword table used by detectType(); the first type whose keyword
     * occurs in the title wins, so the order of entries is significant.
     */
    private const TYPE_KEYWORDS = [
        'choice' => ['选择'],
        'fill' => ['填空'],
        'answer' => ['解答', '简答', '分析'],
        'calc' => ['计算', '推导'],
    ];

    /**
     * Split the paper's Markdown into question-type parts and persist them.
     *
     * Inside a single DB transaction, the records returned by $paper->parts()
     * are deleted and one PaperPart is created per detected part, numbered
     * from 1 in document order.
     *
     * Note: 'question_count' is stored as null unless splitIntoParts() supplies
     * it; the splitter in this class does not compute it.
     *
     * @return Collection the PaperPart models created, in document order
     */
    public function extract(SourcePaper $paper): Collection
    {
        // A paper without Markdown must not raise a TypeError on the string parameter.
        $parts = $this->splitIntoParts((string) ($paper->raw_markdown ?? ''));

        return DB::transaction(function () use ($paper, $parts): Collection {
            $paper->parts()->delete();

            $created = collect();
            foreach (array_values($parts) as $index => $part) {
                $created->push(PaperPart::create([
                    'source_paper_id' => $paper->id,
                    'order' => $index + 1,
                    'title' => $part['title'] ?? null,
                    'type' => $part['type'] ?? null,
                    'raw_markdown' => $part['raw'],
                    'question_count' => $part['question_count'] ?? null,
                    'detected_features' => $part['detected_features'] ?? [],
                ]));
            }

            return $created;
        });
    }

    /**
     * Split Markdown into parts at every heading matching PART_HEADING_PATTERN.
     *
     * - Text before the first matching heading becomes an untitled part
     *   (title and type null) unless it is whitespace-only, in which case it
     *   is dropped.
     * - When no heading matches at all, the whole document is returned as a
     *   single untitled part of type 'mixed'.
     *
     * @return list<array{title: ?string, type: ?string, raw: string, detected_features: array}>
     */
    public function splitIntoParts(string $markdown): array
    {
        $lines = preg_split('/\r\n|\r|\n/', $markdown);
        if ($lines === false) {
            // preg_split reports failure with false; treat the input as one line.
            $lines = [$markdown];
        }

        $pending = [];
        $current = ['title' => null, 'buffer' => []];
        $headingFound = false;

        foreach ($lines as $line) {
            // preg_match yields 0 or false (e.g. invalid UTF-8) for non-headings.
            if (preg_match(self::PART_HEADING_PATTERN, $line) === 1) {
                $headingFound = true;
                $pending[] = $current;
                $current = [
                    // Use the whole heading line: the regex match is only a
                    // prefix of it and would truncate the title.
                    'title' => trim($line, "# \t"),
                    'buffer' => [$line],
                ];
                continue;
            }

            $current['buffer'][] = $line;
        }
        $pending[] = $current;

        if (!$headingFound) {
            return [[
                'title' => null,
                'type' => 'mixed',
                'raw' => trim($markdown),
                'detected_features' => [],
            ]];
        }

        $segments = [];
        foreach ($pending as $candidate) {
            $segment = $this->finalizeSegment($candidate);
            // Skip the empty or whitespace-only preamble before the first heading.
            if ($segment['raw'] !== '') {
                $segments[] = $segment;
            }
        }

        return $segments;
    }

    /**
     * Turn an accumulated segment (title plus buffered lines) into a part array.
     *
     * @param array{title: ?string, buffer: list<string>} $segment
     * @return array{title: ?string, type: ?string, raw: string, detected_features: array}
     */
    protected function finalizeSegment(array $segment): array
    {
        $title = $segment['title'];

        return [
            'title' => $title,
            'type' => $this->detectType($title),
            'raw' => trim(implode("\n", $segment['buffer'])),
            'detected_features' => [
                'title' => $title,
            ],
        ];
    }

    /**
     * Map a part title to a type code by keyword.
     *
     * @return ?string null for a missing/empty title; otherwise 'choice',
     *                 'fill', 'answer', 'calc', or 'mixed' when no keyword matches
     */
    protected function detectType(?string $title): ?string
    {
        if (!$title) {
            return null;
        }

        foreach (self::TYPE_KEYWORDS as $type => $keywords) {
            foreach ($keywords as $keyword) {
                if (str_contains($title, $keyword)) {
                    return $type;
                }
            }
        }

        return 'mixed';
    }
}
|