PaperPartExtractorService.php 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. <?php
  2. namespace App\Services;
  3. use App\Models\PaperPart;
  4. use App\Models\SourcePaper;
  5. use Illuminate\Support\Collection;
  6. use Illuminate\Support\Facades\DB;
  7. use Illuminate\Support\Str;
  /**
   * Splits the raw Markdown of a source paper into ordered parts (one per
   * recognised section heading) and stores them as PaperPart records.
   */
  class PaperPartExtractorService
  {
      /**
       * Matches a level-2 or level-3 Markdown heading that opens a new part:
       * either a numbered marker (optional "第", a run of Chinese/Arabic/Roman
       * numeral characters, then one of 部, 分 or 卷) or one of the listed
       * question-type names. Only the start of the line is inspected.
       */
      private const PART_HEADING_PATTERN = '/^(#{2,3})\s*(第? ?[一二三四五六七八九十0-9IVX]+[部分卷]|选择题|填空题|解答题|综合题|计算题|应用题)/u';

      /**
       * Part type => keywords that identify it inside a heading title.
       * Checked in declaration order; the first type with a matching keyword wins.
       */
      private const TYPE_KEYWORDS = [
          'choice' => ['选择'],
          'fill' => ['填空'],
          'short' => ['解答', '简答', '分析'],
          'calc' => ['计算', '推导'],
      ];

      /**
       * Split the paper's Markdown into question-type sections and replace the
       * paper's existing parts with the freshly parsed ones.
       *
       * The delete and all creates run inside a single DB transaction.
       *
       * @param SourcePaper $paper Paper whose `raw_markdown` is parsed.
       *
       * @return Collection The values returned by PaperPart::create(), in document order.
       */
      public function extract(SourcePaper $paper): Collection
      {
          // The cast guards against a null attribute, which would otherwise raise
          // a TypeError when passed to the string-typed splitIntoParts().
          $parts = $this->splitIntoParts((string) $paper->raw_markdown);

          return DB::transaction(function () use ($paper, $parts) {
              $paper->parts()->delete();

              $result = collect();
              foreach ($parts as $idx => $part) {
                  $result->push(PaperPart::create([
                      'source_paper_id' => $paper->id,
                      'order' => $idx + 1,
                      'title' => $part['title'] ?? null,
                      'type' => $part['type'] ?? null,
                      'raw_markdown' => $part['raw'],
                      'question_count' => $part['question_count'] ?? null,
                      'detected_features' => $part['detected_features'] ?? [],
                  ]));
              }

              return $result;
          });
      }

      /**
       * Cut a Markdown document into parts at every recognised part heading.
       *
       * Text before the first heading becomes an untitled part (title and type
       * null) unless it is blank. Each heading line starts a new part whose
       * title is the full heading text, so a type keyword that follows the
       * section number (e.g. "## 第一部分 选择题") is kept in the title and
       * seen by detectType().
       *
       * When the document holds no non-blank content at all, a single untitled
       * part of type "mixed" carrying the trimmed document is returned.
       *
       * @param string $markdown Full Markdown text; LF, CRLF and CR line endings are accepted.
       *
       * @return list<array{title: ?string, type: ?string, raw: string, detected_features: array}>
       */
      public function splitIntoParts(string $markdown): array
      {
          $lines = preg_split('/\r\n|\r|\n/', $markdown);
          if ($lines === false) {
              // Regex engine failure: fall back to treating the input as one line.
              $lines = [$markdown];
          }

          $pending = [];
          $current = ['title' => null, 'buffer' => []];

          foreach ($lines as $line) {
              if (preg_match(self::PART_HEADING_PATTERN, $line) !== 1) {
                  $current['buffer'][] = $line;
                  continue;
              }

              $pending[] = $current;
              $current = [
                  // Use the whole heading line, not just the matched prefix,
                  // so the title is not truncated mid-word.
                  'title' => trim($line, "# \t"),
                  'buffer' => [$line],
              ];
          }
          $pending[] = $current;

          $segments = [];
          foreach ($pending as $segment) {
              $finalized = $this->finalizeSegment($segment);
              // Only an untitled preamble can be blank (a titled segment always
              // contains its heading line); skip it so no empty part is produced.
              if ($finalized['raw'] !== '') {
                  $segments[] = $finalized;
              }
          }

          if ($segments === []) {
              return [[
                  'title' => null,
                  'type' => 'mixed',
                  'raw' => trim($markdown),
                  'detected_features' => [],
              ]];
          }

          return $segments;
      }

      /**
       * Turn an accumulated segment into the part structure consumed by extract().
       *
       * @param array{title: ?string, buffer: list<string>} $segment Heading title and the lines collected for it.
       *
       * @return array{title: ?string, type: ?string, raw: string, detected_features: array{title: ?string}}
       */
      protected function finalizeSegment(array $segment): array
      {
          $title = $segment['title'];

          return [
              'title' => $title,
              'type' => $this->detectType($title),
              'raw' => trim(implode("\n", $segment['buffer'])),
              'detected_features' => [
                  'title' => $title,
              ],
          ];
      }

      /**
       * Derive a part type from its heading title.
       *
       * @param string|null $title Heading text, or null for an untitled segment.
       *
       * @return string|null "choice", "fill", "short" or "calc" when a keyword matches,
       *                     "mixed" when none does, null when the title is empty.
       */
      protected function detectType(?string $title): ?string
      {
          if (!$title) {
              return null;
          }

          foreach (self::TYPE_KEYWORDS as $type => $keywords) {
              foreach ($keywords as $keyword) {
                  if (str_contains($title, $keyword)) {
                      return $type;
                  }
              }
          }

          return 'mixed';
      }
  }