| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366 |
- <?php
- namespace App\Services;
- use App\Models\MarkdownImport;
- use App\Models\SourcePaper;
- use App\Models\Textbook;
- use App\Models\TextbookCatalog;
- use Illuminate\Support\Arr;
- use Illuminate\Support\Str;
/**
 * Heuristics that infer import metadata (term, grade, chapter, source type,
 * textbook series, textbook and catalogue node) from free text such as paper
 * titles and parsed file names.
 */
class ImportInferenceService
{
    /** Grade keywords mapped to the grade number (as a string) returned by inferGrade(). */
    private const GRADE_LABELS = [
        '七年级' => '7',
        '八年级' => '8',
        '九年级' => '9',
        '高一' => '10',
        '高二' => '11',
        '高三' => '12',
    ];

    /**
     * Keyword => source type. Order matters: the first keyword found wins, so the
     * broad '练' entry must stay after the more specific '周练'.
     */
    private const SOURCE_TYPE_KEYWORDS = [
        '单元' => '单元测试',
        '月考' => '月考',
        '期中' => '期中考试',
        '期末' => '期末考试',
        '中考' => '中考套卷',
        '模拟' => '模拟考试',
        '考前' => '模拟考试',
        '真题' => '真题卷',
        '周测' => '周练/周测',
        '周练' => '周练/周测',
        '练' => '课时练习',
    ];

    /** Source type returned when no keyword matches. */
    private const DEFAULT_SOURCE_TYPE = '综合测试';

    /** Bonus added in matchCatalogNodeId() when needle and title carry the same chapter label. */
    private const CHAPTER_LABEL_BONUS = 30;

    /** Minimum fuzzy score a catalogue node must reach to be returned. */
    private const CATALOG_MATCH_THRESHOLD = 60;

    /** Minimum similarity percentage a series name must reach to be returned. */
    private const SERIES_MATCH_THRESHOLD = 70;

    /** Score every textbook of the requested series starts with in findBestTextbook(). */
    private const SERIES_BASE_SCORE = 20;

    /**
     * Infer the term (volume) from free text.
     *
     * @param string $context Text to scan, e.g. a paper title.
     * @return string|null '上册' or '下册'; null when no term keyword is present.
     */
    public function inferTerm(string $context): ?string
    {
        if (Str::contains($context, ['上册', '上学期'])) {
            return '上册';
        }
        if (Str::contains($context, ['下册', '下学期'])) {
            return '下册';
        }

        return null;
    }

    /**
     * Infer the grade from free text.
     *
     * @param string $context Text to scan.
     * @return string|null Grade number as a string ('7'..'12'); null when no grade keyword is present.
     */
    public function inferGrade(string $context): ?string
    {
        foreach (self::GRADE_LABELS as $label => $value) {
            if (Str::contains($context, $label)) {
                return $value;
            }
        }

        return null;
    }

    /**
     * Infer the chapter heading from free text.
     *
     * Accepts Chinese or Arabic numerals and optional whitespace around the number,
     * mirroring extractChapterLabel(). The match runs to the end of the line; the
     * line terminator (LF or CRLF) and trailing whitespace are not part of the result.
     *
     * @param string $context Text to scan.
     * @return string|null The first chapter heading with the rest of its line, or null.
     */
    public function inferChapter(string $context): ?string
    {
        if (preg_match('/第\\s*[一二三四五六七八九十0-9]+\\s*章[^\\r\\n]*/u', $context, $match) === 1) {
            return rtrim($match[0]);
        }

        return null;
    }

    /**
     * Find the catalogue node of a textbook that best matches the given text.
     *
     * When the text contains a chapter/section label, only that label is used as
     * the needle. Nodes are visited by depth, then sort order. The first node whose
     * normalized title contains, or is contained in, the normalized needle is
     * returned immediately. Otherwise the node with the highest score (similarity
     * percentage plus a bonus for an identical chapter label) is returned, provided
     * the score reaches the threshold.
     *
     * @param string   $context    Text to match against catalogue titles.
     * @param int|null $textbookId Textbook whose catalogue is searched.
     * @return int|null Matching node id, or null when nothing matches well enough.
     */
    public function matchCatalogNodeId(string $context, ?int $textbookId): ?int
    {
        if (!$textbookId) {
            return null;
        }

        $needle = trim($context);
        if ($needle === '') {
            return null;
        }

        $chapterNeedle = $this->extractChapterLabel($needle);
        $normalizedNeedle = $this->normalizeText($chapterNeedle ?? $needle);
        if ($normalizedNeedle === '') {
            // A needle made only of whitespace/punctuation cannot match any title,
            // so skip the catalogue query altogether.
            return null;
        }

        $nodes = TextbookCatalog::query()
            ->where('textbook_id', $textbookId)
            ->orderBy('depth')
            ->orderBy('sort_order')
            ->get(['id', 'title']);

        $bestId = null;
        $bestScore = 0;

        foreach ($nodes as $node) {
            $title = (string) $node->title;
            if ($title === '') {
                continue;
            }

            $normalizedTitle = $this->normalizeText($title);
            if ($normalizedTitle === '') {
                continue;
            }

            // Direct containment in either direction is treated as a certain match.
            if (
                Str::contains($normalizedNeedle, $normalizedTitle)
                || Str::contains($normalizedTitle, $normalizedNeedle)
            ) {
                return (int) $node->id;
            }

            $score = $this->similarityScore($normalizedNeedle, $normalizedTitle);
            if ($chapterNeedle !== null && $chapterNeedle === $this->extractChapterLabel($title)) {
                $score += self::CHAPTER_LABEL_BONUS;
            }

            if ($score > $bestScore) {
                $bestScore = $score;
                $bestId = (int) $node->id;
            }
        }

        return $bestScore >= self::CATALOG_MATCH_THRESHOLD ? $bestId : null;
    }

    /**
     * Resolve a textbook from attributes parsed out of a file name (legacy entry point).
     *
     * Forwards the 'grade', 'term', 'series' and 'subject' keys to findBestTextbook().
     *
     * @param array $parsed Parsed file-name attributes.
     * @return Textbook|null
     */
    public function resolveTextbookFromFilename(array $parsed): ?Textbook
    {
        return $this->findBestTextbook([
            'grade' => $parsed['grade'] ?? null,
            'term' => $parsed['term'] ?? null,
            'series' => $parsed['series'] ?? null,
            'subject' => $parsed['subject'] ?? null,
        ]);
    }

    /**
     * Pick the best matching textbook for a set of attributes.
     *
     * A series is mandatory: it is taken from 'series_id', or resolved from the
     * 'series' name via resolveSeries(). Only textbooks of that series are
     * considered; a matching grade and a matching semester each raise the score.
     * On a tie the first textbook returned by the query wins, so a textbook of the
     * series is returned even when neither grade nor term matches.
     *
     * @param array $attributes Supported keys: series_id, series, grade, term.
     * @return Textbook|null Null when no series can be resolved or the series has no textbooks.
     */
    public function findBestTextbook(array $attributes): ?Textbook
    {
        $seriesId = $attributes['series_id'] ?? null;
        if (!$seriesId && !empty($attributes['series'])) {
            $seriesId = $this->resolveSeries($attributes['series'])?->id;
        }
        if (!$seriesId) {
            return null;
        }

        // Non-numeric grades are dropped instead of being cast to 0, which would
        // otherwise "match" textbooks whose grade is null or 0.
        $grade = $this->toGradeNumber($attributes['grade'] ?? null);
        $semester = $this->termToSemester((string) ($attributes['term'] ?? ''));

        $best = null;
        $maxScore = -1;

        foreach (Textbook::query()->where('series_id', $seriesId)->get() as $textbook) {
            $score = self::SERIES_BASE_SCORE;

            if ($grade !== null && (int) $textbook->grade === $grade) {
                $score += 5;
            }
            if ($semester && (int) $textbook->semester === $semester) {
                $score += 5;
            }

            if ($score > $maxScore) {
                $maxScore = $score;
                $best = $textbook;
            }
        }

        // Every candidate scores at least the base score, so no extra threshold
        // is needed: $best is null only when the series has no textbooks.
        return $best;
    }

    /**
     * Infer the kind of paper from free text.
     *
     * @param string $context Text to scan.
     * @return string|null The source type of the first matching keyword; '综合测试'
     *                     when none matches (null is never returned, the nullable
     *                     return type is kept for existing callers).
     */
    public function inferSourceType(string $context): ?string
    {
        foreach (self::SOURCE_TYPE_KEYWORDS as $keyword => $sourceType) {
            if (Str::contains($context, $keyword)) {
                return $sourceType;
            }
        }

        return self::DEFAULT_SOURCE_TYPE;
    }

    /**
     * Map a colloquial series name (e.g. "北师大版") to a stored series model
     * (e.g. "北师大版(新)").
     *
     * The first series whose name, or name without its bracketed qualifier,
     * contains or is contained in the hint is returned. Otherwise the series with
     * the highest similarity is returned if it reaches the threshold.
     *
     * @param string|null $hint Series name as parsed from a title or file name.
     * @return \App\Models\TextbookSeries|null
     */
    public function resolveSeries(?string $hint): ?\App\Models\TextbookSeries
    {
        if (!$hint) {
            return null;
        }

        $hint = trim($hint);
        if ($hint === '') {
            return null;
        }

        $normalizedHint = $this->normalizeText($hint);
        $best = null;
        $bestScore = 0;

        foreach (\App\Models\TextbookSeries::query()->get() as $series) {
            $name = (string) $series->name;
            $standardName = $this->stripSeriesQualifier($name);

            // Str::contains() never matches an empty needle, so an empty name or an
            // empty stripped name cannot produce a false positive here.
            if (
                Str::contains($name, $hint)
                || Str::contains($hint, $name)
                || Str::contains($hint, $standardName)
                || Str::contains($standardName, $hint)
            ) {
                return $series;
            }

            $score = $this->similarityScore($normalizedHint, $this->normalizeText($name));
            if ($score > $bestScore) {
                $bestScore = $score;
                $best = $series;
            }
        }

        return $bestScore >= self::SERIES_MATCH_THRESHOLD ? $best : null;
    }

    /**
     * Build a ranked list of textbook suggestions for a paper.
     *
     * Each textbook is scored on grade, semester, official title or aliases
     * appearing in the paper title, series name and subject. Textbooks with a
     * score of zero are dropped and at most five suggestions are returned.
     *
     * @param SourcePaper $paper                Paper to find textbooks for.
     * @param array       $parsedImportFilename Optional fallbacks parsed from the import
     *                                          file name (grade, term, series, subject).
     * @return array List of arrays with keys id, title, series, grade, semester, score,
     *               ordered by descending score.
     */
    public function getTextbookSuggestions(SourcePaper $paper, array $parsedImportFilename = []): array
    {
        $title = (string) ($paper->title ?? $paper->full_title ?? '');
        $context = Str::lower($title);

        // Both sources are normalized to int: the filename fallback is typically a
        // string such as '7', which would never pass the strict comparison below.
        $grade = $this->toGradeNumber($paper->grade)
            ?? $this->toGradeNumber($parsedImportFilename['grade'] ?? null);
        $semester = $this->termToSemester($paper->term)
            ?? $this->termToSemester($parsedImportFilename['term'] ?? null);
        $seriesHint = $paper->textbook_series ?: ($parsedImportFilename['series'] ?? null);

        // Loop-invariant: lowercase the subject hint once.
        $rawSubject = $parsedImportFilename['subject'] ?? null;
        $subjectNeedle = $rawSubject ? Str::lower((string) $rawSubject) : '';

        $suggestions = [];

        foreach (Textbook::query()->with('series')->get() as $textbook) {
            $score = 0;

            if ($grade !== null && (int) $textbook->grade === $grade) {
                $score += 3;
            }
            if ($semester && (int) $textbook->semester === $semester) {
                $score += 3;
            }

            $official = Str::lower((string) $textbook->official_title);
            if ($official !== '' && Str::contains($context, $official)) {
                $score += 4;
            }

            foreach ($this->normalizeAliases($textbook->aliases ?? []) as $alias) {
                $alias = Str::lower((string) $alias);
                if ($alias !== '' && Str::contains($context, $alias)) {
                    $score += 2;
                }
            }

            $seriesName = $textbook->series?->name ?? null;
            if (
                $seriesHint
                && $seriesName
                && (
                    Str::contains((string) $seriesName, (string) $seriesHint)
                    || Str::contains((string) $seriesHint, (string) $seriesName)
                )
            ) {
                $score += 5;
            }

            if ($subjectNeedle !== '' && $official !== '' && Str::contains($official, $subjectNeedle)) {
                $score += 1;
            }

            if ($score > 0) {
                $suggestions[] = [
                    'id' => $textbook->id,
                    'title' => $textbook->official_title,
                    'series' => $textbook->series?->name ?? '未归类系列',
                    'grade' => $textbook->grade,
                    'semester' => $textbook->semester,
                    'score' => $score,
                ];
            }
        }

        usort($suggestions, fn ($a, $b) => $b['score'] <=> $a['score']);

        return array_slice($suggestions, 0, 5);
    }

    /**
     * Normalize a term string to a semester number.
     *
     * @param string|null $term Term text such as '上册' or '下学期'.
     * @return int|null 1 when the text contains '上', 2 when it contains '下', otherwise null.
     */
    public function termToSemester(?string $term): ?int
    {
        if (!$term) {
            return null;
        }
        if (Str::contains($term, '上')) {
            return 1;
        }
        if (Str::contains($term, '下')) {
            return 2;
        }

        return null;
    }

    /**
     * Lowercase the text and strip all whitespace and common punctuation.
     *
     * @return string Normalized text; '' when a regex step fails (e.g. malformed UTF-8).
     */
    private function normalizeText(string $text): string
    {
        $withoutSpaces = preg_replace('/\\s+/u', '', Str::lower($text));
        if ($withoutSpaces === null) {
            // Do not feed null into the next preg_replace() call.
            return '';
        }

        return preg_replace('/[,。.、,\\.\\-—_()\\(\\)【】\\[\\]::;;!!??]/u', '', $withoutSpaces) ?? '';
    }

    /**
     * Extract the first chapter or section label (e.g. "第一章", "第 3 节") from the text.
     */
    private function extractChapterLabel(string $text): ?string
    {
        if (preg_match('/第\\s*[一二三四五六七八九十0-9]+\\s*[章节]/u', $text, $match)) {
            return trim($match[0]);
        }

        return null;
    }

    /**
     * Similarity of two strings as a rounded percentage from similar_text();
     * 0 when either string is empty.
     */
    private function similarityScore(string $a, string $b): int
    {
        if ($a === '' || $b === '') {
            return 0;
        }

        similar_text($a, $b, $percent);

        return (int) round($percent);
    }

    /**
     * Coerce an aliases value to an array.
     *
     * Arrays are returned as-is; strings are decoded as JSON when they hold a JSON
     * array/object, otherwise split on commas, semicolons and newlines with empty
     * entries removed. Anything else yields an empty array.
     */
    private function normalizeAliases(array|string|null $aliases): array
    {
        if (is_array($aliases)) {
            return $aliases;
        }
        if (!is_string($aliases) || trim($aliases) === '') {
            return [];
        }

        $decoded = json_decode($aliases, true);
        if (is_array($decoded)) {
            return $decoded;
        }

        // preg_split() returns false on failure (e.g. malformed UTF-8).
        $parts = preg_split('/[,,;;\\n]+/u', $aliases) ?: [];

        return array_values(array_filter(array_map('trim', $parts)));
    }

    /**
     * Remove a bracketed edition qualifier such as "(新)", "(旧)", "(修订版)",
     * "(实验版)" or "(…制)" from a series name. ASCII and full-width parentheses
     * are both recognised; text outside the brackets is left untouched.
     */
    private function stripSeriesQualifier(string $name): string
    {
        $stripped = preg_replace(
            '/[(\\x{FF08}]\\s*(?:新|旧|修订版|实验版|[^()\\x{FF08}\\x{FF09}]*制)\\s*[)\\x{FF09}]/u',
            '',
            $name
        );

        return trim($stripped ?? $name);
    }

    /**
     * Convert a grade value to a positive integer.
     *
     * @param mixed $grade Raw grade (int, numeric string, null, ...).
     * @return int|null Null when the value is not numeric or not positive.
     */
    private function toGradeNumber(mixed $grade): ?int
    {
        if (!is_numeric($grade)) {
            return null;
        }

        $number = (int) $grade;

        return $number > 0 ? $number : null;
    }
}
|