ImportInferenceService.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. <?php
  2. namespace App\Services;
  3. use App\Models\MarkdownImport;
  4. use App\Models\SourcePaper;
  5. use App\Models\Textbook;
  6. use App\Models\TextbookCatalog;
  7. use Illuminate\Support\Arr;
  8. use Illuminate\Support\Str;
  /**
   * Heuristics that infer import metadata (term, grade, chapter, paper type,
   * textbook series, textbook and catalog node) from free text such as paper
   * titles and parsed file names.
   *
   * All matching is best-effort: methods return null (or an empty list) when no
   * confident match is found rather than throwing.
   */
  class ImportInferenceService
  {
      /** Grade labels recognised by inferGrade(), mapped to their numeric grade. */
      private const GRADE_LABELS = [
          '七年级' => '7',
          '八年级' => '8',
          '九年级' => '9',
          '高一' => '10',
          '高二' => '11',
          '高三' => '12',
      ];

      /**
       * Keyword => paper type. The first keyword found wins, so order matters:
       * specific keywords (e.g. '周练') must stay ahead of generic ones ('练').
       */
      private const SOURCE_TYPE_KEYWORDS = [
          '单元' => '单元测试',
          '月考' => '月考',
          '期中' => '期中考试',
          '期末' => '期末考试',
          '中考' => '中考套卷',
          '模拟' => '模拟考试',
          '考前' => '模拟考试',
          '真题' => '真题卷',
          '周测' => '周练/周测',
          '周练' => '周练/周测',
          '练' => '课时练习',
      ];

      /** Minimum similarity (0-100) for a fuzzy catalog-node match. */
      private const CATALOG_MATCH_THRESHOLD = 60;

      /** Minimum similarity (0-100) for a fuzzy series match. */
      private const SERIES_MATCH_THRESHOLD = 70;

      /**
       * Infer the term (volume) mentioned in the text.
       *
       * @return string|null '上册' or '下册', or null when neither is mentioned.
       */
      public function inferTerm(string $context): ?string
      {
          return match (true) {
              Str::contains($context, ['上册', '上学期']) => '上册',
              Str::contains($context, ['下册', '下学期']) => '下册',
              default => null,
          };
      }

      /**
       * Infer the numeric grade from a grade label contained in the text.
       *
       * @return string|null Grade number as a string ('7'..'12'), or null.
       */
      public function inferGrade(string $context): ?string
      {
          foreach (self::GRADE_LABELS as $label => $value) {
              if (Str::contains($context, $label)) {
                  return $value;
              }
          }

          return null;
      }

      /**
       * Extract the first chapter heading ("第…章" plus the rest of its line).
       *
       * Accepts Chinese or Arabic numerals and optional horizontal spacing around
       * the number, consistent with extractChapterLabel(). Trailing whitespace
       * (including a stray "\r" from CRLF input) is not part of the result.
       *
       * @return string|null The heading text, or null when none is present.
       */
      public function inferChapter(string $context): ?string
      {
          if (preg_match('/第\h*[一二三四五六七八九十0-9]+\h*章[^\r\n]*/u', $context, $match) === 1) {
              return rtrim($match[0]);
          }

          return null;
      }

      /**
       * Find the catalog node of a textbook whose title best matches the text.
       *
       * When the text contains a chapter/section label ("第…章" / "第…节") only
       * that label is matched; otherwise the whole text is. A node whose
       * normalized title contains, or is contained in, the normalized needle is
       * returned immediately (nodes are visited by depth, then sort order).
       * Otherwise the most similar title wins if it reaches the threshold.
       *
       * @return int|null Matching node id, or null when nothing is close enough.
       */
      public function matchCatalogNodeId(string $context, ?int $textbookId): ?int
      {
          if (!$textbookId) {
              return null;
          }

          $needle = trim($context);
          if ($needle === '') {
              return null;
          }

          $normalizedNeedle = $this->normalizeText($this->extractChapterLabel($needle) ?? $needle);
          if ($normalizedNeedle === '') {
              // Only whitespace/punctuation: nothing can match, so skip the query.
              return null;
          }

          $nodes = TextbookCatalog::query()
              ->where('textbook_id', $textbookId)
              ->orderBy('depth')
              ->orderBy('sort_order')
              ->get(['id', 'title']);

          $bestId = null;
          $bestScore = 0;

          foreach ($nodes as $node) {
              $normalizedTitle = $this->normalizeText((string) $node->title);
              if ($normalizedTitle === '') {
                  continue;
              }

              if (
                  Str::contains($normalizedNeedle, $normalizedTitle)
                  || Str::contains($normalizedTitle, $normalizedNeedle)
              ) {
                  return (int) $node->id;
              }

              // NOTE(review): an earlier revision added a bonus when the chapter
              // label of the title equalled the one in the needle. That branch
              // could never run: equal labels always satisfy the containment
              // test above first. It was removed without changing results.
              $score = $this->similarityScore($normalizedNeedle, $normalizedTitle);
              if ($score > $bestScore) {
                  $bestScore = $score;
                  $bestId = (int) $node->id;
              }
          }

          return $bestScore >= self::CATALOG_MATCH_THRESHOLD ? $bestId : null;
      }

      /**
       * Resolve a textbook from parsed file-name parts (kept for older callers).
       *
       * @param array $parsed May contain 'grade', 'term', 'series' and 'subject'.
       */
      public function resolveTextbookFromFilename(array $parsed): ?Textbook
      {
          return $this->findBestTextbook([
              'grade' => $parsed['grade'] ?? null,
              'term' => $parsed['term'] ?? null,
              'series' => $parsed['series'] ?? null,
              'subject' => $parsed['subject'] ?? null,
          ]);
      }

      /**
       * Pick the best textbook of a series for the given attributes.
       *
       * The series is mandatory: it comes from 'series_id', or is resolved from
       * the 'series' name hint. Within that series, grade and semester matches
       * only rank the candidates; when nothing matches, the first textbook of the
       * series is still returned.
       *
       * @param array $attributes Optional keys: 'series_id', 'series', 'grade', 'term'.
       * @return Textbook|null Null when the series is unknown or has no textbooks.
       */
      public function findBestTextbook(array $attributes): ?Textbook
      {
          $seriesId = $attributes['series_id'] ?? null;
          if (!$seriesId && !empty($attributes['series'])) {
              $seriesId = $this->resolveSeries((string) $attributes['series'])?->id;
          }
          if (!$seriesId) {
              return null;
          }

          // Normalise once so that a label such as '七年级' is compared as 7 rather
          // than being cast to 0 and "matching" textbooks that have no grade.
          $grade = $this->toGradeNumber($attributes['grade'] ?? null);
          $semester = $this->termToSemester((string) ($attributes['term'] ?? ''));

          $best = null;
          $bestScore = -1;

          foreach (Textbook::query()->where('series_id', $seriesId)->get() as $textbook) {
              $score = 0;
              if ($grade !== null && (int) $textbook->grade === $grade) {
                  $score += 5;
              }
              if ($semester !== null && (int) $textbook->semester === $semester) {
                  $score += 5;
              }

              // Strict ">" keeps the earliest textbook on ties.
              if ($score > $bestScore) {
                  $bestScore = $score;
                  $best = $textbook;
              }
          }

          return $best;
      }

      /**
       * Infer the paper type from keywords in the text.
       *
       * @return string|null The mapped type; falls back to '综合测试' when no
       *                     keyword is found (null is never returned in practice).
       */
      public function inferSourceType(string $context): ?string
      {
          foreach (self::SOURCE_TYPE_KEYWORDS as $keyword => $type) {
              if (Str::contains($context, $keyword)) {
                  return $type;
              }
          }

          return '综合测试';
      }

      /**
       * Map a colloquial series name (e.g. "北师大版") to the stored series record
       * (e.g. "北师大版(新)").
       *
       * A series is returned immediately when its name, or its name without a
       * parenthesised qualifier, contains or is contained in the hint. Otherwise
       * the most similar name wins if it reaches the threshold.
       */
      public function resolveSeries(?string $hint): ?\App\Models\TextbookSeries
      {
          if (!$hint) {
              return null;
          }

          $hint = trim($hint);
          if ($hint === '') {
              return null;
          }

          $normalizedHint = $this->normalizeText($hint);
          $best = null;
          $bestScore = 0;

          foreach (\App\Models\TextbookSeries::query()->get() as $series) {
              $name = (string) $series->name;

              // Drop a parenthesised qualifier such as (新), (旧), (修订版), (实验版)
              // or (…制). Both ASCII and full-width parentheses (U+FF08/U+FF09) are
              // accepted; code points are escaped so the pattern survives
              // character-width normalisation of this file.
              $standardName = trim(
                  preg_replace(
                      '/[(\x{FF08}]\s*(?:新|旧|修订版|实验版|[^()\x{FF08}\x{FF09}]*制)\s*[)\x{FF09}]/u',
                      '',
                      $name
                  ) ?? $name
              );

              if (
                  Str::contains($name, $hint)
                  || Str::contains($hint, $name)
                  || Str::contains($hint, $standardName)
                  || Str::contains($standardName, $hint)
              ) {
                  return $series;
              }

              $score = $this->similarityScore($normalizedHint, $this->normalizeText($name));
              if ($score > $bestScore) {
                  $bestScore = $score;
                  $best = $series;
              }
          }

          return $bestScore >= self::SERIES_MATCH_THRESHOLD ? $best : null;
      }

      /**
       * Rank textbooks that may belong to a paper.
       *
       * Paper fields take precedence; values parsed from the import file name are
       * used as fallbacks. Scoring: grade +3, semester +3, official title found in
       * the paper title +4, each alias found +2, series name match +5, subject
       * found in the official title +1.
       *
       * @param array $parsedImportFilename Optional keys: 'grade', 'term', 'series', 'subject'.
       * @return array List of up to five suggestions (keys: id, title, series,
       *               grade, semester, score), highest score first.
       */
      public function getTextbookSuggestions(SourcePaper $paper, array $parsedImportFilename = []): array
      {
          $context = Str::lower((string) ($paper->title ?? $paper->full_title ?? ''));

          // Grades may arrive as int, numeric string or label; compare as int so
          // the strict comparison below can actually succeed.
          $grade = $this->toGradeNumber($paper->grade)
              ?? $this->toGradeNumber($parsedImportFilename['grade'] ?? null);
          $semester = $this->termToSemester($paper->term)
              ?? $this->termToSemester($parsedImportFilename['term'] ?? null);
          $seriesHint = (string) ($paper->textbook_series ?: ($parsedImportFilename['series'] ?? ''));
          $subjectHint = Str::lower((string) ($parsedImportFilename['subject'] ?? ''));

          $suggestions = [];

          foreach (Textbook::query()->with('series')->get() as $textbook) {
              $score = 0;

              if ($grade !== null && (int) $textbook->grade === $grade) {
                  $score += 3;
              }
              if ($semester !== null && (int) $textbook->semester === $semester) {
                  $score += 3;
              }

              $official = Str::lower((string) $textbook->official_title);
              if ($official !== '' && Str::contains($context, $official)) {
                  $score += 4;
              }

              foreach ($this->normalizeAliases($textbook->aliases ?? []) as $alias) {
                  $alias = Str::lower((string) $alias);
                  if ($alias !== '' && Str::contains($context, $alias)) {
                      $score += 2;
                  }
              }

              $seriesName = (string) ($textbook->series?->name ?? '');
              if (
                  $seriesHint !== ''
                  && $seriesName !== ''
                  && (Str::contains($seriesName, $seriesHint) || Str::contains($seriesHint, $seriesName))
              ) {
                  $score += 5;
              }

              if ($subjectHint !== '' && $official !== '' && Str::contains($official, $subjectHint)) {
                  $score += 1;
              }

              if ($score > 0) {
                  $suggestions[] = [
                      'id' => $textbook->id,
                      'title' => $textbook->official_title,
                      'series' => $textbook->series?->name ?? '未归类系列',
                      'grade' => $textbook->grade,
                      'semester' => $textbook->semester,
                      'score' => $score,
                  ];
              }
          }

          usort($suggestions, static fn (array $a, array $b): int => $b['score'] <=> $a['score']);

          return array_slice($suggestions, 0, 5);
      }

      /**
       * Convert a term string to a semester number.
       *
       * @return int|null 1 when the term contains '上', 2 when it contains '下',
       *                  otherwise null. '上' is checked first.
       */
      public function termToSemester(?string $term): ?int
      {
          if (!$term) {
              return null;
          }

          return match (true) {
              Str::contains($term, '上') => 1,
              Str::contains($term, '下') => 2,
              default => null,
          };
      }

      /**
       * Normalise a grade given as int, numeric string or grade label.
       *
       * @return int|null Positive grade number, or null when it cannot be determined.
       */
      private function toGradeNumber(mixed $grade): ?int
      {
          if (!is_scalar($grade)) {
              return null;
          }

          $text = trim((string) $grade);
          if ($text === '') {
              return null;
          }

          if (is_numeric($text)) {
              $number = (int) $text;

              return $number > 0 ? $number : null;
          }

          $inferred = $this->inferGrade($text);

          return $inferred !== null ? (int) $inferred : null;
      }

      /**
       * Lower-case the text and strip all whitespace and common punctuation.
       *
       * Besides ASCII and CJK punctuation, the full-width forms of , . ( ) : ; ! ?
       * are removed; they are written as \x{…} escapes so the pattern does not
       * depend on how this file's characters are normalised.
       *
       * @return string Normalised text; '' if the input cannot be processed
       *                (e.g. invalid UTF-8).
       */
      private function normalizeText(string $text): string
      {
          $normalized = preg_replace(
              [
                  '/\s+/u',
                  '/[,.\-_()\[\]:;!?。、—【】\x{FF0C}\x{FF0E}\x{FF08}\x{FF09}\x{FF1A}\x{FF1B}\x{FF01}\x{FF1F}]/u',
              ],
              '',
              Str::lower($text)
          );

          return $normalized ?? '';
      }

      /**
       * Extract a chapter or section label such as "第一章" or "第 3 节".
       */
      private function extractChapterLabel(string $text): ?string
      {
          if (preg_match('/第\s*[一二三四五六七八九十0-9]+\s*[章节]/u', $text, $match) === 1) {
              return $match[0];
          }

          return null;
      }

      /**
       * Similarity of two strings as a rounded percentage (0-100).
       *
       * similar_text() compares bytes, which over-rates unrelated CJK characters
       * that merely share UTF-8 lead bytes. Both strings are therefore re-encoded
       * to one byte per distinct character first, making the comparison
       * character-based.
       */
      private function similarityScore(string $a, string $b): int
      {
          if ($a === '' || $b === '') {
              return 0;
          }

          [$left, $right] = $this->toSingleByteAlphabet($a, $b);
          similar_text($left, $right, $percent);

          return (int) round($percent);
      }

      /**
       * Re-encode two UTF-8 strings so that each distinct character is one byte.
       *
       * @return array{0: string, 1: string} The re-encoded pair, or the original
       *         pair when more than 256 distinct characters occur (byte-wise
       *         comparison is then the fallback).
       */
      private function toSingleByteAlphabet(string $a, string $b): array
      {
          $alphabet = [];
          $encoded = [];

          foreach ([$a, $b] as $text) {
              $bytes = '';
              foreach (mb_str_split($text, 1, 'UTF-8') as $char) {
                  $alphabet[$char] ??= count($alphabet);
                  if ($alphabet[$char] > 255) {
                      return [$a, $b];
                  }
                  $bytes .= chr($alphabet[$char]);
              }
              $encoded[] = $bytes;
          }

          return $encoded;
      }

      /**
       * Normalise the aliases attribute to a list.
       *
       * Accepts an array (returned as is), a JSON-encoded array, or a string
       * separated by commas, semicolons (ASCII or full-width) or newlines.
       *
       * @return array List of aliases; empty for null, blank or unusable input.
       */
      private function normalizeAliases(array|string|null $aliases): array
      {
          if (is_array($aliases)) {
              return $aliases;
          }
          if (!is_string($aliases) || trim($aliases) === '') {
              return [];
          }

          $decoded = json_decode($aliases, true);
          if (is_array($decoded)) {
              return $decoded;
          }

          // \x{FF0C} / \x{FF1B} are the full-width comma and semicolon.
          $parts = preg_split('/[,;\n\x{FF0C}\x{FF1B}]+/u', $aliases);
          if ($parts === false) {
              return [];
          }

          return array_values(array_filter(
              array_map(static fn (string $part): string => trim($part), $parts),
              static fn (string $part): bool => $part !== ''
          ));
      }
  }