KatexRenderer.php 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. <?php
  2. namespace App\Services;
  3. use Illuminate\Support\Facades\Log;
  4. use Symfony\Component\Process\Process;
  5. use Symfony\Component\Process\Exception\ProcessFailedException;
  /**
   * Server-side KaTeX rendering service.
   *
   * Pre-renders the LaTeX formulas embedded in an HTML fragment by piping the
   * HTML through a Node.js KaTeX script, so the formulas do not depend on
   * headless Chrome executing JavaScript.
   */
  class KatexRenderer
  {
      /**
       * Prefix of every cache key written by this service.
       */
      private const CACHE_PREFIX = 'katex_rendered_';

      /**
       * Cache lifetime in seconds (24 hours).
       */
      private const CACHE_TTL = 86400;

      /**
       * Maximum run time of a single Node.js invocation, in seconds.
       */
      private const PROCESS_TIMEOUT = 30;

      /**
       * Absolute path of the Node.js rendering script.
       */
      private string $scriptPath;

      /**
       * Whether rendered output is read from / written to the cache.
       */
      private bool $cacheEnabled = true;

      public function __construct()
      {
          $this->scriptPath = base_path('scripts/katex-render.mjs');
      }

      /**
       * Render every LaTeX formula found in an HTML fragment.
       *
       * When the Node.js script cannot be run, the sanitised (but unrendered)
       * HTML is returned and nothing is cached.
       *
       * @param string $html HTML that may contain LaTeX formulas
       * @return string HTML with the formulas rendered, or the input when it
       *                contains no formula
       */
      public function renderHtml(string $html): string
      {
          if (!$this->containsLatex($html)) {
              Log::debug('KatexRenderer: HTML 不包含 LaTeX 公式,跳过渲染');
              return $html;
          }

          // Repair entities and broken line breaks inside formulas first, so
          // the cache key is computed on the normalised markup.
          $html = $this->sanitizeLatexInHtml($html);
          $cacheKey = $this->getCacheKey($html);

          if ($this->cacheEnabled) {
              $cached = cache()->get($cacheKey);
              // Only a non-empty string is a usable hit; anything else would
              // violate the declared return type.
              if (is_string($cached) && $cached !== '') {
                  Log::debug('KatexRenderer: 从缓存获取渲染结果');
                  return $cached;
              }
          }

          $rendered = $this->callNodeScript($html);

          if (strpos($rendered, 'katex-error') !== false) {
              Log::warning('KatexRenderer: 发现未解析公式(katex-error)', [
                  'sample' => $this->extractKatexErrorSnippet($rendered),
              ]);
          }

          // An unchanged result means rendering did not happen; do not cache it.
          if ($this->cacheEnabled && $rendered !== $html) {
              cache()->put($cacheKey, $rendered, self::CACHE_TTL);
          }

          return $rendered;
      }

      /**
       * Tell whether the HTML contains any of the delimiters $...$, $$...$$,
       * \(...\) or \[...\].
       */
      private function containsLatex(string $html): bool
      {
          return preg_match('/\$[^$]+\$|\$\$[\s\S]+?\$\$|\\\\\([\s\S]+?\\\\\)|\\\\\[[\s\S]+?\\\\\]/', $html) === 1;
      }

      /**
       * Pipe the HTML through the Node.js KaTeX script.
       *
       * The configured Node binary is tried first, then plain `node` as a
       * fallback against misconfiguration.
       *
       * @param string $html sanitised HTML, sent to the script on stdin
       * @return string the script's stdout, or $html unchanged when the script
       *                is missing, prints nothing, or every candidate fails
       */
      private function callNodeScript(string $html): string
      {
          if (!file_exists($this->scriptPath)) {
              Log::warning('KatexRenderer: 渲染脚本不存在', ['path' => $this->scriptPath]);
              return $html;
          }

          $configuredBinary = trim((string) config('math-render.katex.node_binary', 'node'));
          $candidates = array_values(array_unique(array_filter([
              $configuredBinary ?: 'node',
              'node',
          ])));

          $lastError = null;
          $lastExitCode = null;

          foreach ($candidates as $nodeBinary) {
              try {
                  $process = new Process([$nodeBinary, $this->scriptPath]);
                  $process->setInput($html);
                  $process->setTimeout(self::PROCESS_TIMEOUT);
                  $process->run();

                  if (!$process->isSuccessful()) {
                      $lastExitCode = $process->getExitCode();
                      $lastError = trim($process->getErrorOutput()) ?: trim($process->getOutput());
                      Log::warning('KatexRenderer: Node.js 脚本执行失败,尝试下一个候选', [
                          'node_binary' => $nodeBinary,
                          'exit_code' => $lastExitCode,
                          'error' => $lastError,
                      ]);
                      continue;
                  }

                  $output = $process->getOutput();
                  // Strict comparison: empty() would also reject the string "0".
                  if ($output === '') {
                      Log::warning('KatexRenderer: Node.js 脚本输出为空', [
                          'node_binary' => $nodeBinary,
                      ]);
                      return $html;
                  }

                  Log::info('KatexRenderer: LaTeX 公式渲染成功', [
                      'node_binary' => $nodeBinary,
                      'input_length' => strlen($html),
                      'output_length' => strlen($output),
                  ]);
                  return $output;
              } catch (\Exception $e) {
                  $lastError = $e->getMessage();
                  Log::warning('KatexRenderer: 渲染异常,尝试下一个候选', [
                      'node_binary' => $nodeBinary,
                      'error' => $lastError,
                  ]);
              }
          }

          Log::error('KatexRenderer: 所有Node候选均执行失败', [
              'node_candidates' => $candidates,
              'last_exit_code' => $lastExitCode,
              'last_error' => $lastError,
          ]);
          return $html;
      }

      /**
       * Build the cache key for a sanitised HTML fragment.
       */
      private function getCacheKey(string $html): string
      {
          return self::CACHE_PREFIX . md5($html);
      }

      /**
       * Turn caching off (useful while debugging).
       */
      public function disableCache(): self
      {
          $this->cacheEnabled = false;
          return $this;
      }

      /**
       * Turn caching back on.
       */
      public function enableCache(): self
      {
          $this->cacheEnabled = true;
          return $this;
      }

      /**
       * Record a cache-clear request.
       *
       * This only writes a log entry; no cache entry is removed here. Deleting
       * by prefix needs Redis or another driver with wildcard deletion.
       */
      public function clearCache(): void
      {
          Log::info('KatexRenderer: 缓存清除请求(需要手动清理或使用 Redis)');
      }

      /**
       * Clean up the TeX source of every formula in the HTML.
       *
       * Formulas are located by their delimiters, in this order: $$...$$,
       * $...$ (single line, not part of $$), \(...\) and \[...\]. Text outside
       * the delimiters is left untouched.
       */
      private function sanitizeLatexInHtml(string $html): string
      {
          // [pattern, opening delimiter, closing delimiter]
          $passes = [
              ['/\$\$([\s\S]*?)\$\$/', '$$', '$$'],
              ['/(?<!\$)\$([^$\n]+?)\$(?!\$)/', '$', '$'],
              ['/\\\\\(([\s\S]*?)\\\\\)/', '\\(', '\\)'],
              ['/\\\\\[([\s\S]*?)\\\\\]/', '\\[', '\\]'],
          ];

          foreach ($passes as [$pattern, $open, $close]) {
              $result = preg_replace_callback(
                  $pattern,
                  fn (array $m): string => $open . $this->sanitizeFormula($m[1]) . $close,
                  $html
              );

              // null signals a PCRE failure (e.g. backtrack limit on very large
              // input); keep the markup we have instead of losing the document.
              if ($result === null) {
                  Log::warning('KatexRenderer: 公式预处理正则执行失败,跳过该定界符', [
                      'pattern' => $pattern,
                      'preg_error' => preg_last_error(),
                  ]);
                  continue;
              }

              $html = $result;
          }

          return $html;
      }

      /**
       * Normalise the TeX source of a single formula (delimiters excluded).
       *
       * NOTE(review): entities are decoded inside the formula, so raw `<` / `>`
       * end up in the returned HTML; if the input can be untrusted, confirm the
       * Node script escapes what it emits.
       */
      private function sanitizeFormula(string $tex): string
      {
          // Decode until stable so doubly-encoded entities (&amp;lt;) are resolved.
          do {
              $previous = $tex;
              $tex = html_entity_decode($previous, ENT_QUOTES, 'UTF-8');
          } while ($tex !== $previous);

          // Applied in order; each entry is pattern => replacement.
          $rewrites = [
              // <br> and raw newlines inside a formula break e.g. \frac{M}\n{N}.
              '/<br\s*\/?>/i' => '',
              '/\r\n|\r|\n/' => '',
              // \Arg is not a KaTeX command.
              '/\\\\Arg\b/' => '\\\\operatorname{Arg}',
              // Restore the missing space after \quad / \qquad (\quadz, \qquadx).
              '/\\\\(q?quad)(?=[A-Za-z0-9])/' => '\\\\$1 ',
          ];

          foreach ($rewrites as $pattern => $replacement) {
              // preg_replace() yields null on a PCRE error; keep the current text.
              $tex = preg_replace($pattern, $replacement, $tex) ?? $tex;
          }

          return $this->fixCasesLineBreaks($tex);
      }

      /**
       * Restore row breaks inside \begin{cases}...\end{cases}.
       *
       * A lone backslash is turned back into `\\` when it is followed by `+` or
       * `-`, by a digit, or by a single letter that is itself followed by one of
       * `_ ^ = < >`, a digit or whitespace (e.g. `\3x`, `\a_1x`, `\x=`).
       */
      private function fixCasesLineBreaks(string $tex): string
      {
          $loneBackslashes = [
              '/(?<!\\\\)\\\\(?=[-+])/',
              '/(?<!\\\\)\\\\(?=[0-9])/',
              '/(?<!\\\\)\\\\(?=[A-Za-z](?:[_^0-9=<>]|\s))/',
          ];

          $result = preg_replace_callback(
              '/\\\\begin\{cases\}([\s\S]*?)\\\\end\{cases\}/',
              static function (array $m) use ($loneBackslashes): string {
                  // The patterns are applied one after another to the cases body.
                  $content = preg_replace($loneBackslashes, '\\\\\\\\', $m[1]) ?? $m[1];
                  return '\\begin{cases}' . $content . '\\end{cases}';
              },
              $tex
          );

          // Fall back to the input if PCRE failed.
          return $result ?? $tex;
      }

      /**
       * Pull the text of the first katex-error span out of rendered HTML.
       *
       * @return array<string, string> ['text' => first 200 characters], or an
       *                               empty array when no error span is found
       */
      private function extractKatexErrorSnippet(string $html): array
      {
          if (!preg_match('/<span class="katex-error"[^>]*>(.*?)<\/span>/is', $html, $match)) {
              return [];
          }

          $text = trim(strip_tags($match[1]));
          $text = preg_replace('/\s+/', ' ', $text) ?? $text;

          return [
              'text' => mb_substr($text, 0, 200),
          ];
      }
  }