KatexRenderer.php 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. <?php
  2. namespace App\Services;
  3. use Illuminate\Support\Facades\Log;
  4. use Symfony\Component\Process\Process;
  5. use Symfony\Component\Process\Exception\ProcessFailedException;
  /**
   * Server-side KaTeX rendering service.
   *
   * Pre-renders the LaTeX formulas found in an HTML string by piping the HTML
   * through a Node.js KaTeX script, so that formula rendering does not depend
   * on headless Chrome executing JavaScript.
   */
  class KatexRenderer
  {
      /**
       * Prefix of every cache key written by this service.
       */
      private const CACHE_PREFIX = 'katex_rendered_';

      /**
       * Cache lifetime in seconds (24 hours).
       */
      private const CACHE_TTL = 86400;

      /**
       * Maximum run time of a single Node.js invocation, in seconds.
       */
      private const NODE_TIMEOUT = 30;

      /**
       * Path of the Node.js rendering script.
       */
      private string $scriptPath;

      /**
       * Whether rendered output is read from / written to the cache.
       */
      private bool $cacheEnabled = true;

      public function __construct()
      {
          $this->scriptPath = base_path('scripts/katex-render.mjs');
      }

      /**
       * Render every LaTeX formula contained in the given HTML.
       *
       * Cache failures never abort rendering. When no rendering takes place
       * (script missing, every Node candidate failed, empty output) the HTML is
       * returned exactly as it was passed in.
       *
       * @param string $html HTML that may contain LaTeX formulas
       * @return string HTML with the formulas rendered, or the untouched input
       */
      public function renderHtml(string $html): string
      {
          if (!$this->containsLatex($html)) {
              Log::debug('KatexRenderer: HTML 不包含 LaTeX 公式,跳过渲染');

              return $html;
          }

          // Repair entities / line breaks inside the formulas before rendering.
          $sanitized = $this->sanitizeLatexInHtml($html);
          $cacheKey = $this->getCacheKey($sanitized);

          if ($this->cacheEnabled) {
              try {
                  $cached = cache()->get($cacheKey);
                  if (is_string($cached) && $cached !== '') {
                      Log::debug('KatexRenderer: 从缓存获取渲染结果');

                      return $cached;
                  }
              } catch (\Throwable $e) {
                  Log::warning('KatexRenderer: 读取缓存失败,继续执行实时渲染', [
                      'error' => $e->getMessage(),
                  ]);
              }
          }

          $rendered = $this->callNodeScript($sanitized);

          // callNodeScript() hands its input back when nothing was rendered.
          // The sanitized text has HTML entities decoded inside the formulas
          // (e.g. "&lt;" became "<"), so it must not be emitted as HTML; fall
          // back to the caller's original markup instead.
          if ($rendered === $sanitized) {
              return $html;
          }

          if (strpos($rendered, 'katex-error') !== false) {
              Log::warning('KatexRenderer: 发现未解析公式(katex-error)', [
                  'sample' => $this->extractKatexErrorSnippet($rendered),
              ]);
          }

          if ($this->cacheEnabled) {
              try {
                  cache()->put($cacheKey, $rendered, self::CACHE_TTL);
              } catch (\Throwable $e) {
                  Log::warning('KatexRenderer: 写入缓存失败,已忽略', [
                      'error' => $e->getMessage(),
                  ]);
              }
          }

          return $rendered;
      }

      /**
       * Tell whether the HTML contains one of the LaTeX delimiter pairs
       * $...$, $$...$$, \(...\) or \[...\].
       */
      private function containsLatex(string $html): bool
      {
          return preg_match('/\$[^$]+\$|\$\$[\s\S]+?\$\$|\\\\\([\s\S]+?\\\\\)|\\\\\[[\s\S]+?\\\\\]/', $html) === 1;
      }

      /**
       * Pipe the HTML through the Node.js rendering script.
       *
       * The configured Node binary is tried first, then plain "node" as a
       * safety net against a misconfigured path.
       *
       * @param string $html HTML fed to the script on stdin
       * @return string the script's stdout, or $html unchanged when the script
       *                is missing, produced no output, or every candidate failed
       */
      private function callNodeScript(string $html): string
      {
          if (!file_exists($this->scriptPath)) {
              Log::warning('KatexRenderer: 渲染脚本不存在', ['path' => $this->scriptPath]);

              return $html;
          }

          $configuredBinary = trim((string) config('math-render.katex.node_binary', 'node'));
          $candidates = array_values(array_unique([
              $configuredBinary !== '' ? $configuredBinary : 'node',
              'node',
          ]));

          $lastError = null;
          $lastExitCode = null;

          foreach ($candidates as $nodeBinary) {
              try {
                  $process = new Process([$nodeBinary, $this->scriptPath]);
                  $process->setInput($html);
                  $process->setTimeout(self::NODE_TIMEOUT);
                  $process->run();

                  if (!$process->isSuccessful()) {
                      $lastExitCode = $process->getExitCode();
                      $lastError = trim($process->getErrorOutput()) ?: trim($process->getOutput());
                      Log::warning('KatexRenderer: Node.js 脚本执行失败,尝试下一个候选', [
                          'node_binary' => $nodeBinary,
                          'exit_code' => $lastExitCode,
                          'error' => $lastError,
                      ]);

                      continue;
                  }

                  $output = $process->getOutput();
                  if ($output === '') {
                      Log::warning('KatexRenderer: Node.js 脚本输出为空', [
                          'node_binary' => $nodeBinary,
                      ]);

                      return $html;
                  }

                  Log::info('KatexRenderer: LaTeX 公式渲染成功', [
                      'node_binary' => $nodeBinary,
                      'input_length' => strlen($html),
                      'output_length' => strlen($output),
                  ]);

                  return $output;
              } catch (\Exception $e) {
                  $lastError = $e->getMessage();
                  Log::warning('KatexRenderer: 渲染异常,尝试下一个候选', [
                      'node_binary' => $nodeBinary,
                      'error' => $lastError,
                  ]);
              }
          }

          Log::error('KatexRenderer: 所有Node候选均执行失败', [
              'node_candidates' => $candidates,
              'last_exit_code' => $lastExitCode,
              'last_error' => $lastError,
          ]);

          return $html;
      }

      /**
       * Build the cache key for a piece of (sanitized) HTML.
       */
      private function getCacheKey(string $html): string
      {
          return self::CACHE_PREFIX . md5($html);
      }

      /**
       * Disable caching (useful while debugging).
       */
      public function disableCache(): self
      {
          $this->cacheEnabled = false;

          return $this;
      }

      /**
       * Enable caching.
       */
      public function enableCache(): self
      {
          $this->cacheEnabled = true;

          return $this;
      }

      /**
       * Record a cache-clear request.
       *
       * This method only writes a log entry; it does not delete any cache
       * entry. Removing the keys needs a manual cleanup or a cache driver that
       * supports wildcard deletion (e.g. Redis).
       */
      public function clearCache(): void
      {
          Log::info('KatexRenderer: 缓存清除请求(需要手动清理或使用 Redis)');
      }

      /**
       * Sanitize the TeX source inside every delimiter pair of the HTML.
       *
       * The delimiters are processed in the order $$...$$, $...$, \(...\),
       * \[...\]; text outside the delimiters is left untouched.
       */
      private function sanitizeLatexInHtml(string $html): string
      {
          // [pattern, opening delimiter, closing delimiter]
          $delimiters = [
              ['/\$\$([\s\S]*?)\$\$/', '$$', '$$'],
              // Single dollars only; the look-arounds keep $$ pairs out.
              ['/(?<!\$)\$([^$\n]+?)\$(?!\$)/', '$', '$'],
              ['/\\\\\(([\s\S]*?)\\\\\)/', '\\(', '\\)'],
              ['/\\\\\[([\s\S]*?)\\\\\]/', '\\[', '\\]'],
          ];

          foreach ($delimiters as [$pattern, $open, $close]) {
              $replaced = preg_replace_callback(
                  $pattern,
                  function (array $m) use ($open, $close): string {
                      return $open . $this->sanitizeTex($m[1]) . $close;
                  },
                  $html
              );

              // preg_replace_callback() yields null on a PCRE failure (e.g.
              // backtrack limit); keep the current text rather than lose it.
              $html = $replaced ?? $html;
          }

          return $html;
      }

      /**
       * Normalize the TeX source of a single formula.
       *
       * Every preg_replace() result falls back to its input, because the
       * function returns null when PCRE fails.
       */
      private function sanitizeTex(string $tex): string
      {
          // Decode until stable so double-encoded entities (&amp;lt;) are
          // resolved completely.
          do {
              $previous = $tex;
              $tex = html_entity_decode($previous, ENT_QUOTES, 'UTF-8');
          } while ($tex !== $previous);

          // Drop <br> tags and raw line breaks inside the formula so that
          // input such as "\frac{M}\n{N}" still parses.
          $tex = preg_replace('/<br\\s*\\/?>/i', '', $tex) ?? $tex;
          $tex = preg_replace('/\\r\\n|\\r|\\n/', '', $tex) ?? $tex;

          // \Arg is not a KaTeX command; spell it as an operator name.
          $tex = preg_replace('/\\\\Arg\\b/', '\\\\operatorname{Arg}', $tex) ?? $tex;

          // Re-insert the space missing after \quad / \qquad ("\quadz",
          // "\qquadx"), which would otherwise read as an unknown command.
          $tex = preg_replace('/\\\\(q?quad)(?=[A-Za-z0-9])/', '\\\\$1 ', $tex) ?? $tex;

          return $this->fixCasesLineBreaks($tex);
      }

      /**
       * Restore row separators inside cases environments.
       *
       * Within \begin{cases}...\end{cases}, a single backslash is turned back
       * into "\\" when it is directly followed by "+" or "-", by a digit, or by
       * one letter that is itself followed by one of _ ^ 0-9 = < > or
       * whitespace (e.g. "\3x", "\a_1x", "\x=...").
       */
      private function fixCasesLineBreaks(string $tex): string
      {
          $fixed = preg_replace_callback(
              '/\\\\begin\{cases\}([\s\S]*?)\\\\end\{cases\}/',
              static function (array $m): string {
                  $content = $m[1];

                  // One pass covers all three look-aheads: a replacement only
                  // produces "\\", which none of the alternatives can match.
                  $content = preg_replace(
                      '/(?<!\\\\)\\\\(?=[-+0-9]|[A-Za-z](?:[_^0-9=<>]|\\s))/',
                      '\\\\\\\\',
                      $content
                  ) ?? $content;

                  return '\\begin{cases}' . $content . '\\end{cases}';
              },
              $tex
          );

          return $fixed ?? $tex;
      }

      /**
       * Extract the text of the first katex-error span for logging.
       *
       * @return array<string, string> ['text' => ...] limited to 200
       *                               characters, or [] when no span is found
       */
      private function extractKatexErrorSnippet(string $html): array
      {
          if (!preg_match('/<span class="katex-error"[^>]*>(.*?)<\/span>/is', $html, $match)) {
              return [];
          }

          $text = trim(strip_tags($match[1]));
          $text = preg_replace('/\s+/', ' ', $text) ?? $text;

          return [
              'text' => mb_substr($text, 0, 200),
          ];
      }
  }