KatexRenderer.php 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
  1. <?php
  2. namespace App\Services;
  3. use Illuminate\Support\Facades\Log;
  4. use Symfony\Component\Process\Process;
  5. use Symfony\Component\Process\Exception\ProcessFailedException;
  6. /**
  7. * KaTeX 服务端渲染服务
  8. *
  9. * 使用 Node.js 的 KaTeX 库在服务端预渲染 LaTeX 公式
  10. * 避免依赖 Chrome headless 执行 JavaScript
  11. */
  12. class KatexRenderer
  13. {
  14. /**
  15. * Node.js 脚本路径
  16. */
  17. private string $scriptPath;
  18. /**
  19. * 是否启用缓存
  20. */
  21. private bool $cacheEnabled = true;
  22. /**
  23. * 缓存前缀
  24. */
  25. private const CACHE_PREFIX = 'katex_rendered_';
  26. /**
  27. * 缓存时间(秒)
  28. */
  29. private const CACHE_TTL = 86400; // 24小时
  30. public function __construct()
  31. {
  32. $this->scriptPath = base_path('scripts/katex-render.mjs');
  33. }
  34. /**
  35. * 渲染 HTML 中的所有 LaTeX 公式
  36. *
  37. * @param string $html 包含 LaTeX 公式的 HTML
  38. * @return string 渲染后的 HTML
  39. */
  40. public function renderHtml(string $html): string
  41. {
  42. // 检查是否包含需要渲染的公式
  43. if (!$this->containsLatex($html)) {
  44. Log::debug('KatexRenderer: HTML 不包含 LaTeX 公式,跳过渲染');
  45. return $html;
  46. }
  47. // 在渲染前修复公式中的实体与 cases 换行问题
  48. $html = $this->sanitizeLatexInHtml($html);
  49. // 尝试从缓存获取
  50. $cacheKey = $this->getCacheKey($html);
  51. if ($this->cacheEnabled && $cached = cache()->get($cacheKey)) {
  52. Log::debug('KatexRenderer: 从缓存获取渲染结果');
  53. return $cached;
  54. }
  55. // 调用 Node.js 脚本渲染
  56. $rendered = $this->callNodeScript($html);
  57. if (strpos($rendered, 'katex-error') !== false) {
  58. Log::warning('KatexRenderer: 发现未解析公式(katex-error)', [
  59. 'sample' => $this->extractKatexErrorSnippet($rendered),
  60. ]);
  61. }
  62. // 缓存结果
  63. if ($this->cacheEnabled && $rendered !== $html) {
  64. cache()->put($cacheKey, $rendered, self::CACHE_TTL);
  65. }
  66. return $rendered;
  67. }
  68. /**
  69. * 检查 HTML 是否包含 LaTeX 公式
  70. */
  71. private function containsLatex(string $html): bool
  72. {
  73. // 检查常见的 LaTeX 定界符
  74. return preg_match('/\$[^$]+\$|\$\$[\s\S]+?\$\$|\\\\\([\s\S]+?\\\\\)|\\\\\[[\s\S]+?\\\\\]/', $html) === 1;
  75. }
  76. /**
  77. * 调用 Node.js KaTeX 渲染脚本
  78. */
  79. private function callNodeScript(string $html): string
  80. {
  81. // 检查脚本是否存在
  82. if (!file_exists($this->scriptPath)) {
  83. Log::warning('KatexRenderer: 渲染脚本不存在', ['path' => $this->scriptPath]);
  84. return $html;
  85. }
  86. try {
  87. // 创建进程
  88. $process = new Process(['node', $this->scriptPath]);
  89. $process->setInput($html);
  90. $process->setTimeout(30); // 30秒超时
  91. // 执行
  92. $process->run();
  93. // 检查是否成功
  94. if (!$process->isSuccessful()) {
  95. Log::warning('KatexRenderer: Node.js 脚本执行失败', [
  96. 'exit_code' => $process->getExitCode(),
  97. 'error' => $process->getErrorOutput(),
  98. ]);
  99. return $html;
  100. }
  101. $output = $process->getOutput();
  102. // 验证输出
  103. if (empty($output)) {
  104. Log::warning('KatexRenderer: Node.js 脚本输出为空');
  105. return $html;
  106. }
  107. Log::info('KatexRenderer: LaTeX 公式渲染成功', [
  108. 'input_length' => strlen($html),
  109. 'output_length' => strlen($output),
  110. ]);
  111. return $output;
  112. } catch (\Exception $e) {
  113. Log::error('KatexRenderer: 渲染异常', [
  114. 'error' => $e->getMessage(),
  115. ]);
  116. return $html;
  117. }
  118. }
  119. /**
  120. * 生成缓存键
  121. */
  122. private function getCacheKey(string $html): string
  123. {
  124. return self::CACHE_PREFIX . md5($html);
  125. }
  126. /**
  127. * 禁用缓存(用于调试)
  128. */
  129. public function disableCache(): self
  130. {
  131. $this->cacheEnabled = false;
  132. return $this;
  133. }
  134. /**
  135. * 启用缓存
  136. */
  137. public function enableCache(): self
  138. {
  139. $this->cacheEnabled = true;
  140. return $this;
  141. }
  142. /**
  143. * 清除所有 KaTeX 渲染缓存
  144. */
  145. public function clearCache(): void
  146. {
  147. // 注意:这个方法需要 Redis 或支持通配符删除的缓存驱动
  148. Log::info('KatexRenderer: 缓存清除请求(需要手动清理或使用 Redis)');
  149. }
  150. private function sanitizeLatexInHtml(string $html): string
  151. {
  152. $sanitize = function (string $tex): string {
  153. $decoded = html_entity_decode($tex, ENT_QUOTES, 'UTF-8');
  154. while ($decoded !== $tex) {
  155. $tex = $decoded;
  156. $decoded = html_entity_decode($tex, ENT_QUOTES, 'UTF-8');
  157. }
  158. // 清理公式内部的换行与 <br>,避免 \frac{M}\n{N} 破坏解析
  159. $tex = preg_replace('/<br\\s*\\/?>/i', '', $tex);
  160. $tex = preg_replace('/\\r\\n|\\r|\\n/', '', $tex);
  161. return $this->fixCasesLineBreaks($tex);
  162. };
  163. // $$...$$
  164. $html = preg_replace_callback('/\$\$([\s\S]*?)\$\$/', function ($m) use ($sanitize) {
  165. return '$$' . $sanitize($m[1]) . '$$';
  166. }, $html);
  167. // $...$ (avoid $$)
  168. $html = preg_replace_callback('/(?<!\$)\$([^$\n]+?)\$(?!\$)/', function ($m) use ($sanitize) {
  169. return '$' . $sanitize($m[1]) . '$';
  170. }, $html);
  171. // \(...\)
  172. $html = preg_replace_callback('/\\\\\(([\s\S]*?)\\\\\)/', function ($m) use ($sanitize) {
  173. return '\\(' . $sanitize($m[1]) . '\\)';
  174. }, $html);
  175. // \[...\]
  176. $html = preg_replace_callback('/\\\\\[([\s\S]*?)\\\\\]/', function ($m) use ($sanitize) {
  177. return '\\[' . $sanitize($m[1]) . '\\]';
  178. }, $html);
  179. return $html;
  180. }
  181. private function fixCasesLineBreaks(string $tex): string
  182. {
  183. return preg_replace_callback('/\\\\begin\{cases\}([\s\S]*?)\\\\end\{cases\}/', function ($m) {
  184. $content = $m[1];
  185. // 将 cases 中被转成单反斜杠的换行恢复为双反斜杠(仅处理紧跟 +/- 的情况)
  186. $content = preg_replace('/(?<!\\\\)\\\\(?=[-+])/', '\\\\\\\\', $content);
  187. return '\\begin{cases}' . $content . '\\end{cases}';
  188. }, $tex);
  189. }
  190. private function extractKatexErrorSnippet(string $html): array
  191. {
  192. if (!preg_match('/<span class="katex-error"[^>]*>(.*?)<\/span>/is', $html, $match)) {
  193. return [];
  194. }
  195. $text = trim(strip_tags($match[1]));
  196. $text = preg_replace('/\s+/', ' ', $text);
  197. return [
  198. 'text' => mb_substr($text, 0, 200),
  199. ];
  200. }
  201. }