| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241 |
- <?php
- namespace App\Services;
- use Illuminate\Support\Facades\Log;
- use Symfony\Component\Process\Process;
- use Symfony\Component\Process\Exception\ProcessFailedException;
/**
 * Server-side KaTeX rendering service.
 *
 * Pre-renders LaTeX formulas on the server by piping HTML through a Node.js
 * script that uses the KaTeX library, so that no headless Chrome is needed
 * to execute JavaScript.
 */
class KatexRenderer
{
    /**
     * Cache key prefix for rendered HTML.
     */
    private const CACHE_PREFIX = 'katex_rendered_';

    /**
     * Cache lifetime in seconds (24 hours).
     */
    private const CACHE_TTL = 86400;

    /**
     * Formula delimiters handled by sanitizeLatexInHtml(), as
     * [pattern, opening delimiter, closing delimiter].
     *
     * Order matters: display math `$$...$$` is processed before inline
     * `$...$`, whose pattern uses look-arounds to skip `$$`.
     */
    private const LATEX_DELIMITERS = [
        ['/\$\$([\s\S]*?)\$\$/', '$$', '$$'],
        ['/(?<!\$)\$([^$\n]+?)\$(?!\$)/', '$', '$'],
        ['/\\\\\(([\s\S]*?)\\\\\)/', '\\(', '\\)'],
        ['/\\\\\[([\s\S]*?)\\\\\]/', '\\[', '\\]'],
    ];

    /**
     * Absolute path of the Node.js rendering script.
     */
    private string $scriptPath;

    /**
     * Whether rendered output is read from / written to the cache.
     */
    private bool $cacheEnabled = true;

    public function __construct()
    {
        $this->scriptPath = base_path('scripts/katex-render.mjs');
    }

    /**
     * Render every LaTeX formula found in an HTML fragment.
     *
     * The input is returned unchanged when it contains no formula delimiters.
     * When the Node.js script is missing, fails, or produces no output, the
     * sanitized (but unrendered) HTML is returned instead; such fallbacks are
     * never cached.
     *
     * @param string $html HTML that may contain LaTeX formulas
     * @return string HTML with formulas rendered where possible
     */
    public function renderHtml(string $html): string
    {
        if (!$this->containsLatex($html)) {
            Log::debug('KatexRenderer: HTML 不包含 LaTeX 公式,跳过渲染');
            return $html;
        }

        // Repair entities and `cases` line breaks inside formulas first, so
        // the cache key is derived from the normalized input.
        $html = $this->sanitizeLatexInHtml($html);
        $cacheKey = $this->getCacheKey($html);

        if ($this->cacheEnabled) {
            $cached = cache()->get($cacheKey);
            if (is_string($cached) && $cached !== '') {
                Log::debug('KatexRenderer: 从缓存获取渲染结果');
                return $cached;
            }
        }

        $rendered = $this->callNodeScript($html);

        if (strpos($rendered, 'katex-error') !== false) {
            Log::warning('KatexRenderer: 发现未解析公式(katex-error)', [
                'sample' => $this->extractKatexErrorSnippet($rendered),
            ]);
        }

        // callNodeScript() hands back its input on failure; identical output
        // therefore means "nothing rendered" and must not be cached.
        if ($this->cacheEnabled && $rendered !== $html) {
            cache()->put($cacheKey, $rendered, self::CACHE_TTL);
        }

        return $rendered;
    }

    /**
     * Disable caching (intended for debugging).
     */
    public function disableCache(): self
    {
        $this->cacheEnabled = false;

        return $this;
    }

    /**
     * Enable caching.
     */
    public function enableCache(): self
    {
        $this->cacheEnabled = true;

        return $this;
    }

    /**
     * Request a purge of all KaTeX render cache entries.
     *
     * Currently this only writes a log line and deletes nothing: removing
     * keys by prefix needs Redis or another cache driver that supports
     * wildcard deletion.
     */
    public function clearCache(): void
    {
        Log::info('KatexRenderer: 缓存清除请求(需要手动清理或使用 Redis)');
    }

    /**
     * Tell whether the HTML contains any common LaTeX delimiter pair:
     * `$...$`, `$$...$$`, `\(...\)` or `\[...\]`.
     */
    private function containsLatex(string $html): bool
    {
        return preg_match('/\$[^$]+\$|\$\$[\s\S]+?\$\$|\\\\\([\s\S]+?\\\\\)|\\\\\[[\s\S]+?\\\\\]/', $html) === 1;
    }

    /**
     * Run the Node.js KaTeX script with the HTML on stdin.
     *
     * @param string $html sanitized HTML to render
     * @return string the script's stdout, or $html unchanged when the script
     *                is missing, exits unsuccessfully, prints nothing, or an
     *                exception (e.g. the 30 second timeout) is raised
     */
    private function callNodeScript(string $html): string
    {
        if (!file_exists($this->scriptPath)) {
            Log::warning('KatexRenderer: 渲染脚本不存在', ['path' => $this->scriptPath]);
            return $html;
        }

        try {
            $process = new Process(['node', $this->scriptPath]);
            $process->setInput($html);
            $process->setTimeout(30);
            $process->run();

            if (!$process->isSuccessful()) {
                Log::warning('KatexRenderer: Node.js 脚本执行失败', [
                    'exit_code' => $process->getExitCode(),
                    'error' => $process->getErrorOutput(),
                ]);
                return $html;
            }

            $output = $process->getOutput();
            if ($output === '') {
                Log::warning('KatexRenderer: Node.js 脚本输出为空');
                return $html;
            }

            Log::info('KatexRenderer: LaTeX 公式渲染成功', [
                'input_length' => strlen($html),
                'output_length' => strlen($output),
            ]);

            return $output;
        } catch (\Exception $e) {
            // Best effort: a rendering problem must not break the caller.
            Log::error('KatexRenderer: 渲染异常', [
                'error' => $e->getMessage(),
            ]);
            return $html;
        }
    }

    /**
     * Build the cache key for a given (sanitized) HTML input.
     */
    private function getCacheKey(string $html): string
    {
        // md5 is used purely as a content fingerprint here, not for security.
        return self::CACHE_PREFIX . md5($html);
    }

    /**
     * Normalize the body of every formula in the HTML before rendering.
     *
     * Each delimiter kind in LATEX_DELIMITERS is processed in turn and the
     * text between the delimiters is passed through sanitizeFormula().
     * If PCRE fails on a pattern (preg_replace_callback() returns null, e.g.
     * when a backtrack or JIT stack limit is hit), that pass is skipped and
     * the HTML is kept as it was, instead of propagating null into a
     * string-typed return and raising a TypeError.
     */
    private function sanitizeLatexInHtml(string $html): string
    {
        foreach (self::LATEX_DELIMITERS as [$pattern, $open, $close]) {
            $result = preg_replace_callback(
                $pattern,
                function (array $m) use ($open, $close): string {
                    return $open . $this->sanitizeFormula($m[1]) . $close;
                },
                $html
            );

            if ($result === null) {
                Log::warning('KatexRenderer: 公式预处理正则执行失败,保留原始内容', [
                    'pattern' => $pattern,
                    'pcre_error' => preg_last_error(),
                ]);
                continue;
            }

            $html = $result;
        }

        return $html;
    }

    /**
     * Clean up the source of a single formula (delimiters excluded).
     *
     * Decodes HTML entities until the text stops changing (handles doubly
     * encoded input such as `&amp;lt;`), removes `<br>` tags and raw line
     * breaks so that e.g. `\frac{M}` followed by a newline and `{N}` still
     * parses, then repairs `cases` row separators.
     */
    private function sanitizeFormula(string $tex): string
    {
        do {
            $previous = $tex;
            $tex = html_entity_decode($previous, ENT_QUOTES, 'UTF-8');
        } while ($tex !== $previous);

        // Keep the text as-is should the regex engine report an error.
        $tex = preg_replace('/<br\\s*\\/?>/i', '', $tex) ?? $tex;
        $tex = str_replace(["\r", "\n"], '', $tex);

        return $this->fixCasesLineBreaks($tex);
    }

    /**
     * Restore row separators inside `\begin{cases}...\end{cases}`.
     *
     * A single backslash that is directly followed by `+` or `-` (and not
     * preceded by another backslash) is turned back into `\\`. Text outside
     * a cases environment is left alone. On a PCRE error the input is
     * returned unchanged.
     */
    private function fixCasesLineBreaks(string $tex): string
    {
        $fixed = preg_replace_callback(
            '/\\\\begin\{cases\}([\s\S]*?)\\\\end\{cases\}/',
            static function (array $m): string {
                // The replacement string denotes two literal backslashes.
                $body = preg_replace('/(?<!\\\\)\\\\(?=[-+])/', '\\\\\\\\', $m[1]) ?? $m[1];

                return '\\begin{cases}' . $body . '\\end{cases}';
            },
            $tex
        );

        return $fixed ?? $tex;
    }

    /**
     * Pull the text of the first `<span class="katex-error">` for logging.
     *
     * @return array empty when no such span exists; otherwise a single
     *               'text' entry holding at most 200 characters of the
     *               span's tag-stripped, whitespace-collapsed content
     */
    private function extractKatexErrorSnippet(string $html): array
    {
        if (preg_match('/<span class="katex-error"[^>]*>(.*?)<\/span>/is', $html, $match) !== 1) {
            return [];
        }

        $text = trim(strip_tags($match[1]));
        $text = preg_replace('/\s+/', ' ', $text) ?? $text;

        return [
            'text' => mb_substr($text, 0, 200),
        ];
    }
}
|