| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
- <?php
- namespace App\Support;
/**
 * Helpers for coercing text of unknown encoding into valid UTF-8.
 */
class TextEncoding
{
    /**
     * Normalize unknown-encoding text to valid UTF-8 for safe JSON/JS rendering.
     *
     * Steps, in order:
     *  1. NUL bytes are removed.
     *  2. Empty or already-valid UTF-8 input is returned as is.
     *  3. If mbstring is available, the encoding is detected (strict mode) among
     *     the legacy candidates and the text is converted from it.
     *  4. If iconv is available, each candidate is tried with a strict conversion
     *     first, then with a lossy one that drops undecodable bytes.
     *  5. As a last resort, invalid UTF-8 sequences are stripped (iconv) or
     *     replaced with the mbstring substitute character (mb_scrub).
     *
     * @param string $content Raw bytes in an unknown encoding.
     *
     * @return string Converted text; an empty string when the input is empty or
     *                when no available extension could produce a result.
     */
    public static function toUtf8(string $content): string
    {
        // Remove null bytes that can break downstream processing.
        $content = str_replace("\0", '', $content);
        if ($content === '' || self::isValidUtf8($content)) {
            return $content;
        }

        // Legacy encodings to try, in priority order.
        $legacyEncodings = ['GB18030', 'GBK', 'GB2312', 'BIG5', 'ISO-8859-1', 'Windows-1252'];

        if (function_exists('mb_detect_encoding') && function_exists('mb_convert_encoding')) {
            $detected = mb_detect_encoding($content, array_merge(['UTF-8'], $legacyEncodings), true);
            if (is_string($detected) && $detected !== '' && strtoupper($detected) !== 'UTF-8') {
                // "@": a failed conversion is handled by validating the result below.
                $converted = @mb_convert_encoding($content, 'UTF-8', $detected);
                if (is_string($converted) && $converted !== '' && self::isValidUtf8($converted)) {
                    return $converted;
                }
            }
        }

        // ext/iconv is optional; "@" cannot suppress the Error raised by calling
        // an undefined function, so guard it the same way as the mb_* calls.
        if (function_exists('iconv')) {
            // First pass is strict so a candidate that decodes every byte wins over
            // an earlier candidate that only "succeeds" by discarding bytes; the
            // second pass is the best-effort conversion that drops invalid bytes.
            foreach (['UTF-8', 'UTF-8//IGNORE'] as $target) {
                foreach ($legacyEncodings as $from) {
                    // "@": iconv raises a notice on undecodable input or an
                    // unsupported encoding name; both simply mean "try the next one".
                    $converted = @iconv($from, $target, $content);
                    if (is_string($converted) && $converted !== '' && self::isValidUtf8($converted)) {
                        return $converted;
                    }
                }
            }

            // Last resort: strip invalid sequences.
            $converted = @iconv('UTF-8', 'UTF-8//IGNORE', $content);
            if (is_string($converted)) {
                return $converted;
            }
        }

        // iconv is missing or reported failure: fall back to mbstring's scrubber
        // (replaces invalid sequences with the substitute character) instead of
        // discarding the whole input.
        if (function_exists('mb_scrub')) {
            return mb_scrub($content, 'UTF-8');
        }

        return '';
    }

    /**
     * Tell whether $text is well-formed UTF-8, with or without mbstring.
     */
    private static function isValidUtf8(string $text): bool
    {
        if (function_exists('mb_check_encoding')) {
            return mb_check_encoding($text, 'UTF-8');
        }

        // With the "u" modifier PCRE validates the subject as UTF-8 before
        // matching; the empty pattern matches any valid subject.
        return preg_match('//u', $text) === 1;
    }
}
|