方案:根据字符的 Unicode 码点,将特殊样式的字母和数字映射为普通的字母和数字
<?php
class UnicodeUTF8Tool
{
    /**
     * Split a UTF-8 string into characters and describe each one.
     *
     * @param string $raw_string UTF-8 encoded bytes.
     * @return array List of records with the keys 'character' (raw bytes),
     *               'unicode' (integer code point, or false for a malformed
     *               byte) and 'unicode_hex' ('\u' followed by the lowercase hex
     *               code point padded to at least four digits, or '' for a
     *               malformed byte). An empty input yields an empty list.
     */
    public static function utf8ToUnicode($raw_string)
    {
        $res = [];
        foreach (self::splitCharacters($raw_string) as $item) {
            list($character, $unicode) = $item;
            $unicode_hex = $unicode === false
                ? ''
                : '\u' . self::hexAddZero(dechex($unicode));
            $res[] = compact('character', 'unicode', 'unicode_hex');
        }
        return $res;
    }

    /**
     * Replace styled letters and digits with their plain ASCII counterparts.
     *
     * Only the ranges listed in foldStyledCodePoint() are rewritten; every
     * other character, including malformed bytes, is copied through unchanged.
     *
     * @param string $raw_string UTF-8 encoded bytes.
     * @return string The input with the styled characters normalised.
     */
    public static function processCharacter($raw_string)
    {
        $res = '';
        foreach (self::splitCharacters($raw_string) as $item) {
            list($character, $unicode) = $item;
            $folded = $unicode === false ? null : self::foldStyledCodePoint($unicode);
            $res .= $folded === null ? $character : self::unicodeToCharacter($folded);
        }
        return $res;
    }

    /**
     * Encode a code point as UTF-8 bytes.
     *
     * @param int $unicode_num Code point to encode.
     * @return string One to four bytes, or '' when the value is negative or
     *                larger than 0x1FFFFF.
     */
    public static function unicodeToCharacter($unicode_num)
    {
        if ($unicode_num < 0) {
            // chr() would wrap a negative value into an arbitrary byte.
            return '';
        }
        if ($unicode_num <= 0x7F) {
            return chr($unicode_num);
        }
        if ($unicode_num <= 0x7FF) {
            return chr(($unicode_num >> 6) + 192)
                . chr(($unicode_num & 63) + 128);
        }
        if ($unicode_num <= 0xFFFF) {
            return chr(($unicode_num >> 12) + 224)
                . chr((($unicode_num >> 6) & 63) + 128)
                . chr(($unicode_num & 63) + 128);
        }
        if ($unicode_num <= 0x1FFFFF) {
            return chr(($unicode_num >> 18) + 240)
                . chr((($unicode_num >> 12) & 63) + 128)
                . chr((($unicode_num >> 6) & 63) + 128)
                . chr(($unicode_num & 63) + 128);
        }
        return '';
    }

    /**
     * Left-pad a hexadecimal string with zeros to at least four characters.
     *
     * @param string $num Hexadecimal digits, e.g. the output of dechex().
     * @return string The padded string; longer inputs are returned as they are.
     */
    public static function hexAddZero($num)
    {
        return str_pad((string) $num, 4, '0', STR_PAD_LEFT);
    }

    /**
     * Decode the first UTF-8 character of a string into its code point.
     *
     * @param string $character Bytes starting with the character to decode.
     * @return int|false The code point, or false when the string is empty, the
     *                   lead byte is invalid, the sequence is truncated, or a
     *                   trailing byte is not a continuation byte (10xxxxxx).
     */
    public static function characterToUnicode($character)
    {
        $character = (string) $character;
        $byte_num = self::returnCharacterByteNum($character);
        if ($byte_num === null || strlen($character) < $byte_num) {
            return false;
        }

        $lead = ord($character[0]);
        if ($byte_num === 1) {
            return $lead;
        }

        // Drop the length marker bits of the lead byte (110xxxxx, 1110xxxx,
        // 11110xxx) and keep only its payload bits.
        $unicode = $lead & (0xFF >> ($byte_num + 1));
        for ($i = 1; $i < $byte_num; $i++) {
            $byte = ord($character[$i]);
            if (($byte & 0xC0) !== 0x80) {
                return false;
            }
            // Each continuation byte contributes its low six bits.
            $unicode = ($unicode << 6) | ($byte & 0x3F);
        }
        return $unicode;
    }

    /**
     * Report how many bytes the first UTF-8 character of a string occupies,
     * judged from its lead byte alone.
     *
     * @param string $character Bytes starting with the character to measure.
     * @return int|null 1 to 4, or null when the string is empty or starts with
     *                  a byte that cannot begin a sequence (0x80-0xBF, 0xF8-0xFF).
     */
    public static function returnCharacterByteNum($character)
    {
        $character = (string) $character;
        if ($character === '') {
            return null;
        }

        $num = ord($character[0]);
        if ($num <= 0x7F) {
            return 1;
        }
        if ($num >= 0xC0 && $num <= 0xDF) {
            return 2;
        }
        if ($num >= 0xE0 && $num <= 0xEF) {
            return 3;
        }
        if ($num >= 0xF0 && $num <= 0xF7) {
            return 4;
        }
        return null;
    }

    /**
     * Walk a UTF-8 string and return its characters with their code points.
     *
     * The scan is driven by a byte offset rather than by emptiness of the
     * remaining string, because PHP's empty() treats the string "0" as empty
     * and would silently drop a trailing zero. A byte that does not start a
     * well-formed sequence is emitted on its own with a code point of false,
     * so malformed input always advances and the scan terminates.
     *
     * @param string $raw_string UTF-8 encoded bytes.
     * @return array List of [character bytes, code point or false] pairs.
     */
    private static function splitCharacters($raw_string)
    {
        $raw_string = (string) $raw_string;
        $length = strlen($raw_string);
        $characters = [];
        $offset = 0;

        while ($offset < $length) {
            $byte_num = self::returnCharacterByteNum($raw_string[$offset]);
            $character = $byte_num === null ? '' : substr($raw_string, $offset, $byte_num);
            $unicode = $byte_num === null ? false : self::characterToUnicode($character);
            if ($unicode === false) {
                // Malformed: consume only the offending byte and resynchronise.
                $character = $raw_string[$offset];
            }
            $characters[] = [$character, $unicode];
            $offset += strlen($character);
        }
        return $characters;
    }

    /**
     * Map a styled code point onto its plain ASCII code point.
     *
     * @param int $unicode Code point to look up.
     * @return int|null The ASCII code point, or null when no mapping applies.
     */
    private static function foldStyledCodePoint($unicode)
    {
        // [first styled code point, last styled code point, first ASCII code point]
        $ranges = [
            [120812, 120821, 48], // U+1D7EC..U+1D7F5 sans-serif bold digits  -> 0-9
            [120276, 120301, 65], // U+1D5D4..U+1D5ED sans-serif bold capitals -> A-Z
            [120302, 120327, 97], // U+1D5EE..U+1D607 sans-serif bold small    -> a-z
        ];
        foreach ($ranges as $range) {
            list($first, $last, $ascii_start) = $range;
            if ($unicode >= $first && $unicode <= $last) {
                return $unicode - $first + $ascii_start;
            }
        }
        return null;
    }
}
查询unicode值链接:https://codepoints.net/mathematical_alphanumeric_symbols
代码来源:https://github.com/xaozhuge/php_class/blob/master/UnicodeUTF8Model.class.php
|