<?php
/**
 * Converts text between a multi-byte charset (GBK by default) and a
 * backslash-u escaped listing of its Unicode code points.
 *
 * Escapes are written as "\u" followed by the lowercase hexadecimal code
 * point WITHOUT zero padding (for example "a" becomes "\u61"). Decoding
 * accepts one to four hexadecimal digits in either letter case.
 */
class Unicode {
    /** String placed between consecutive escapes produced by str_to_unicode(). */
    public $glue = "";

    /**
     * Escapes every character of $input as "\u<hex code point>".
     *
     * The text is first normalised to GBK and then split into one- or
     * two-byte GBK characters, each of which is escaped individually and
     * joined with $this->glue.
     *
     * @param string $input         Text to escape.
     * @param string $input_charset iconv name of the charset $input is in.
     * @return string Escaped text; an empty string when $input is empty or
     *                cannot be converted to GBK.
     */
    public function str_to_unicode($input, $input_charset = 'gbk') {
        $gbk = iconv($input_charset, 'gbk', $input);
        if ($gbk === false || $gbk === '') {
            return '';
        }

        // An optional high byte followed by any byte is one GBK character.
        // The "s" modifier is required so that "." also consumes "\n";
        // without it line feeds are skipped and vanish from the result.
        preg_match_all('/[\x80-\xff]?./s', $gbk, $matches);

        $escaped = array_map(array($this, 'utf8_unicode_'), $matches[0]);

        return join($this->glue, $escaped);
    }

    /**
     * Escapes a single character given in $input_charset.
     *
     * @param string $c             One character in $input_charset.
     * @param string $input_charset iconv name of the charset of $c.
     * @return string The "\u..." escape for $c.
     */
    private function utf8_unicode_($c, $input_charset = 'gbk') {
        $utf8 = iconv($input_charset, 'utf-8', $c);

        // iconv() yields false for an invalid sequence; hand over an empty
        // string so utf8_unicode() takes its fallback branch.
        return $this->utf8_unicode($utf8 === false ? '' : $utf8);
    }

    /**
     * Escapes a single UTF-8 encoded character.
     *
     * @param string $c UTF-8 bytes of exactly one character (1-4 bytes).
     * @return string "\u" plus the unpadded lowercase hex code point.
     *                Input of any other length yields "\ufffd" (the Unicode
     *                replacement character).
     */
    private function utf8_unicode($c) {
        switch (strlen($c)) {
            case 1:
                $n = ord($c[0]);
                break;
            case 2:
                $n = ((ord($c[0]) & 0x1f) << 6)
                   | (ord($c[1]) & 0x3f);
                break;
            case 3:
                $n = ((ord($c[0]) & 0x0f) << 12)
                   | ((ord($c[1]) & 0x3f) << 6)
                   | (ord($c[2]) & 0x3f);
                break;
            case 4:
                $n = ((ord($c[0]) & 0x07) << 18)
                   | ((ord($c[1]) & 0x3f) << 12)
                   | ((ord($c[2]) & 0x3f) << 6)
                   | (ord($c[3]) & 0x3f);
                break;
            default:
                // Empty or over-long chunk: there is no code point to
                // report, so fall back to U+FFFD instead of reading an
                // undefined variable.
                $n = 0xfffd;
                break;
        }

        return '\u' . dechex($n);
    }

    /**
     * Replaces every "\u" escape (1-4 hex digits, any letter case) in $str
     * with the character it names and converts the result to $out_charset.
     *
     * Text outside the escapes is handed to iconv() as UTF-8.
     *
     * @param string $str         Text containing "\u..." escapes.
     * @param string $out_charset iconv name of the charset to return.
     * @return string|false Decoded text, or whatever iconv() returns when
     *                      the conversion to $out_charset fails.
     */
    public function str_from_unicode($str, $out_charset = 'gbk') {
        // "i" lets upper-case digits such as \u4E2D be decoded as well.
        $utf8 = preg_replace_callback(
            '/\\\\u([0-9a-f]{1,4})/i',
            array($this, 'unicode2utf8_'),
            $str
        );

        return iconv('UTF-8', $out_charset, $utf8);
    }

    /**
     * preg_replace_callback() adapter: decodes the captured hex digits.
     *
     * @param array $c Match array; index 1 holds the hex digits.
     * @return string UTF-8 bytes of the code point.
     */
    private function unicode2utf8_($c) {
        return $this->unicode2utf8($c[1]);
    }

    /**
     * Encodes a code point, given as hexadecimal digits, to UTF-8.
     *
     * @param string $c Hexadecimal code point.
     * @return string UTF-8 bytes; an empty string for values of 0x200000
     *                and above.
     */
    private function unicode2utf8($c) {
        $cp = hexdec($c);

        if ($cp < 0x80) {
            return chr($cp);
        }
        if ($cp < 0x800) {
            return chr(0xC0 | ($cp >> 6))
                 . chr(0x80 | ($cp & 0x3F));
        }
        if ($cp < 0x10000) {
            return chr(0xE0 | ($cp >> 12))
                 . chr(0x80 | (($cp >> 6) & 0x3F))
                 . chr(0x80 | ($cp & 0x3F));
        }
        if ($cp < 0x200000) {
            return chr(0xF0 | ($cp >> 18))
                 . chr(0x80 | (($cp >> 12) & 0x3F))
                 . chr(0x80 | (($cp >> 6) & 0x3F))
                 . chr(0x80 | ($cp & 0x3F));
        }

        return '';
    }
}
|