中文字符串和Unicode互转

在GBK、UTF-8字符串与Unicode转义序列(\uXXXX形式)之间互相转换的类

unicode.class.php

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
<?php
/**
 * Converts text between a byte encoding (GBK, UTF-8, ... anything iconv knows)
 * and a textual sequence of "\uXXXX" escapes.
 *
 * Escapes are written in lower-case hex WITHOUT zero padding ("A" => "\u41").
 * Characters outside the Basic Multilingual Plane are written as a UTF-16
 * surrogate pair ("\ud83d\ude00"), the convention used by JSON/JavaScript/Java.
 *
 * @author wang chong(wangchong1985@gmail.com)
 * @link http://www.wangchong.org
 * @version 1.0.0 (2011-04-15)
 * @package php-Unicode
 */
class Unicode
{
    /**
     * Separator inserted between the escapes produced by str_to_unicode().
     * Note that str_from_unicode() does not strip it again.
     * @var string
     */
    public $glue = "";

    /**
     * Encode every character of a string as a "\uXXXX" escape.
     *
     * The input is normalised to UTF-8 and split on code-point boundaries, so
     * line breaks are kept and characters that GBK cannot represent no longer
     * make the whole conversion fail.
     *
     * @param string $input         text to encode
     * @param string $input_charset iconv name of the charset $input is in
     * @return string escapes joined with $this->glue; "" when the input is
     *                empty or cannot be converted from $input_charset
     */
    public function str_to_unicode($input, $input_charset = 'gbk')
    {
        $utf8 = iconv($input_charset, 'UTF-8', (string) $input);
        if ($utf8 === false || $utf8 === '') {
            return '';
        }

        // "s": let "." match newlines too; "u": treat subject as UTF-8 so that
        // each match is exactly one code point.
        if (!preg_match_all('/./su', $utf8, $chars)) {
            return '';
        }

        $escapes = array_map(array($this, 'utf8_unicode'), $chars[0]);

        return implode($this->glue, $escapes);
    }

    /**
     * Turn one UTF-8 encoded character into its escape form.
     *
     * @param string $c a single UTF-8 character (1 to 4 bytes)
     * @return string "\uXXXX", a surrogate pair of two escapes for code points
     *                above U+FFFF, or "" if $c is not 1 to 4 bytes long
     */
    private function utf8_unicode($c)
    {
        switch (strlen($c)) {
            case 1:
                $n = ord($c[0]);
                break;
            case 2:
                // 110xxxxx 10xxxxxx
                $n = ((ord($c[0]) & 0x1f) << 6)
                    | (ord($c[1]) & 0x3f);
                break;
            case 3:
                // 1110xxxx 10xxxxxx 10xxxxxx
                $n = ((ord($c[0]) & 0x0f) << 12)
                    | ((ord($c[1]) & 0x3f) << 6)
                    | (ord($c[2]) & 0x3f);
                break;
            case 4:
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $n = ((ord($c[0]) & 0x07) << 18)
                    | ((ord($c[1]) & 0x3f) << 12)
                    | ((ord($c[2]) & 0x3f) << 6)
                    | (ord($c[3]) & 0x3f);
                break;
            default:
                // Not a single UTF-8 character; nothing sensible to emit.
                return '';
        }

        if ($n >= 0x10000) {
            // A single \u escape holds at most 4 hex digits on the decoding
            // side, so split into a UTF-16 high/low surrogate pair.
            $n -= 0x10000;
            return '\u' . dechex(0xd800 | ($n >> 10))
                 . '\u' . dechex(0xdc00 | ($n & 0x3ff));
        }

        return '\u' . dechex($n);
    }

    /**
     * Replace every "\uXXXX" escape (1 to 4 hex digits, either letter case) in
     * a string with the character it denotes and convert the result to the
     * requested charset. A high surrogate escape directly followed by a low
     * surrogate escape is decoded as one supplementary character.
     *
     * Text outside the escapes is passed through and must already be UTF-8.
     *
     * @param string $str         text containing "\uXXXX" escapes
     * @param string $out_charset iconv name of the charset to return
     * @return string|false converted text, or false when iconv cannot convert
     *                      the decoded text to $out_charset
     */
    public function str_from_unicode($str, $out_charset = 'gbk')
    {
        // The surrogate-pair alternative comes first so a pair is consumed as a unit.
        $pattern = '/\\\\u([dD][89abAB][0-9a-fA-F]{2})\\\\u([dD][c-fC-F][0-9a-fA-F]{2})'
                 . '|\\\\u([0-9a-fA-F]{1,4})/';

        $str = preg_replace_callback($pattern, array($this, 'unicode2utf8_'), $str);

        return iconv('UTF-8', $out_charset, $str);
    }

    /**
     * preg_replace_callback() adapter for str_from_unicode().
     *
     * @param array $c match groups: [1],[2] = surrogate pair, [3] = single escape
     * @return string UTF-8 bytes of the decoded character
     */
    private function unicode2utf8_($c)
    {
        if (isset($c[3]) && $c[3] !== '') {
            return $this->unicode2utf8($c[3]);
        }

        // Recombine the UTF-16 surrogate pair into one code point.
        $high = hexdec($c[1]);
        $low = hexdec($c[2]);
        $codePoint = 0x10000 + (($high & 0x3ff) << 10) + ($low & 0x3ff);

        return $this->unicode2utf8(dechex($codePoint));
    }

    /**
     * Encode a code point, given as a hex string, as UTF-8.
     *
     * @param string $c code point in hexadecimal (e.g. "4e2d")
     * @return string UTF-8 byte sequence; "" for values of 0x200000 and above
     */
    private function unicode2utf8($c)
    {
        $cp = hexdec($c);

        if ($cp < 0x80) {
            return chr($cp);
        }
        if ($cp < 0x800) {
            return chr(0xC0 | ($cp >> 6))
                 . chr(0x80 | ($cp & 0x3F));
        }
        if ($cp < 0x10000) {
            return chr(0xE0 | ($cp >> 12))
                 . chr(0x80 | (($cp >> 6) & 0x3F))
                 . chr(0x80 | ($cp & 0x3F));
        }
        if ($cp < 0x200000) {
            return chr(0xF0 | ($cp >> 18))
                 . chr(0x80 | (($cp >> 12) & 0x3F))
                 . chr(0x80 | (($cp >> 6) & 0x3F))
                 . chr(0x80 | ($cp & 0x3F));
        }

        return '';
    }
}

unicode.example.php

1
2
3
4
5
6
7
8
9
10
11
12
13
<?php

// Demo script: expects unicode.class.php (which defines the Unicode class)
// to be reachable via the include path / current directory.
include 'unicode.class.php';

$str = '这是一个测试Demo!~!wangchong1985@gmail.com';

$convert = new Unicode();

// Encode the UTF-8 text as \uXXXX escapes and dump the result.
var_dump($str = $convert->str_to_unicode($str, 'UTF-8'));

// Decode the escapes back to UTF-8 and dump the result.
// (The terminating semicolon is required: the file has no closing tag.)
var_dump($str = $convert->str_from_unicode($str, 'UTF-8'));

转自:https://github.com/wangchll/PHP-Unicode-Convert