PHP 中文字符串截取方法汇总
2011年3月7日
1 条评论
1:通过指定编码进行字符串截取:
/**
 * Extract a substring measured in characters (not bytes) from a multi-byte string.
 *
 * Uses mb_substr() when the mbstring extension is loaded; otherwise the string
 * is split into characters with a byte-level regular expression chosen by
 * $charset. Both paths follow the same rules for $suffix.
 *
 * @param string   $str     String to cut.
 * @param int      $start   Zero-based index of the first character to keep;
 *                          a negative value counts from the end of the string.
 * @param int|null $length  Maximum number of characters to keep; null keeps
 *                          everything from $start to the end of the string.
 * @param string   $charset utf-8|gb2312|gbk|big5 (matched case-insensitively).
 * @param bool     $suffix  When true, "…" is appended if characters remain
 *                          after the returned slice.
 *
 * @return string The selected characters, optionally followed by "…".
 *
 * @throws InvalidArgumentException When mbstring is unavailable and $charset is
 *                                  not one of the supported names.
 */
function CsubStrPro($str, $start = 0, $length = null, $charset = "utf-8", $suffix = true)
{
    $str = (string) $str;

    if (function_exists('mb_substr')) {
        // Pass the total character count instead of null as "to the end":
        // very old mbstring versions treat a null length as 0.
        $total = mb_strlen($str, $charset);
        $rest = (string) mb_substr($str, $start, $total, $charset);
        $slice = ($length === null)
            ? $rest
            : (string) mb_substr($str, $start, $length, $charset);
        // $slice is always a prefix of $rest, so a shorter slice means text was cut.
        $truncated = strlen($slice) < strlen($rest);
    } else {
        // One alternative per character: single-byte ASCII first, then the
        // multi-byte forms of the charset. Bytes matching neither are dropped.
        $patterns = array(
            'utf-8'  => '/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xff][\x80-\xbf]{3}/',
            // Lead bytes start at A1 so full-width punctuation and symbols are kept.
            'gb2312' => '/[\x01-\x7f]|[\xa1-\xf7][\xa1-\xfe]/',
            'gbk'    => '/[\x01-\x7f]|[\x81-\xfe][\x40-\xfe]/',
            // Big5 trail bytes are 40-7E or A1-FE.
            'big5'   => '/[\x01-\x7f]|[\x81-\xfe][\x40-\x7e\xa1-\xfe]/',
        );

        $key = strtolower((string) $charset);
        if (!isset($patterns[$key])) {
            throw new InvalidArgumentException('Unsupported charset: ' . $charset);
        }

        preg_match_all($patterns[$key], $str, $match);
        $chars = isset($match[0]) ? $match[0] : array();

        $rest = array_slice($chars, $start);
        $kept = ($length === null) ? $rest : array_slice($chars, $start, $length);
        $slice = implode('', $kept);
        $truncated = count($kept) < count($rest);
    }

    // The ellipsis literal is emitted in this source file's own encoding,
    // whatever $charset is.
    if ($suffix && $truncated) {
        return $slice . "…";
    }

    return $slice;
}
2:自动识别 GBK 和 UTF-8 编码的字符串(注:下面的代码实际只按 UTF-8 首字节范围判断字符长度,并未检测 GBK;GBK 字符串可能被截断在字符中间)
/**
 * Cut a UTF-8 string to a given display width and append "..." when text remains.
 *
 * Width is counted in "full-width" units: every multi-byte character and every
 * uppercase ASCII letter counts as 1, and every other single byte (lowercase
 * letters, digits, half-width punctuation, ...) counts as 0.5.
 *
 * Character boundaries are derived from UTF-8 lead-byte ranges only; no other
 * encoding is detected.
 *
 * @param string         $sourcestr String to cut.
 * @param int            $cutstart  Number of leading characters to skip before
 *                                  output starts; values <= 0 skip nothing.
 * @param int|float|null $cutlength Maximum width to keep, in the units described
 *                                  above; null keeps the rest of the string.
 *
 * @return string The selected text, followed by "..." if unread bytes remain.
 */
function cutStr($sourcestr, $cutstart = 0, $cutlength = null)
{
    $sourcestr = (string) $sourcestr;
    $byteCount = strlen($sourcestr);

    $result = '';
    $pos = 0;       // current byte offset
    $charIndex = 0; // characters consumed so far, including skipped ones
    $width = 0;     // accumulated display width of the kept characters

    while ($pos < $byteCount && ($cutlength === null || $width < $cutlength)) {
        $lead = ord($sourcestr[$pos]);

        if ($lead >= 240) {
            // 4-byte UTF-8 sequence; keep it whole instead of splitting it.
            $size = 4;
            $units = 1;
        } elseif ($lead >= 224) {
            // 3-byte UTF-8 sequence (most CJK characters).
            $size = 3;
            $units = 1;
        } elseif ($lead >= 192) {
            // 2-byte UTF-8 sequence.
            $size = 2;
            $units = 1;
        } elseif ($lead >= 65 && $lead <= 90) {
            // Uppercase ASCII letters are wide enough to count as a full unit.
            $size = 1;
            $units = 1;
        } else {
            // Lowercase letters, digits, half-width punctuation, stray bytes.
            $size = 1;
            $units = 0.5;
        }

        if ($charIndex >= $cutstart) {
            $result .= substr($sourcestr, $pos, $size);
            $width += $units;
        }

        $pos += $size;
        $charIndex++;
    }

    // Unread bytes remain: mark the cut with an ellipsis.
    if ($pos < $byteCount) {
        $result .= "...";
    }

    return $result;
}
Recent Comments