【www.bbyears.com--代码生成】
=======先介绍下BOM==============
Bytes Encoding Form
EF BB BF UTF-8
FF FE UTF-16 aka UCS-2, little endian
FE FF UTF-16 aka UCS-2, big endian
FF FE 00 00 UTF-32 aka UCS-4, little endian.
00 00 FE FF UTF-32 aka UCS-4, big-endian.
=======================
读取 unicode csv 文件
/**
 * Open a text file for reading and attach a stream filter that converts
 * its content to UTF-8.
 *
 * The first two bytes are inspected for a UTF-16 byte order mark. When one
 * is found the source encoding is taken to be "UTF-16"; otherwise the first
 * 1000 bytes are sampled and handed to mb_detect_encoding(). If an encoding
 * could be determined, a "convert.iconv.<encoding>/UTF-8" read filter is
 * appended to the stream.
 *
 * @param string $filename Path of the file to open.
 * @return resource|false The stream positioned at offset 0, or false when
 *                        the file could not be opened.
 */
function fopen_utf8($filename){
    $handle = fopen($filename, "r");
    if ($handle === false) {
        // Mirror fopen(): report failure instead of passing false to fread().
        return false;
    }

    $encoding = "";
    $bom = fread($handle, 2);
    rewind($handle);

    if ($bom === chr(0xff).chr(0xfe) || $bom === chr(0xfe).chr(0xff)) {
        // UTF-16 byte order mark present (little or big endian).
        $encoding = "UTF-16";
    } else {
        // Sample the first 1000 bytes. The trailing "e" is a workaround for
        // an mb_string bug; it must be concatenated with ".", not added with "+".
        $file_sample = fread($handle, 1000) . "e";
        rewind($handle);
        $encoding = mb_detect_encoding($file_sample, "UTF-8, UTF-7, ASCII, EUC-JP,SJIS, eucJP-win, SJIS-win, JIS, ISO-2022-JP");
    }

    // mb_detect_encoding() yields false when nothing matches; in that case
    // the stream is returned unfiltered.
    if ($encoding) {
        stream_filter_append($handle, "convert.iconv.".$encoding."/UTF-8");
    }

    return $handle;
}
生成 unicode csv (此php文件一定要是无BOM的UTF-8编码文件)
PHP 代码:
// Re-encode the UTF-8 payload as UTF-16LE and put the little-endian
// byte order mark (FF FE) in front of it.
$content = "\xFF\xFE" . iconv("UTF-8", "UTF-16LE", $content);

// Announce the response as a UTF-16LE CSV file offered for download.
header("Content-type: text/csv;charset=UTF-16LE");
header("Content-Disposition: attachment; filename=test.csv");
再介绍一个 操作 ANSI 编码 以 "," 隔开的 操作类
// Byte order marks: the code point U+FEFF as it appears on disk once
// serialized in each UTF encoding form.
define("UTF32_BIG_ENDIAN_BOM", "\x00\x00\xFE\xFF");
define("UTF32_LITTLE_ENDIAN_BOM", "\xFF\xFE\x00\x00");
define("UTF16_BIG_ENDIAN_BOM", "\xFE\xFF");
define("UTF16_LITTLE_ENDIAN_BOM", "\xFF\xFE");
define("UTF8_BOM", "\xEF\xBB\xBF");
/**
 * Determine a file's UTF encoding from its leading byte order mark.
 *
 * @param string $filename Path of the file to inspect.
 * @return string|null "UTF-8", "UTF-32BE", "UTF-32LE", "UTF-16BE" or
 *                     "UTF-16LE"; null when the file cannot be read or
 *                     does not start with one of these BOMs.
 */
function detect_utf_encoding($filename) {
    // The longest BOM is four bytes, so only that much is read.
    $head = file_get_contents($filename, false, null, 0, 4);
    if ($head === false) {
        return null;
    }

    $first2 = substr($head, 0, 2);
    $first3 = substr($head, 0, 3);
    $first4 = substr($head, 0, 4);

    if ($first3 === UTF8_BOM) return "UTF-8";
    // The UTF-32 marks are tested before the UTF-16 ones because the
    // UTF-32LE mark (FF FE 00 00) begins with the UTF-16LE mark (FF FE).
    elseif ($first4 === UTF32_BIG_ENDIAN_BOM) return "UTF-32BE";
    elseif ($first4 === UTF32_LITTLE_ENDIAN_BOM) return "UTF-32LE";
    elseif ($first2 === UTF16_BIG_ENDIAN_BOM) return "UTF-16BE";
    elseif ($first2 === UTF16_LITTLE_ENDIAN_BOM) return "UTF-16LE";

    return null;
}
?>