// | Allan Hansen                                                         |
// +----------------------------------------------------------------------+
// | module.lib.iconv_replacement.php                                     |
// | getID3() library file.                                               |
// | dependencies: NONE, required by getid3.php if no iconv() present.    |
// +----------------------------------------------------------------------+
//
// $Id: module.lib.iconv_replacement.php,v 1.4 2006/11/02 10:48:02 ah Exp $


/**
 * Pure-PHP replacement for iconv(), limited to conversions between
 * ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE and UTF-16.
 *
 * iconv() is the entry point; it dispatches to the public static
 * iconv_<from>_<to>() converters below. The class is self-contained: it only
 * relies on PHP core string functions (plus getid3_exception for reporting
 * unsupported charsets).
 */
class getid3_iconv_replacement
{

    /**
     * Supported charset names mapped to the token used in converter method
     * names (iconv_<token>_<token>). Lookup is exact and case-sensitive.
     */
    private static $supported_charsets = array (
        'ISO-8859-1' => 'iso88591',
        'UTF-8'      => 'utf8',
        'UTF-16BE'   => 'utf16be',
        'UTF-16LE'   => 'utf16le',
        'UTF-16'     => 'utf16'
    );



    /**
     * Convert $string from $in_charset to $out_charset.
     *
     * @param string $in_charset   Source charset, one of the keys of $supported_charsets.
     * @param string $out_charset  Target charset, one of the keys of $supported_charsets.
     * @param string $string       Bytes to convert.
     * @return string              Converted bytes; $string itself when both charsets are identical.
     * @throws getid3_exception    When either charset is not supported (the input charset is reported first).
     */
    public static function iconv($in_charset, $out_charset, $string) {

        // Identical charsets need no conversion (and no validation).
        if ($in_charset === $out_charset) {
            return $string;
        }

        // Invalid charset used
        foreach (array ($in_charset, $out_charset) as $charset) {
            if (!isset(self::$supported_charsets[$charset])) {
                throw new getid3_exception('PHP does not have iconv() support - cannot use ' . $charset . ' charset.');
            }
        }

        // Convert - a converter exists for every ordered pair of distinct supported charsets.
        $function_name = 'iconv_' . self::$supported_charsets[$in_charset] . '_' . self::$supported_charsets[$out_charset];

        return self::$function_name($string);
    }



    /**
     * Encode a single code point as UTF-8.
     *
     * @param int $charval  Code point value.
     * @return string       1 to 4 bytes of UTF-8.
     */
    public static function iconv_int_utf8($charval) {
        if ($charval < 0x80) {
            // 0bbbbbbb
            return chr($charval);
        }
        if ($charval < 0x800) {
            // 110bbbbb 10bbbbbb
            return chr(0xC0 | ($charval >> 6))
                 . chr(0x80 | ($charval & 0x3F));
        }
        if ($charval < 0x10000) {
            // 1110bbbb 10bbbbbb 10bbbbbb
            return chr(0xE0 | ($charval >> 12))
                 . chr(0x80 | (($charval >> 6) & 0x3F))
                 . chr(0x80 | ($charval & 0x3F));
        }
        // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
        // Every continuation byte carries exactly 6 payload bits under a 10 prefix.
        return chr(0xF0 | (($charval >> 18) & 0x07))
             . chr(0x80 | (($charval >> 12) & 0x3F))
             . chr(0x80 | (($charval >> 6) & 0x3F))
             . chr(0x80 | ($charval & 0x3F));
    }



    /**
     * Decode UTF-8 bytes into a list of code points.
     *
     * Bytes that cannot start a sequence, and lead bytes whose continuation
     * bytes are missing or malformed, are skipped one byte at a time.
     * Overlong encodings are not rejected.
     *
     * @param string $string  UTF-8 encoded bytes.
     * @return array          List of integer code points.
     */
    private static function utf8_to_codepoints($string) {
        $codepoints   = array ();
        $stringlength = strlen($string);
        $offset       = 0;

        while ($offset < $stringlength) {
            $lead = ord($string[$offset]);

            if ($lead < 0x80) {
                // 0bbbbbbb
                $codepoints[] = $lead;
                $offset++;
                continue;
            }

            if (($lead & 0xE0) === 0xC0) {
                // 110bbbbb 10bbbbbb
                $trailing = 1;
                $charval  = $lead & 0x1F;
            } elseif (($lead & 0xF0) === 0xE0) {
                // 1110bbbb 10bbbbbb 10bbbbbb
                $trailing = 2;
                $charval  = $lead & 0x0F;
            } elseif (($lead & 0xF8) === 0xF0) {
                // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                $trailing = 3;
                $charval  = $lead & 0x07;
            } else {
                // stray continuation byte or invalid lead byte
                $offset++;
                continue;
            }

            // The whole sequence must be present and well-formed before it is consumed.
            $valid = ($offset + $trailing < $stringlength);
            for ($k = 1; $valid && $k <= $trailing; $k++) {
                $byte = ord($string[$offset + $k]);
                if (($byte & 0xC0) !== 0x80) {
                    $valid = false;
                } else {
                    $charval = ($charval << 6) | ($byte & 0x3F);
                }
            }

            if ($valid) {
                $codepoints[] = $charval;
                $offset += $trailing + 1;
            } else {
                $offset++;
            }
        }

        return $codepoints;
    }



    /**
     * Decode UTF-16 bytes (no BOM handling) into a list of code points.
     *
     * A high surrogate directly followed by a low surrogate is combined into
     * one supplementary code point; unpaired surrogates are passed through.
     * A dangling odd byte at the end is kept as a code unit with that byte's
     * value - NOTE(review): presumably what the previous getid3_lib based
     * decoding produced; confirm before relying on it.
     *
     * @param string $string      UTF-16 encoded bytes.
     * @param bool   $big_endian  true for UTF-16BE, false for UTF-16LE.
     * @return array              List of integer code points.
     */
    private static function utf16_to_codepoints($string, $big_endian) {
        $stringlength = strlen($string);
        $hi           = ($big_endian ? 0 : 1);
        $lo           = 1 - $hi;

        $units = array ();
        for ($i = 0; $i + 1 < $stringlength; $i += 2) {
            $units[] = (ord($string[$i + $hi]) << 8) | ord($string[$i + $lo]);
        }
        if ($stringlength % 2) {
            $units[] = ord($string[$stringlength - 1]);
        }

        $codepoints = array ();
        $unitcount  = count($units);
        for ($i = 0; $i < $unitcount; $i++) {
            $unit = $units[$i];
            if (($unit >= 0xD800) && ($unit <= 0xDBFF) && ($i + 1 < $unitcount)
                && ($units[$i + 1] >= 0xDC00) && ($units[$i + 1] <= 0xDFFF)) {
                $unit = 0x10000 + (($unit - 0xD800) << 10) + ($units[$i + 1] - 0xDC00);
                $i++;
            }
            $codepoints[] = $unit;
        }

        return $codepoints;
    }



    /**
     * Encode one code point as UTF-16.
     *
     * @param int  $charval     Code point value.
     * @param bool $big_endian  true for UTF-16BE, false for UTF-16LE.
     * @return string           2 bytes, or a 4-byte surrogate pair for U+10000..U+10FFFF;
     *                          an encoded '?' for values beyond U+10FFFF.
     */
    private static function codepoint_to_utf16($charval, $big_endian) {
        // pack(): n = unsigned 16 bit big endian, v = unsigned 16 bit little endian
        $format = ($big_endian ? 'n' : 'v');

        if ($charval < 0x10000) {
            return pack($format, $charval);
        }
        if ($charval > 0x10FFFF) {
            // not representable in UTF-16
            return pack($format, ord('?'));
        }
        $charval -= 0x10000;
        return pack($format, 0xD800 | ($charval >> 10)) . pack($format, 0xDC00 | ($charval & 0x3FF));
    }



    /**
     * Remove $bom from the start of $string when present.
     *
     * @param string $string  Input bytes.
     * @param string $bom     Two-byte byte order mark to look for.
     * @return string         $string without the leading BOM.
     */
    private static function strip_bom($string, $bom) {
        if (substr($string, 0, 2) === $bom) {
            // cast: substr() returns false instead of '' on older PHP versions
            return (string) substr($string, 2);
        }
        return $string;
    }



    // ISO-8859-1 => UTF-8
    /**
     * @param string $string  ISO-8859-1 bytes.
     * @param bool   $bom     Prepend the UTF-8 byte order mark.
     * @return string         UTF-8 bytes.
     */
    public static function iconv_iso88591_utf8($string, $bom=false) {
        // utf8_encode() is deliberately not used: it is deprecated as of PHP 8.2
        // and cannot honour $bom.
        $newcharstring = ($bom ? "\xEF\xBB\xBF" : '');
        $stringlength  = strlen($string);
        for ($i = 0; $i < $stringlength; $i++) {
            $newcharstring .= self::iconv_int_utf8(ord($string[$i]));
        }
        return $newcharstring;
    }



    // ISO-8859-1 => UTF-16BE
    /**
     * @param string $string  ISO-8859-1 bytes.
     * @param bool   $bom     Prepend the big-endian byte order mark.
     * @return string         UTF-16BE bytes.
     */
    public static function iconv_iso88591_utf16be($string, $bom=false) {
        $newcharstring = ($bom ? "\xFE\xFF" : '');
        $stringlength  = strlen($string);
        for ($i = 0; $i < $stringlength; $i++) {
            $newcharstring .= "\x00".$string[$i];
        }
        return $newcharstring;
    }



    // ISO-8859-1 => UTF-16LE
    /**
     * @param string $string  ISO-8859-1 bytes.
     * @param bool   $bom     Prepend the little-endian byte order mark.
     * @return string         UTF-16LE bytes.
     */
    public static function iconv_iso88591_utf16le($string, $bom=false) {
        $newcharstring = ($bom ? "\xFF\xFE" : '');
        $stringlength  = strlen($string);
        for ($i = 0; $i < $stringlength; $i++) {
            $newcharstring .= $string[$i]."\x00";
        }
        return $newcharstring;
    }



    // ISO-8859-1 => UTF-16
    /**
     * @param string $string  ISO-8859-1 bytes.
     * @return string         Little-endian UTF-16 preceded by its byte order mark.
     */
    public static function iconv_iso88591_utf16($string) {
        return self::iconv_iso88591_utf16le($string, true);
    }



    // UTF-8 => ISO-8859-1
    /**
     * @param string $string  UTF-8 bytes.
     * @return string         ISO-8859-1 bytes; code points above 255 become '?',
     *                        invalid UTF-8 bytes are skipped.
     */
    public static function iconv_utf8_iso88591($string) {
        // utf8_decode() is deliberately not used: it is deprecated as of PHP 8.2.
        $newcharstring = '';
        foreach (self::utf8_to_codepoints($string) as $charval) {
            $newcharstring .= (($charval < 256) ? chr($charval) : '?');
        }
        return $newcharstring;
    }



    // UTF-8 => UTF-16BE
    /**
     * @param string $string  UTF-8 bytes.
     * @param bool   $bom     Prepend the big-endian byte order mark.
     * @return string         UTF-16BE bytes; invalid UTF-8 bytes are skipped.
     */
    public static function iconv_utf8_utf16be($string, $bom=false) {
        $newcharstring = ($bom ? "\xFE\xFF" : '');
        foreach (self::utf8_to_codepoints($string) as $charval) {
            $newcharstring .= self::codepoint_to_utf16($charval, true);
        }
        return $newcharstring;
    }



    // UTF-8 => UTF-16LE
    /**
     * @param string $string  UTF-8 bytes.
     * @param bool   $bom     Prepend the little-endian byte order mark.
     * @return string         UTF-16LE bytes; invalid UTF-8 bytes are skipped.
     */
    public static function iconv_utf8_utf16le($string, $bom=false) {
        $newcharstring = ($bom ? "\xFF\xFE" : '');
        foreach (self::utf8_to_codepoints($string) as $charval) {
            $newcharstring .= self::codepoint_to_utf16($charval, false);
        }
        return $newcharstring;
    }



    // UTF-8 => UTF-16
    /**
     * @param string $string  UTF-8 bytes.
     * @return string         Little-endian UTF-16 preceded by its byte order mark.
     */
    public static function iconv_utf8_utf16($string) {
        return self::iconv_utf8_utf16le($string, true);
    }



    // UTF-16BE => ISO-8859-1
    /**
     * @param string $string  UTF-16BE bytes, optionally starting with a BOM (stripped).
     * @return string         ISO-8859-1 bytes; code points above 255 become '?'.
     */
    public static function iconv_utf16be_iso88591($string) {
        $newcharstring = '';
        foreach (self::utf16_to_codepoints(self::strip_bom($string, "\xFE\xFF"), true) as $charval) {
            $newcharstring .= (($charval < 256) ? chr($charval) : '?');
        }
        return $newcharstring;
    }



    // UTF-16BE => UTF-8
    /**
     * @param string $string  UTF-16BE bytes, optionally starting with a BOM (stripped).
     * @return string         UTF-8 bytes.
     */
    public static function iconv_utf16be_utf8($string) {
        $newcharstring = '';
        foreach (self::utf16_to_codepoints(self::strip_bom($string, "\xFE\xFF"), true) as $charval) {
            $newcharstring .= self::iconv_int_utf8($charval);
        }
        return $newcharstring;
    }



    // UTF-16BE => UTF-16LE
    /**
     * @param string $string  UTF-16BE bytes.
     * @return string         UTF-16LE bytes without BOM (converted via UTF-8).
     */
    public static function iconv_utf16be_utf16le($string) {
        return self::iconv_utf8_utf16le(self::iconv_utf16be_utf8($string));
    }



    // UTF-16BE => UTF-16
    /**
     * @param string $string  UTF-16BE bytes.
     * @return string         Little-endian UTF-16 preceded by its byte order mark.
     */
    public static function iconv_utf16be_utf16($string) {
        return self::iconv_utf8_utf16(self::iconv_utf16be_utf8($string));
    }



    // UTF-16LE => ISO-8859-1
    /**
     * @param string $string  UTF-16LE bytes, optionally starting with a BOM (stripped).
     * @return string         ISO-8859-1 bytes; code points above 255 become '?'.
     */
    public static function iconv_utf16le_iso88591($string) {
        $newcharstring = '';
        foreach (self::utf16_to_codepoints(self::strip_bom($string, "\xFF\xFE"), false) as $charval) {
            $newcharstring .= (($charval < 256) ? chr($charval) : '?');
        }
        return $newcharstring;
    }



    // UTF-16LE => UTF-8
    /**
     * @param string $string  UTF-16LE bytes, optionally starting with a BOM (stripped).
     * @return string         UTF-8 bytes.
     */
    public static function iconv_utf16le_utf8($string) {
        $newcharstring = '';
        foreach (self::utf16_to_codepoints(self::strip_bom($string, "\xFF\xFE"), false) as $charval) {
            $newcharstring .= self::iconv_int_utf8($charval);
        }
        return $newcharstring;
    }



    // UTF-16LE => UTF-16BE
    /**
     * @param string $string  UTF-16LE bytes.
     * @return string         UTF-16BE bytes without BOM (converted via UTF-8).
     */
    public static function iconv_utf16le_utf16be($string) {
        return self::iconv_utf8_utf16be(self::iconv_utf16le_utf8($string));
    }



    // UTF-16LE => UTF-16
    /**
     * @param string $string  UTF-16LE bytes.
     * @return string         Little-endian UTF-16 preceded by its byte order mark.
     */
    public static function iconv_utf16le_utf16($string) {
        return self::iconv_utf8_utf16(self::iconv_utf16le_utf8($string));
    }



    // UTF-16 => ISO-8859-1
    /**
     * The byte order is taken from the leading byte order mark.
     *
     * NOTE(review): input without a BOM is returned untouched, i.e. it is NOT
     * converted. Kept as-is so existing callers see no change; RFC 2781
     * suggests reading BOM-less UTF-16 as big-endian - confirm the intent.
     *
     * @param string $string  UTF-16 bytes starting with a BOM.
     * @return string         ISO-8859-1 bytes, or $string unchanged when no BOM is present.
     */
    public static function iconv_utf16_iso88591($string) {
        $bom = substr($string, 0, 2);
        if ($bom === "\xFE\xFF") {
            return self::iconv_utf16be_iso88591((string) substr($string, 2));
        }
        if ($bom === "\xFF\xFE") {
            return self::iconv_utf16le_iso88591((string) substr($string, 2));
        }
        return $string;
    }



    // UTF-16 => UTF-8
    /**
     * The byte order is taken from the leading byte order mark.
     *
     * NOTE(review): input without a BOM is returned untouched, i.e. it is NOT
     * converted. Kept as-is so existing callers see no change; RFC 2781
     * suggests reading BOM-less UTF-16 as big-endian - confirm the intent.
     *
     * @param string $string  UTF-16 bytes starting with a BOM.
     * @return string         UTF-8 bytes, or $string unchanged when no BOM is present.
     */
    public static function iconv_utf16_utf8($string) {
        $bom = substr($string, 0, 2);
        if ($bom === "\xFE\xFF") {
            return self::iconv_utf16be_utf8((string) substr($string, 2));
        }
        if ($bom === "\xFF\xFE") {
            return self::iconv_utf16le_utf8((string) substr($string, 2));
        }
        return $string;
    }



    // UTF-16 => UTF-16BE
    /**
     * @param string $string  UTF-16 bytes starting with a BOM.
     * @return string         UTF-16BE bytes without BOM (converted via UTF-8).
     */
    public static function iconv_utf16_utf16be($string) {
        return self::iconv_utf8_utf16be(self::iconv_utf16_utf8($string));
    }



    // UTF-16 => UTF-16LE
    /**
     * @param string $string  UTF-16 bytes starting with a BOM.
     * @return string         UTF-16LE bytes without BOM (converted via UTF-8).
     */
    public static function iconv_utf16_utf16le($string) {
        return self::iconv_utf8_utf16le(self::iconv_utf16_utf8($string));
    }

}