annotate e2gallerypro/e2upload/Backend/Assets/getid3/module.lib.iconv_replacement.php @ 9:325fe78243c9 judyates

[svn r10] all the good stuff from my week at mom's.
author rlm
date Sun, 11 Apr 2010 22:05:32 -0400
parents 3f6b44aa6b35
children
rev   line source
rlm@3 1 <?php
rlm@3 2 // +----------------------------------------------------------------------+
rlm@3 3 // | PHP version 5 |
rlm@3 4 // +----------------------------------------------------------------------+
rlm@3 5 // | Copyright (c) 2002-2006 James Heinrich, Allan Hansen |
rlm@3 6 // +----------------------------------------------------------------------+
rlm@3 7 // | This source file is subject to version 2 of the GPL license, |
rlm@3 8 // | that is bundled with this package in the file license.txt and is |
rlm@3 9 // | available through the world-wide-web at the following url: |
rlm@3 10 // | http://www.gnu.org/copyleft/gpl.html |
rlm@3 11 // +----------------------------------------------------------------------+
rlm@3 12 // | getID3() - http://getid3.sourceforge.net or http://www.getid3.org |
rlm@3 13 // +----------------------------------------------------------------------+
rlm@3 14 // | Authors: James Heinrich <infoØgetid3*org> |
rlm@3 15 // | Allan Hansen <ahØartemis*dk> |
rlm@3 16 // +----------------------------------------------------------------------+
rlm@3 17 // | module.lib.iconv_replacement.php |
rlm@3 18 // | getID3() library file. |
rlm@3 19 // | dependencies: NONE, required by getid3.php if no iconv() present. |
rlm@3 20 // +----------------------------------------------------------------------+
rlm@3 21 //
rlm@3 22 // $Id: module.lib.iconv_replacement.php,v 1.4 2006/11/02 10:48:02 ah Exp $
rlm@3 23
rlm@3 24
/**
 * Pure-PHP stand-in for iconv(), limited to the charsets listed in
 * getid3_iconv_replacement::iconv(): ISO-8859-1, UTF-8, UTF-16BE, UTF-16LE
 * and UTF-16 (byte order taken from / announced by a BOM).
 *
 * Every public iconv_<from>_<to>() method converts a byte string from one
 * charset to another; iconv() picks the right one from the charset names.
 * The class uses only built-in PHP functions, except that iconv() throws
 * getid3_exception (declared elsewhere) for an unsupported charset.
 */
class getid3_iconv_replacement
{

    /**
     * Convert $string from $in_charset to $out_charset.
     *
     * @param string $in_charset  Source charset name (exact, upper-case spelling as in the table below).
     * @param string $out_charset Target charset name (same spelling rules).
     * @param string $string      Bytes to convert.
     *
     * @return string The converted bytes; $string itself when both charsets are equal.
     *
     * @throws getid3_exception When either charset is not in the supported table.
     */
    public static function iconv($in_charset, $out_charset, $string) {

        if ($in_charset == $out_charset) {
            return $string;
        }

        // Charset name => fragment used to build the converter method name.
        static $supported_charsets = array (
            'ISO-8859-1' => 'iso88591',
            'UTF-8'      => 'utf8',
            'UTF-16BE'   => 'utf16be',
            'UTF-16LE'   => 'utf16le',
            'UTF-16'     => 'utf16'
        );

        // Invalid charset used - the input charset is reported first, then the output charset.
        foreach (array($in_charset, $out_charset) as $charset) {
            if (!isset($supported_charsets[$charset])) {
                throw new getid3_exception('PHP does not have iconv() support - cannot use ' . $charset . ' charset.');
            }
        }

        // Convert
        $function_name = 'iconv_' . $supported_charsets[$in_charset] . '_' . $supported_charsets[$out_charset];
        if (!is_callable(array(__CLASS__, $function_name))) {
            // Every pair in the table currently has a converter; this only guards
            // against a charset being added to the table without its methods.
            throw new getid3_exception('PHP does not have iconv() support - cannot convert ' . $in_charset . ' to ' . $out_charset . '.');
        }
        return self::$function_name($string);
    }



    /**
     * Encode a single code point as UTF-8.
     *
     * @param int $charval Code point value.
     *
     * @return string 1 to 4 bytes of UTF-8.
     */
    public static function iconv_int_utf8($charval) {
        if ($charval < 0x80) {
            // 0bbbbbbb
            return chr($charval);
        }
        if ($charval < 0x800) {
            // 110bbbbb 10bbbbbb
            return chr(0xC0 | ($charval >> 6))
                 . chr(0x80 | ($charval & 0x3F));
        }
        if ($charval < 0x10000) {
            // 1110bbbb 10bbbbbb 10bbbbbb
            return chr(0xE0 | ($charval >> 12))
                 . chr(0x80 | (($charval >> 6) & 0x3F))
                 . chr(0x80 | ($charval & 0x3F));
        }
        // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
        // Every continuation byte carries exactly 6 payload bits under a 10xxxxxx marker,
        // so each shifted group has to be masked with 0x3F before the marker is OR-ed in.
        return chr(0xF0 | ($charval >> 18))
             . chr(0x80 | (($charval >> 12) & 0x3F))
             . chr(0x80 | (($charval >> 6) & 0x3F))
             . chr(0x80 | ($charval & 0x3F));
    }



    /**
     * ISO-8859-1 => UTF-8
     *
     * Each input byte is the code point of the character it represents, so the
     * conversion is a plain per-byte UTF-8 encode.
     *
     * @param string $string ISO-8859-1 bytes.
     * @param bool   $bom    Prefix the result with the UTF-8 byte order mark.
     *
     * @return string
     */
    public static function iconv_iso88591_utf8($string, $bom=false) {
        $newcharstring = ($bom ? "\xEF\xBB\xBF" : '');
        $stringlength  = strlen($string);
        for ($i = 0; $i < $stringlength; $i++) {
            $newcharstring .= self::iconv_int_utf8(ord($string[$i]));
        }
        return $newcharstring;
    }



    /**
     * ISO-8859-1 => UTF-16BE
     *
     * @param string $string ISO-8859-1 bytes.
     * @param bool   $bom    Prefix the result with the big-endian byte order mark.
     *
     * @return string
     */
    public static function iconv_iso88591_utf16be($string, $bom=false) {
        $newcharstring = ($bom ? "\xFE\xFF" : '');
        $stringlength  = strlen($string);
        for ($i = 0; $i < $stringlength; $i++) {
            // high byte is always zero for ISO-8859-1
            $newcharstring .= "\x00" . $string[$i];
        }
        return $newcharstring;
    }



    /**
     * ISO-8859-1 => UTF-16LE
     *
     * @param string $string ISO-8859-1 bytes.
     * @param bool   $bom    Prefix the result with the little-endian byte order mark.
     *
     * @return string
     */
    public static function iconv_iso88591_utf16le($string, $bom=false) {
        $newcharstring = ($bom ? "\xFF\xFE" : '');
        $stringlength  = strlen($string);
        for ($i = 0; $i < $stringlength; $i++) {
            $newcharstring .= $string[$i] . "\x00";
        }
        return $newcharstring;
    }



    /**
     * ISO-8859-1 => UTF-16 (little-endian with BOM)
     *
     * @param string $string ISO-8859-1 bytes.
     *
     * @return string
     */
    public static function iconv_iso88591_utf16($string) {
        return self::iconv_iso88591_utf16le($string, true);
    }



    /**
     * UTF-8 => ISO-8859-1
     *
     * Uses utf8_decode() when PHP provides it. Otherwise decodes manually:
     * characters above U+00FF become '?', undecodable bytes are dropped.
     *
     * @param string $string UTF-8 bytes.
     *
     * @return string
     */
    public static function iconv_utf8_iso88591($string) {
        if (function_exists('utf8_decode')) {
            return utf8_decode($string);
        }
        // utf8_decode() unavailable, use getID3()'s iconv() conversions (possibly PHP is compiled without XML support)
        $newcharstring = '';
        foreach (self::utf8_to_codepoints($string) as $charval) {
            $newcharstring .= (($charval < 256) ? chr($charval) : '?');
        }
        return $newcharstring;
    }



    /**
     * UTF-8 => UTF-16BE
     *
     * @param string $string UTF-8 bytes.
     * @param bool   $bom    Prefix the result with the big-endian byte order mark.
     *
     * @return string
     */
    public static function iconv_utf8_utf16be($string, $bom=false) {
        return ($bom ? "\xFE\xFF" : '') . self::codepoints_to_utf16(self::utf8_to_codepoints($string), 'n');
    }



    /**
     * UTF-8 => UTF-16LE
     *
     * @param string $string UTF-8 bytes.
     * @param bool   $bom    Prefix the result with the little-endian byte order mark.
     *
     * @return string
     */
    public static function iconv_utf8_utf16le($string, $bom=false) {
        return ($bom ? "\xFF\xFE" : '') . self::codepoints_to_utf16(self::utf8_to_codepoints($string), 'v');
    }



    /**
     * UTF-8 => UTF-16 (little-endian with BOM)
     *
     * @param string $string UTF-8 bytes.
     *
     * @return string
     */
    public static function iconv_utf8_utf16($string) {
        return self::iconv_utf8_utf16le($string, true);
    }



    /**
     * UTF-16BE => ISO-8859-1
     *
     * A leading big-endian BOM is stripped. Code units above 0x00FF become '?'.
     *
     * @param string $string UTF-16BE bytes.
     *
     * @return string
     */
    public static function iconv_utf16be_iso88591($string) {
        if (substr($string, 0, 2) == "\xFE\xFF") {
            // strip BOM
            $string = substr($string, 2);
        }
        return self::utf16_units_to_iso88591(self::utf16_to_units($string, true));
    }



    /**
     * UTF-16BE => UTF-8
     *
     * A leading big-endian BOM is stripped.
     *
     * @param string $string UTF-16BE bytes.
     *
     * @return string
     */
    public static function iconv_utf16be_utf8($string) {
        if (substr($string, 0, 2) == "\xFE\xFF") {
            // strip BOM
            $string = substr($string, 2);
        }
        return self::utf16_units_to_utf8(self::utf16_to_units($string, true));
    }



    /**
     * UTF-16BE => UTF-16LE (via UTF-8; no BOM in the result)
     *
     * @param string $string UTF-16BE bytes.
     *
     * @return string
     */
    public static function iconv_utf16be_utf16le($string) {
        return self::iconv_utf8_utf16le(self::iconv_utf16be_utf8($string));
    }



    /**
     * UTF-16BE => UTF-16 (via UTF-8; little-endian with BOM)
     *
     * @param string $string UTF-16BE bytes.
     *
     * @return string
     */
    public static function iconv_utf16be_utf16($string) {
        return self::iconv_utf8_utf16(self::iconv_utf16be_utf8($string));
    }



    /**
     * UTF-16LE => ISO-8859-1
     *
     * A leading little-endian BOM is stripped. Code units above 0x00FF become '?'.
     *
     * @param string $string UTF-16LE bytes.
     *
     * @return string
     */
    public static function iconv_utf16le_iso88591($string) {
        if (substr($string, 0, 2) == "\xFF\xFE") {
            // strip BOM
            $string = substr($string, 2);
        }
        return self::utf16_units_to_iso88591(self::utf16_to_units($string, false));
    }



    /**
     * UTF-16LE => UTF-8
     *
     * A leading little-endian BOM is stripped.
     *
     * @param string $string UTF-16LE bytes.
     *
     * @return string
     */
    public static function iconv_utf16le_utf8($string) {
        if (substr($string, 0, 2) == "\xFF\xFE") {
            // strip BOM
            $string = substr($string, 2);
        }
        return self::utf16_units_to_utf8(self::utf16_to_units($string, false));
    }



    /**
     * UTF-16LE => UTF-16BE (via UTF-8; no BOM in the result)
     *
     * @param string $string UTF-16LE bytes.
     *
     * @return string
     */
    public static function iconv_utf16le_utf16be($string) {
        return self::iconv_utf8_utf16be(self::iconv_utf16le_utf8($string));
    }



    /**
     * UTF-16LE => UTF-16 (via UTF-8; little-endian with BOM)
     *
     * @param string $string UTF-16LE bytes.
     *
     * @return string
     */
    public static function iconv_utf16le_utf16($string) {
        return self::iconv_utf8_utf16(self::iconv_utf16le_utf8($string));
    }



    /**
     * UTF-16 => ISO-8859-1
     *
     * Byte order is taken from the leading BOM. Without a BOM the input is
     * returned unchanged.
     *
     * @param string $string UTF-16 bytes, normally starting with a BOM.
     *
     * @return string
     */
    public static function iconv_utf16_iso88591($string) {
        $bom = substr($string, 0, 2);
        if ($bom == "\xFE\xFF") {
            return self::iconv_utf16be_iso88591(substr($string, 2));
        } elseif ($bom == "\xFF\xFE") {
            return self::iconv_utf16le_iso88591(substr($string, 2));
        }
        return $string;
    }



    /**
     * UTF-16 => UTF-8
     *
     * Byte order is taken from the leading BOM. Without a BOM the input is
     * returned unchanged.
     *
     * @param string $string UTF-16 bytes, normally starting with a BOM.
     *
     * @return string
     */
    public static function iconv_utf16_utf8($string) {
        $bom = substr($string, 0, 2);
        if ($bom == "\xFE\xFF") {
            return self::iconv_utf16be_utf8(substr($string, 2));
        } elseif ($bom == "\xFF\xFE") {
            return self::iconv_utf16le_utf8(substr($string, 2));
        }
        return $string;
    }



    /**
     * UTF-16 => UTF-16BE (via UTF-8; no BOM in the result)
     *
     * NOTE(review): when the input has no BOM, iconv_utf16_utf8() hands the raw
     * bytes back and they are then parsed as UTF-8 here - confirm whether
     * BOM-less input can reach this method.
     *
     * @param string $string UTF-16 bytes, normally starting with a BOM.
     *
     * @return string
     */
    public static function iconv_utf16_utf16be($string) {
        return self::iconv_utf8_utf16be(self::iconv_utf16_utf8($string));
    }



    /**
     * UTF-16 => UTF-16LE (via UTF-8; no BOM in the result)
     *
     * NOTE(review): same BOM-less caveat as iconv_utf16_utf16be().
     *
     * @param string $string UTF-16 bytes, normally starting with a BOM.
     *
     * @return string
     */
    public static function iconv_utf16_utf16le($string) {
        return self::iconv_utf8_utf16le(self::iconv_utf16_utf8($string));
    }



    /**
     * Decode UTF-8 bytes into a list of code point values.
     *
     * Bytes that cannot start a sequence, and lead bytes whose sequence is cut
     * off by the end of the string, are skipped. Continuation bytes are not
     * validated beyond taking their low 6 bits.
     *
     * @param string $string UTF-8 bytes.
     *
     * @return array List of integer code points in input order.
     */
    private static function utf8_to_codepoints($string) {
        $codepoints   = array();
        $stringlength = strlen($string);
        $offset       = 0;
        while ($offset < $stringlength) {
            $lead = ord($string[$offset]);
            if (($lead | 0x07) == 0xF7) {
                // 11110bbb 10bbbbbb 10bbbbbb 10bbbbbb
                $extra   = 3;
                $charval = $lead & 0x07;
            } elseif (($lead | 0x0F) == 0xEF) {
                // 1110bbbb 10bbbbbb 10bbbbbb
                $extra   = 2;
                $charval = $lead & 0x0F;
            } elseif (($lead | 0x1F) == 0xDF) {
                // 110bbbbb 10bbbbbb
                $extra   = 1;
                $charval = $lead & 0x1F;
            } elseif ($lead < 0x80) {
                // 0bbbbbbb
                $extra   = 0;
                $charval = $lead;
            } else {
                // stray continuation byte or invalid lead byte: skip it
                $offset++;
                continue;
            }
            if (($offset + $extra) >= $stringlength) {
                // sequence is truncated by the end of the string: skip the lead byte
                $offset++;
                continue;
            }
            for ($i = 1; $i <= $extra; $i++) {
                // append 6 payload bits per continuation byte (OR, not AND)
                $charval = ($charval << 6) | (ord($string[$offset + $i]) & 0x3F);
            }
            $codepoints[] = $charval;
            $offset += $extra + 1;
        }
        return $codepoints;
    }



    /**
     * Encode code points as UTF-16 without a BOM.
     *
     * Code points above U+FFFF are written as a surrogate pair; values beyond
     * U+10FFFF cannot be represented and are written as '?'.
     *
     * @param array  $codepoints  List of integer code points.
     * @param string $pack_format pack() code for one 16-bit unit: 'n' (big-endian) or 'v' (little-endian).
     *
     * @return string
     */
    private static function codepoints_to_utf16($codepoints, $pack_format) {
        $newcharstring = '';
        foreach ($codepoints as $charval) {
            if ($charval < 0x10000) {
                $newcharstring .= pack($pack_format, $charval);
            } elseif ($charval <= 0x10FFFF) {
                $charval -= 0x10000;
                $newcharstring .= pack($pack_format, 0xD800 | ($charval >> 10));
                $newcharstring .= pack($pack_format, 0xDC00 | ($charval & 0x3FF));
            } else {
                $newcharstring .= pack($pack_format, 0x3F);
            }
        }
        return $newcharstring;
    }



    /**
     * Split UTF-16 bytes (no BOM) into 16-bit code unit values.
     *
     * @param string $string     UTF-16 bytes.
     * @param bool   $big_endian True for UTF-16BE, false for UTF-16LE.
     *
     * @return array List of integer code units in input order.
     */
    private static function utf16_to_units($string, $big_endian) {
        $units        = array();
        $stringlength = strlen($string);
        for ($i = 0; $i < $stringlength; $i += 2) {
            $first = ord($string[$i]);
            if (($i + 1) >= $stringlength) {
                // Dangling final byte of an odd-length string is taken at face value.
                // NOTE(review): assumed to match what the previously used getid3_lib
                // BigEndian2Int()/LittleEndian2Int() returned for a 1-byte input - confirm.
                $units[] = $first;
                break;
            }
            $second  = ord($string[$i + 1]);
            $units[] = ($big_endian ? (($first << 8) | $second) : (($second << 8) | $first));
        }
        return $units;
    }



    /**
     * Encode UTF-16 code units as UTF-8, joining surrogate pairs into one character.
     *
     * An unpaired surrogate is encoded as-is, like any other 16-bit value.
     *
     * @param array $units List of integer 16-bit code units.
     *
     * @return string
     */
    private static function utf16_units_to_utf8($units) {
        $newcharstring = '';
        $unitcount     = count($units);
        for ($i = 0; $i < $unitcount; $i++) {
            $charval = $units[$i];
            if (($charval >= 0xD800) && ($charval <= 0xDBFF) && (($i + 1) < $unitcount)
                && ($units[$i + 1] >= 0xDC00) && ($units[$i + 1] <= 0xDFFF)) {
                // high + low surrogate => one code point above U+FFFF
                $charval = 0x10000 + (($charval - 0xD800) << 10) + ($units[$i + 1] - 0xDC00);
                $i++;
            }
            $newcharstring .= self::iconv_int_utf8($charval);
        }
        return $newcharstring;
    }



    /**
     * Map UTF-16 code units to ISO-8859-1 bytes; units above 0x00FF become '?'.
     *
     * @param array $units List of integer 16-bit code units.
     *
     * @return string
     */
    private static function utf16_units_to_iso88591($units) {
        $newcharstring = '';
        foreach ($units as $charval) {
            $newcharstring .= (($charval < 256) ? chr($charval) : '?');
        }
        return $newcharstring;
    }

}
rlm@3 414
rlm@3 415 ?>