// +----------------------------------------------------------------------+
// | Allan Hansen                                                         |
// +----------------------------------------------------------------------+
// | module.archive.zip.php                                               |
// | Module for analyzing pkZip files                                     |
// | dependencies: NONE                                                   |
// +----------------------------------------------------------------------+
//
// $Id: module.archive.zip.php,v 1.4 2006/11/02 10:48:00 ah Exp $



/**
 * pkZip archive analyzer.
 *
 * Reads the archive through the file pointer $this->getid3->fp and writes
 * its findings into $this->getid3->info['zip'].
 */
class getid3_zip extends getid3_handler
{

    /**
     * Locate the End Of Central Directory (EOCD) record by scanning backwards
     * from the end of the file, then parse every Central Directory entry.
     *
     * On success $getid3->info['zip'] holds 'end_central_directory',
     * 'central_directory', the size/entry counters, a nested 'files' tree and
     * (when available) 'comments', 'compression_method', 'compression_speed'.
     *
     * If no EOCD record is found, the local file headers are scanned from the
     * start of the file (results in [zip][entries]) and an exception is thrown
     * to signal that the listing may be incomplete.
     *
     * @return bool true when the central directory was parsed
     * @throws getid3_exception when no central directory entries exist, or
     *                          when the EOCD record cannot be found
     */
    public function Analyze() {

        $getid3 = $this->getid3;

        $getid3->info['zip'] = array ();
        $info_zip = &$getid3->info['zip'];

        $getid3->info['fileformat'] = 'zip';

        $info_zip['encoding'] = 'ISO-8859-1';
        $info_zip['files']    = array ();
        $info_zip['compressed_size'] = $info_zip['uncompressed_size'] = $info_zip['entries_count'] = 0;

        // Work with absolute offsets: a negative SEEK_END offset that reaches
        // past the start of the file makes fseek() fail, which broke the search
        // for archives shorter than (or not a multiple of) the 128-byte chunk.
        fseek($getid3->fp, 0, SEEK_END);
        $chunk_end = ftell($getid3->fp);

        $eocd_signature      = 'PK'."\x05\x06";
        $eocd_search_data    = '';
        $eocd_search_counter = 0;
        while (($eocd_search_counter++ < 512) && ($chunk_end > 0)) {

            // read the next (up to) 128 bytes towards the start of the file
            // and prepend them, so a signature straddling two chunks is found
            $chunk_start = max(0, $chunk_end - 128);
            fseek($getid3->fp, $chunk_start, SEEK_SET);
            $eocd_search_data = fread($getid3->fp, $chunk_end - $chunk_start).$eocd_search_data;
            $chunk_end = $chunk_start;

            $eocd_position = strpos($eocd_search_data, $eocd_signature);
            if ($eocd_position === false) {
                continue;
            }

            // $eocd_search_data starts at $chunk_start in the file
            fseek($getid3->fp, $chunk_start + $eocd_position, SEEK_SET);
            $info_zip['end_central_directory'] = $this->ZIPparseEndOfCentralDirectory();

            fseek($getid3->fp, $info_zip['end_central_directory']['directory_offset'], SEEK_SET);
            $info_zip['entries_count'] = 0;
            while ($central_directoryentry = $this->ZIPparseCentralDirectory()) {
                $info_zip['central_directory'][] = $central_directoryentry;
                $info_zip['entries_count']++;
                $info_zip['compressed_size']   += $central_directoryentry['compressed_size'];
                $info_zip['uncompressed_size'] += $central_directoryentry['uncompressed_size'];

                // only entries with content are added to the file tree
                // ('filename' is absent when the stored filename length is 0)
                if (($central_directoryentry['uncompressed_size'] > 0) && isset($central_directoryentry['filename'])) {
                    $info_zip['files'] = self::array_merge_clobber($info_zip['files'], self::CreateDeepArray($central_directoryentry['filename'], '/', $central_directoryentry['uncompressed_size']));
                }
            }

            if ($info_zip['entries_count'] == 0) {
                throw new getid3_exception('No Central Directory entries found (truncated file?)');
            }

            if (!empty($info_zip['end_central_directory']['comment'])) {
                $info_zip['comments']['comment'][] = $info_zip['end_central_directory']['comment'];
            }

            // archive-wide compression info is taken from the first entry
            if (isset($info_zip['central_directory'][0]['compression_method'])) {
                $info_zip['compression_method'] = $info_zip['central_directory'][0]['compression_method'];
            }
            if (isset($info_zip['central_directory'][0]['flags']['compression_speed'])) {
                $info_zip['compression_speed'] = $info_zip['central_directory'][0]['flags']['compression_speed'];
            }
            if (isset($info_zip['compression_method']) && ($info_zip['compression_method'] == 'store') && !isset($info_zip['compression_speed'])) {
                $info_zip['compression_speed'] = 'store';
            }

            return true;
        }

        if ($this->getZIPentriesFilepointer()) {

            // central directory couldn't be found and/or parsed
            // scan through actual file data entries, recover as much as possible from probable truncated file
            if (isset($info_zip['compressed_size']) && ($info_zip['compressed_size'] > ($getid3->info['filesize'] - 46 - 22))) {
                throw new getid3_exception('Warning: Truncated file! - Total compressed file sizes ('.$info_zip['compressed_size'].' bytes) is greater than filesize minus Central Directory and End Of Central Directory structures ('.($getid3->info['filesize'] - 46 - 22).' bytes)');
            }
            throw new getid3_exception('Cannot find End Of Central Directory - returned list of files in [zip][entries] array may not be complete');
        }

        //throw new getid3_exception('Cannot find End Of Central Directory (truncated file?)');
    }



    /**
     * Parse the archive front to back: local file headers first, then the
     * Central Directory entries, then the End Of Central Directory record.
     *
     * Results are stored in $getid3->info['zip'] ('entries',
     * 'central_directory', 'end_central_directory', counters, 'comments').
     *
     * @return bool true on success
     * @throws getid3_exception when any of the three sections is missing
     */
    private function getZIPHeaderFilepointerTopDown() {

        // shortcut
        $getid3 = $this->getid3;

        $getid3->info['fileformat'] = 'zip';

        $getid3->info['zip'] = array ();
        // bind by reference - a plain local array would be thrown away on return
        $info_zip = &$getid3->info['zip'];
        $info_zip['compressed_size'] = $info_zip['uncompressed_size'] = $info_zip['entries_count'] = 0;

        rewind($getid3->fp);
        while ($fileentry = $this->ZIPparseLocalFileHeader()) {
            $info_zip['entries'][] = $fileentry;
            $info_zip['entries_count']++;
        }
        if ($info_zip['entries_count'] == 0) {
            throw new getid3_exception('No Local File Header entries found');
        }

        // the failed local-header read rewound the pointer, so it now sits
        // where the central directory is expected to begin
        $info_zip['entries_count'] = 0;
        while ($central_directoryentry = $this->ZIPparseCentralDirectory()) {
            $info_zip['central_directory'][] = $central_directoryentry;
            $info_zip['entries_count']++;
            $info_zip['compressed_size']   += $central_directoryentry['compressed_size'];
            $info_zip['uncompressed_size'] += $central_directoryentry['uncompressed_size'];
        }
        if ($info_zip['entries_count'] == 0) {
            throw new getid3_exception('No Central Directory entries found (truncated file?)');
        }

        if ($eocd = $this->ZIPparseEndOfCentralDirectory()) {
            $info_zip['end_central_directory'] = $eocd;
        } else {
            throw new getid3_exception('No End Of Central Directory entry found (truncated file?)');
        }

        // copy the archive comment only when one is actually present
        if (!empty($info_zip['end_central_directory']['comment'])) {
            $info_zip['comments']['comment'][] = $info_zip['end_central_directory']['comment'];
        }

        return true;
    }



    /**
     * Fallback scan: walk the local file headers from the start of the file
     * and collect them in $getid3->info['zip']['entries'], accumulating the
     * compressed/uncompressed totals and the entry count.
     *
     * Note that $getid3->info['zip'] is reset before scanning.
     *
     * @return bool true when at least one entry was found
     * @throws getid3_exception when no local file header is found
     */
    private function getZIPentriesFilepointer() {

        // shortcut
        $getid3 = $this->getid3;

        $getid3->info['zip'] = array ();
        // bind by reference so the recovered entries are visible to the caller
        $info_zip = &$getid3->info['zip'];
        $info_zip['compressed_size'] = $info_zip['uncompressed_size'] = $info_zip['entries_count'] = 0;

        rewind($getid3->fp);
        while ($fileentry = $this->ZIPparseLocalFileHeader()) {
            $info_zip['entries'][] = $fileentry;
            $info_zip['entries_count']++;
            $info_zip['compressed_size']   += $fileentry['compressed_size'];
            $info_zip['uncompressed_size'] += $fileentry['uncompressed_size'];
        }
        if ($info_zip['entries_count'] == 0) {
            throw new getid3_exception('No Local File Header entries found');
        }

        return true;
    }



    /**
     * Parse one Local File Header at the current file position and skip over
     * the entry's compressed data.
     *
     * If the signature does not match, the file pointer is restored to where
     * it was on entry.
     *
     * @return array|false parsed header (decoded values plus a 'raw' sub-array),
     *                     or false when no local file header starts here
     */
    private function ZIPparseLocalFileHeader() {

        // shortcut
        $getid3 = $this->getid3;

        $local_file_header['offset'] = ftell($getid3->fp);

        // fixed-size part of the header is 30 bytes
        $zip_local_file_header = fread($getid3->fp, 30);

        $local_file_header['raw']['signature'] = getid3_lib::LittleEndian2Int(substr($zip_local_file_header, 0, 4));

        // Invalid Local File Header Signature
        if ($local_file_header['raw']['signature'] != 0x04034B50) {
            fseek($getid3->fp, $local_file_header['offset'], SEEK_SET); // seek back to where filepointer originally was so it can be handled properly
            return false;
        }

        // Field widths in bytes, starting right after the 4-byte signature.
        // crc_32 and both sizes are 4-byte fields (4 + 5*2 + 3*4 + 2*2 = 30);
        // reading them as 2 bytes shifted every following field.
        // NOTE(review): ReadSequence is defined elsewhere - presumably it decodes
        // consecutive fields of the given widths into the target array; confirm.
        getid3_lib::ReadSequence('LittleEndian2Int', $local_file_header['raw'], $zip_local_file_header, 4,
            array (
                'extract_version'    => 2,
                'general_flags'      => 2,
                'compression_method' => 2,
                'last_mod_file_time' => 2,
                'last_mod_file_date' => 2,
                'crc_32'             => 4,
                'compressed_size'    => 4,
                'uncompressed_size'  => 4,
                'filename_length'    => 2,
                'extra_field_length' => 2
            )
        );

        $local_file_header['extract_version']         = sprintf('%1.1f', $local_file_header['raw']['extract_version'] / 10);
        $local_file_header['host_os']                 = self::ZIPversionOSLookup(($local_file_header['raw']['extract_version'] & 0xFF00) >> 8);
        $local_file_header['compression_method']      = self::ZIPcompressionMethodLookup($local_file_header['raw']['compression_method']);
        $local_file_header['compressed_size']         = $local_file_header['raw']['compressed_size'];
        $local_file_header['uncompressed_size']       = $local_file_header['raw']['uncompressed_size'];
        $local_file_header['flags']                   = self::ZIPparseGeneralPurposeFlags($local_file_header['raw']['general_flags'], $local_file_header['raw']['compression_method']);
        $local_file_header['last_modified_timestamp'] = self::DOStime2UNIXtime($local_file_header['raw']['last_mod_file_date'], $local_file_header['raw']['last_mod_file_time']);

        // variable-length tail: filename followed by the extra field
        $filename_extra_field_length = $local_file_header['raw']['filename_length'] + $local_file_header['raw']['extra_field_length'];
        if ($filename_extra_field_length > 0) {
            $zip_local_file_header .= fread($getid3->fp, $filename_extra_field_length);

            if ($local_file_header['raw']['filename_length'] > 0) {
                $local_file_header['filename'] = substr($zip_local_file_header, 30, $local_file_header['raw']['filename_length']);
            }
            if ($local_file_header['raw']['extra_field_length'] > 0) {
                $local_file_header['raw']['extra_field_data'] = substr($zip_local_file_header, 30 + $local_file_header['raw']['filename_length'], $local_file_header['raw']['extra_field_length']);
            }
        }

        // skip the entry's data so the pointer lands on whatever follows it
        $local_file_header['data_offset'] = ftell($getid3->fp);
        fseek($getid3->fp, $local_file_header['raw']['compressed_size'], SEEK_CUR);

        if ($local_file_header['flags']['data_descriptor_used']) {
            // 12-byte trailer following the data: crc, compressed size, uncompressed size
            $data_descriptor = fread($getid3->fp, 12);

            getid3_lib::ReadSequence('LittleEndian2Int', $local_file_header['data_descriptor'], $data_descriptor, 0,
                array (
                    'crc_32'            => 4,
                    'compressed_size'   => 4,
                    'uncompressed_size' => 4
                )
            );
        }

        return $local_file_header;
    }



    /**
     * Parse one Central Directory entry at the current file position.
     *
     * If the signature does not match, the file pointer is restored to where
     * it was on entry.
     *
     * @return array|false parsed entry (decoded values plus a 'raw' sub-array),
     *                     or false when no central directory entry starts here
     */
    private function ZIPparseCentralDirectory() {

        // shortcut
        $getid3 = $this->getid3;

        $central_directory['offset'] = ftell($getid3->fp);

        // fixed-size part of the entry is 46 bytes
        $zip_central_directory = fread($getid3->fp, 46);

        $central_directory['raw']['signature'] = getid3_lib::LittleEndian2Int(substr($zip_central_directory, 0, 4));

        // invalid Central Directory Signature
        if ($central_directory['raw']['signature'] != 0x02014B50) {
            fseek($getid3->fp, $central_directory['offset'], SEEK_SET); // seek back to where filepointer originally was so it can be handled properly
            return false;
        }

        // field widths in bytes, starting right after the 4-byte signature
        getid3_lib::ReadSequence('LittleEndian2Int', $central_directory['raw'], $zip_central_directory, 4,
            array (
                'create_version'       => 2,
                'extract_version'      => 2,
                'general_flags'        => 2,
                'compression_method'   => 2,
                'last_mod_file_time'   => 2,
                'last_mod_file_date'   => 2,
                'crc_32'               => 4,
                'compressed_size'      => 4,
                'uncompressed_size'    => 4,
                'filename_length'      => 2,
                'extra_field_length'   => 2,
                'file_comment_length'  => 2,
                'disk_number_start'    => 2,
                'internal_file_attrib' => 2,
                'external_file_attrib' => 4,
                'local_header_offset'  => 4
            )
        );

        $central_directory['entry_offset']            = $central_directory['raw']['local_header_offset'];
        $central_directory['create_version']          = sprintf('%1.1f', $central_directory['raw']['create_version'] / 10);
        $central_directory['extract_version']         = sprintf('%1.1f', $central_directory['raw']['extract_version'] / 10);
        $central_directory['host_os']                 = self::ZIPversionOSLookup(($central_directory['raw']['extract_version'] & 0xFF00) >> 8);
        $central_directory['compression_method']      = self::ZIPcompressionMethodLookup($central_directory['raw']['compression_method']);
        $central_directory['compressed_size']         = $central_directory['raw']['compressed_size'];
        $central_directory['uncompressed_size']       = $central_directory['raw']['uncompressed_size'];
        $central_directory['flags']                   = self::ZIPparseGeneralPurposeFlags($central_directory['raw']['general_flags'], $central_directory['raw']['compression_method']);
        $central_directory['last_modified_timestamp'] = self::DOStime2UNIXtime($central_directory['raw']['last_mod_file_date'], $central_directory['raw']['last_mod_file_time']);

        // variable-length tail: filename, extra field, file comment (in that order)
        $filename_extra_field_comment_length = $central_directory['raw']['filename_length'] + $central_directory['raw']['extra_field_length'] + $central_directory['raw']['file_comment_length'];
        if ($filename_extra_field_comment_length > 0) {
            $filename_extra_field_comment = fread($getid3->fp, $filename_extra_field_comment_length);

            if ($central_directory['raw']['filename_length'] > 0) {
                $central_directory['filename'] = substr($filename_extra_field_comment, 0, $central_directory['raw']['filename_length']);
            }
            if ($central_directory['raw']['extra_field_length'] > 0) {
                $central_directory['raw']['extra_field_data'] = substr($filename_extra_field_comment, $central_directory['raw']['filename_length'], $central_directory['raw']['extra_field_length']);
            }
            if ($central_directory['raw']['file_comment_length'] > 0) {
                $central_directory['file_comment'] = substr($filename_extra_field_comment, $central_directory['raw']['filename_length'] + $central_directory['raw']['extra_field_length'], $central_directory['raw']['file_comment_length']);
            }
        }

        return $central_directory;
    }



    /**
     * Parse the End Of Central Directory record at the current file position.
     *
     * If the signature does not match, the file pointer is restored to where
     * it was on entry.
     *
     * @return array|false parsed record (including 'directory_offset' and,
     *                     when present, 'comment'), or false when no EOCD
     *                     record starts here
     */
    private function ZIPparseEndOfCentralDirectory() {

        // shortcut
        $getid3 = $this->getid3;

        $end_of_central_directory['offset'] = ftell($getid3->fp);

        // fixed-size part of the record is 22 bytes
        $zip_end_of_central_directory = fread($getid3->fp, 22);

        $end_of_central_directory['signature'] = getid3_lib::LittleEndian2Int(substr($zip_end_of_central_directory, 0, 4));

        // invalid End Of Central Directory Signature
        if ($end_of_central_directory['signature'] != 0x06054B50) {
            fseek($getid3->fp, $end_of_central_directory['offset'], SEEK_SET); // seek back to where filepointer originally was so it can be handled properly
            return false;
        }

        // field widths in bytes, starting right after the 4-byte signature
        getid3_lib::ReadSequence('LittleEndian2Int', $end_of_central_directory, $zip_end_of_central_directory, 4,
            array (
                'disk_number_current'         => 2,
                'disk_number_start_directory' => 2,
                'directory_entries_this_disk' => 2,
                'directory_entries_total'     => 2,
                'directory_size'              => 4,
                'directory_offset'            => 4,
                'comment_length'              => 2
            )
        );

        if ($end_of_central_directory['comment_length'] > 0) {
            $end_of_central_directory['comment'] = fread($getid3->fp, $end_of_central_directory['comment_length']);
        }

        return $end_of_central_directory;
    }



    /**
     * Decode the general purpose bit flags of a header.
     *
     * Bits 1-2 are interpreted according to the compression method:
     * dictionary size / Shannon-Fano tree count for method 6, compression
     * speed for methods 8 and 9; they are ignored for other methods.
     *
     * @param int $flag_bytes         raw 16-bit flag value
     * @param int $compression_method raw compression method number
     * @return array always contains 'encrypted' and 'data_descriptor_used'
     */
    public static function ZIPparseGeneralPurposeFlags($flag_bytes, $compression_method) {

        $parsed_flags['encrypted'] = (bool)($flag_bytes & 0x0001);

        switch ($compression_method) {
            case 6:
                $parsed_flags['dictionary_size']    = (($flag_bytes & 0x0002) ? 8192 : 4096);
                $parsed_flags['shannon_fano_trees'] = (($flag_bytes & 0x0004) ? 3    : 2);
                break;

            case 8:
            case 9:
                switch (($flag_bytes & 0x0006) >> 1) {
                    case 0:
                        $parsed_flags['compression_speed'] = 'normal';
                        break;
                    case 1:
                        $parsed_flags['compression_speed'] = 'maximum';
                        break;
                    case 2:
                        $parsed_flags['compression_speed'] = 'fast';
                        break;
                    case 3:
                        $parsed_flags['compression_speed'] = 'superfast';
                        break;
                }
                break;
        }
        $parsed_flags['data_descriptor_used'] = (bool)($flag_bytes & 0x0008);

        return $parsed_flags;
    }



    /**
     * Translate a host-OS code into a readable name.
     *
     * @param int $index host OS code
     * @return string OS name, or '[unknown]' for codes not in the table
     */
    public static function ZIPversionOSLookup($index) {

        static $lookup = array (
            0  => 'MS-DOS and OS/2 (FAT / VFAT / FAT32 file systems)',
            1  => 'Amiga',
            2  => 'OpenVMS',
            3  => 'Unix',
            4  => 'VM/CMS',
            5  => 'Atari ST',
            6  => 'OS/2 H.P.F.S.',
            7  => 'Macintosh',
            8  => 'Z-System',
            9  => 'CP/M',
            10 => 'Windows NTFS',
            11 => 'MVS',
            12 => 'VSE',
            13 => 'Acorn Risc',
            14 => 'VFAT',
            15 => 'Alternate MVS',
            16 => 'BeOS',
            17 => 'Tandem'
        );
        return (isset($lookup[$index]) ? $lookup[$index] : '[unknown]');
    }



    /**
     * Translate a compression method number into a readable name.
     *
     * @param int $index compression method number
     * @return string method name, or '[unknown]' for numbers not in the table
     */
    public static function ZIPcompressionMethodLookup($index) {

        static $lookup = array (
            0  => 'store',
            1  => 'shrink',
            2  => 'reduce-1',
            3  => 'reduce-2',
            4  => 'reduce-3',
            5  => 'reduce-4',
            6  => 'implode',
            7  => 'tokenize',
            8  => 'deflate',
            9  => 'deflate64',
            10 => 'PKWARE Date Compression Library Imploding'
        );
        return (isset($lookup[$index]) ? $lookup[$index] : '[unknown]');
    }



    /**
     * Convert a packed MS-DOS date/time pair into a UNIX timestamp.
     * The fields are passed to gmmktime(), i.e. interpreted as GMT.
     *
     * @param int $DOSdate packed 16-bit MS-DOS date
     * @param int $DOStime packed 16-bit MS-DOS time
     * @return int UNIX timestamp
     */
    public static function DOStime2UNIXtime($DOSdate, $DOStime) {

        // wFatDate
        // Specifies the MS-DOS date. The date is a packed 16-bit value with the following format:
        // Bits      Contents
        // 0-4    Day of the month (1-31)
        // 5-8    Month (1 = January, 2 = February, and so on)
        // 9-15   Year offset from 1980 (add 1980 to get actual year)

        $UNIXday    =  ($DOSdate & 0x001F);
        $UNIXmonth  = (($DOSdate & 0x01E0) >> 5);
        $UNIXyear   = (($DOSdate & 0xFE00) >> 9) + 1980;

        // wFatTime
        // Specifies the MS-DOS time. The time is a packed 16-bit value with the following format:
        // Bits   Contents
        // 0-4    Second divided by 2
        // 5-10   Minute (0-59)
        // 11-15  Hour (0-23 on a 24-hour clock)

        $UNIXsecond =  ($DOStime & 0x001F) * 2;
        $UNIXminute = (($DOStime & 0x07E0) >> 5);
        $UNIXhour   = (($DOStime & 0xF800) >> 11);

        return gmmktime($UNIXhour, $UNIXminute, $UNIXsecond, $UNIXmonth, $UNIXday, $UNIXyear);
    }



    /**
     * Recursively merge $array2 into $array1. Where both sides hold an array
     * under the same key the arrays are merged; otherwise the value from
     * $array2 replaces the one from $array1.
     *
     * @param array $array1 base array
     * @param array $array2 array whose values take precedence
     * @return array|false merged array, or false when either argument is not an array
     */
    public static function array_merge_clobber($array1, $array2) {

        // written by kcØhireability*com
        // taken from http://www.php.net/manual/en/function.array-merge-recursive.php

        if (!is_array($array1) || !is_array($array2)) {
            return false;
        }

        $newarray = $array1;
        foreach ($array2 as $key => $val) {
            if (is_array($val) && isset($newarray[$key]) && is_array($newarray[$key])) {
                $newarray[$key] = self::array_merge_clobber($newarray[$key], $val);
            } else {
                $newarray[$key] = $val;
            }
        }
        return $newarray;
    }



    /**
     * Build a nested array from a separator-delimited path and assign $value
     * to the innermost key. Leading separators are ignored.
     *
     *   $foo = getid3_zip::CreateDeepArray('/path/to/my', '/', 'file.txt')
     * is the same as:
     *   $foo = array ('path' => array ('to' => array ('my' => 'file.txt')));
     * or
     *   $foo['path']['to']['my'] = 'file.txt';
     *
     * @param string $array_path path such as 'dir/sub/name'
     * @param string $separator  single-character path separator
     * @param mixed  $value      value stored under the last path component
     * @return array
     */
    public static function CreateDeepArray($array_path, $separator, $value) {

        // $string{0} offsets are no longer valid PHP, and indexing an empty
        // string is an error - guard the length and use [] instead
        $array_path = (string) $array_path;
        while (($array_path !== '') && ($array_path[0] == $separator)) {
            $array_path = (string) substr($array_path, 1);
        }
        if (($pos = strpos($array_path, $separator)) !== false) {
            return array (substr($array_path, 0, $pos) => self::CreateDeepArray(substr($array_path, $pos + 1), $separator, $value));
        }

        return array ($array_path => $value);
    }

}