// | Allan Hansen                                                         |
// +----------------------------------------------------------------------+
// | module.lib.data-hash.php                                             |
// | getID3() library file.                                               |
// | dependencies: NONE.                                                  |
// +----------------------------------------------------------------------+
//
// $Id: module.lib.data_hash.php,v 1.5 2006/12/03 19:28:18 ah Exp $



class getid3_lib_data_hash
{

    private $getid3;


    // constructor - calculate md5/sha1 data
    public function __construct(getID3 $getid3, $algorithm) {

        $this->getid3 = $getid3;

        // Check algorithm
        if (!preg_match('/^(md5|sha1)$/', $algorithm)) {
            throw new getid3_exception('Unsupported algorithm, "'.$algorithm.'", in GetHashdata()');
        }


        //// Handle ogg vorbis files

        if ((@$getid3->info['fileformat'] == 'ogg') && (@$getid3->info['audio']['dataformat'] == 'vorbis')) {

            // We cannot get an identical md5_data value for Ogg files where the comments
            // span more than 1 Ogg page (compared to the same audio data with smaller
            // comments) using the normal getID3() method of MD5'ing the data between the
            // end of the comments and the end of the file (minus any trailing tags),
            // because the page sequence numbers of the pages that the audio data is on
            // do not match. Under normal circumstances, where comments are smaller than
            // the nominal 4-8kB page size, this is not a problem, but if there are
            // very large comments, the only way around it is to strip off the comment
            // tags with vorbiscomment and MD5 that file.
            // This procedure must be applied to ALL Ogg files, not just the ones with
            // comments larger than 1 page, because the below method simply MD5's the
            // whole file with the comments stripped, not just the portion after the
            // comments block (which is the standard getID3() method).

            // The above-mentioned problem of comments spanning multiple pages and changing
            // page sequence numbers likely happens for OggSpeex and OggFLAC as well, but
            // currently vorbiscomment only works on OggVorbis files.

            if ((bool)ini_get('safe_mode')) {
                throw new getid3_exception('PHP running in Safe Mode - cannot make system call to vorbiscomment[.exe] needed for '.$algorithm.'_data.');
            }

            if (!preg_match('/^Vorbiscomment /', `vorbiscomment --version 2>&1`)) {
                throw new getid3_exception('vorbiscomment[.exe] binary not found in path. UNIX: typically /usr/bin. Windows: typically c:\windows\system32.');
            }

            // Prevent user from aborting script
            $old_abort = ignore_user_abort(true);

            // Create empty file
            $empty = tempnam('*', 'getID3');
            touch($empty);

            // Use vorbiscomment to make temp file without comments
            $temp = tempnam('*', 'getID3');

            $command_line = 'vorbiscomment -w -c '.escapeshellarg($empty).' '.escapeshellarg(realpath($getid3->filename)).' '.escapeshellarg($temp).' 2>&1';

            // Error from vorbiscomment
            if ($vorbis_comment_error = `$command_line`) {
                throw new getid3_exception('System call to vorbiscomment[.exe] failed.');
            }

            // Get hash of newly created file
            $hash_function = $algorithm . '_file';
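            // $hash_function now holds the name of a built-in PHP function - 'md5_file'
            // or 'sha1_file' - and is invoked below as a variable function on the
            // comment-stripped temp file.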
            $getid3->info[$algorithm.'_data'] = $hash_function($temp);

            // Clean up
            unlink($empty);
            unlink($temp);

            // Reset abort setting
            ignore_user_abort($old_abort);

            // Return success
            return true;
        }

        //// Handle other file formats

        // Get hash from part of file
        if (@$getid3->info['avdataoffset'] || (@$getid3->info['avdataend'] && @$getid3->info['avdataend'] < $getid3->info['filesize'])) {

            if ((bool)ini_get('safe_mode')) {
                $getid3->warning('PHP running in Safe Mode - backtick operator not available, using slower non-system-call '.$algorithm.' algorithm.');
                $hash_function = 'hash_file_partial_safe_mode';
            }
            else {
                $hash_function = 'hash_file_partial';
            }

            $getid3->info[$algorithm.'_data'] = $this->$hash_function($getid3->filename, $getid3->info['avdataoffset'], $getid3->info['avdataend'], $algorithm);
        }

        // Get hash from whole file - use built-in md5_file() and sha1_file()
        else {
            $hash_function = $algorithm . '_file';
            $getid3->info[$algorithm.'_data'] = $hash_function($getid3->filename);
        }
    }



    // Return md5/sha1sum for a file from starting position to absolute end position
    // Using system call to the head, tail and md5sum/sha1sum binaries
    private function hash_file_partial($file, $offset, $end, $algorithm) {

        // It seems that sha1sum.exe for Windows only works on physical files, does not accept piped data
        // Fall back to create-temp-file method:
        if ($algorithm == 'sha1' && strtoupper(substr(PHP_OS, 0, 3)) == 'WIN') {
            return $this->hash_file_partial_safe_mode($file, $offset, $end, $algorithm);
        }

        // Check for presence of binaries and revert to safe mode if not found
        if (!`head --version`) {
            return $this->hash_file_partial_safe_mode($file, $offset, $end, $algorithm);
        }

        if (!`tail --version`) {
            return $this->hash_file_partial_safe_mode($file, $offset, $end, $algorithm);
        }

        if (!`${algorithm}sum --version`) {
            return $this->hash_file_partial_safe_mode($file, $offset, $end, $algorithm);
        }

        $size = $end - $offset;
        $command_line = 'head -c'.$end.' '.escapeshellarg(realpath($file)).' | tail -c'.$size.' | '.$algorithm.'sum';
        return substr(`$command_line`, 0, $algorithm == 'md5' ? 32 : 40);
    }
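
    // Illustrative example of the command built above (hypothetical values, not from
    // the original source): with $offset = 4096 and $end = 1048576 on an md5 run,
    //     head -c1048576 '/path/to/file.mp3' | tail -c1044480 | md5sum
    // pipes exactly the bytes from offset 4096 to 1048576 through md5sum, and the
    // first 32 hex characters of the output (40 for sha1) are returned.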


    // Return md5/sha1sum for a file from starting position to absolute end position
    // Using slow safe_mode temp file method
    private function hash_file_partial_safe_mode($file, $offset, $end, $algorithm) {

        // Attempt to create a temporary file in the system temp directory - the invalid
        // dirname '*' should force tempnam() to fall back to the system temp dir
        if (($data_filename = tempnam('*', 'getID3')) === false) {
            throw new getid3_exception('Unable to create temporary file.');
        }

        // Init
        $result = false;

        // Copy the requested part of the file to the temp file
        if ($fp = @fopen($file, 'rb')) {

            if ($fp_data = @fopen($data_filename, 'wb')) {

                fseek($fp, $offset, SEEK_SET);
                $bytes_left_to_write = $end - $offset;
                while (($bytes_left_to_write > 0) && ($buffer = fread($fp, getid3::FREAD_BUFFER_SIZE))) {
                    // The fwrite() length argument caps the final write so no more than
                    // $bytes_left_to_write bytes end up in the temp file
                    $bytes_written = fwrite($fp_data, $buffer, $bytes_left_to_write);
                    $bytes_left_to_write -= $bytes_written;
                }
                fclose($fp_data);
                $hash_function = $algorithm . '_file';
                $result = $hash_function($data_filename);

            }
            fclose($fp);
        }
        unlink($data_filename);
        return $result;
    }

}

?>