Use at your own risk. These programs haven't been thoroughly tested.
Class: IMDBInfo
Downloads movie information from IMDB.com. This class contains a function, getinfo, that can look up movie information from IMDB. It returns an array containing all the useful information that can be found about a specified movie, and it also returns the location of the cover image in the key 'coverpic'. This script can also be used in php-cli mode, so you can run it in the background to, for instance, index your movie collection and download all the cover art. This script requires the curl extension. This was supposed to be a PEAR package; however, it got rejected because there is no official IMDB API, so IMDB could take this as an offense. So please read the IMDB Terms Of Use before using this class.
Info
@version 0.5
@link http://kevin.vanzonneveld.net
@function imdbsearch finds the imdb url of a specific movie
@function imdbparse downloads the url of a specific movie, and returns an array with all the useful information
@function getinfo combines imdbsearch and imdbparse in one slim function.
Example
// Look up a movie by title; getMovie() returns either an info array or a PEAR_Error.
$IMDBInfo = new Services_IMDB;
$arr = $IMDBInfo->getMovie("Corpse Bride");
if (PEAR::isError($arr)) {
    die($arr->getMessage()."\n");
}

// Array keys must be quoted strings: bare words are treated as (undefined) constants.
echo "imdb_url: ".$arr['imdb_url']."\n";
echo "coverpic: ".$arr['coverpic']."\n";
echo "user_rating: ".$arr['user_rating']."\n";
echo "tagline: ".$arr['tagline']."\n";
Outputs
imdb_url: http://us.imdb.com/title/tt0121164/
coverpic: http://ia.imdb.com/media/imdb/01/I/33/72/00/10m.jpg
user_rating: 7.5/10 (19,284 votes)
tagline: Loving You Is Like Loving The Dead
Source Code
download source<? /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */ /** * PEAR::Services_IMDB * * Services_IMDB searches for given movies and simplifies retrieving movie * information like: imdb_url, coverpic, user rating, cast, director, * tagline, plot outline, genre, playtime, etc. * * * * PHP versions 4 and 5 * * <LICENSE> * Copyright (c) 2005-2007, Kevin van Zonneveld * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * o Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * o Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * o Neither the name of the software nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* </LICENSE>
 *
 * @category  Web Services
 * @package   Services_IMDB
 * @author    kvz <kevin@vanzonnveld.net>
 * @copyright 2005-2007 Kevin van Zonneveld
 * @license   http://www.opensource.org/licenses/bsd-license.php BSD License
 * @version   CVS: $Id: IMDB.php,v 0.1.0 2007/04/16 10:36:05 eru Exp $
 * @link      http://pear.php.net/package/Services_IMDB
 * @filesource
 */

require_once 'PEAR.php';
require_once 'HTTP/Request.php';

define('SERVICES_IMDB_VERSION', '0.1.0');
define('SERVICES_IMDB_USER_AGENT', 'Services_IMDB/'.SERVICES_IMDB_VERSION);

/**
 * Class for accessing and retrieving information from IMDB,
 * which contains a lot of information about movies.
 *
 * Every public lookup method returns either its documented value or a
 * PEAR_Error; callers should test the result with PEAR::isError().
 *
 * @category Web Services
 * @package  Services_IMDB
 * @author   Kevin van Zonneveld <kevin@vanzonneveld.net>
 * @uses     PEAR
 * @uses     HTTP_Request
 */
class Services_IMDB
{
    /**
     * Allowed data keys to gather from a movie page; any other parsed
     * key is considered garbage and dropped.
     *
     * @access private
     * @var array
     */
    private $_allowed_keys = array(
        'directed_by',
        'writing_credits',
        'genre',
        'tagline',
        'plot_outline',
        'user_rating',
        'cast_overview_first_billed_only',
        'also_known_as',
        'mpaa',
        'runtime',
        'country',
        'language',
        'color',
        'sound_mix',
        'certification',
        'trivia',
        'goofs',
        'quotes',
        'awards'
    );

    /**
     * User-Agent for HTTP access.
     *
     * Private by convention; use getUserAgent()/setUserAgent(). The
     * property stays publicly visible (as the original `var` was) so that
     * existing code touching it keeps working.
     *
     * @access private
     * @var string
     */
    public $_userAgent;

    /**
     * Constructor: starts out with the default User-Agent.
     *
     * @access public
     */
    public function __construct()
    {
        $this->_userAgent = SERVICES_IMDB_USER_AGENT;
    }

    /**
     * Legacy PHP 4-style constructor name.
     *
     * A method named after the class is no longer run as a constructor by
     * PHP 8, which is why __construct() exists; this method is kept only
     * so code that calls it explicitly still works.
     *
     * @access public
     */
    public function Services_IMDB()
    {
        $this->__construct();
    }

    /**
     * Retrieves the version number of this class.
     *
     * @access public
     * @return string
     */
    public function getVersion()
    {
        return SERVICES_IMDB_VERSION;
    }

    /**
     * Retrieves the User-Agent name.
     *
     * @access public
     * @return string
     */
    public function getUserAgent()
    {
        return $this->_userAgent;
    }

    /**
     * Sets the User-Agent name.
     *
     * @access public
     * @param  string|null $ua New User-Agent; null restores the default
     * @return bool false when the default was restored, true when $ua was stored
     */
    public function setUserAgent($ua = null)
    {
        if (is_null($ua)) {
            $this->_userAgent = SERVICES_IMDB_USER_AGENT;
            return false;
        }

        $this->_userAgent = $ua;
        return true;
    }

    /**
     * Download any URI.
     *
     * The URL in the returned array is the URL that was requested; this
     * method does not report a redirect target.
     *
     * @access private
     * @param  string $url URI to download
     * @return array|PEAR_Error array(http code, url, body) on success;
     *                          PEAR_Error when the request fails, the
     *                          response code is not 200, or the body is empty
     */
    public function _download($url)
    {
        $req = new HTTP_Request($url);
        $req->addHeader('User-Agent', $this->_userAgent);

        // Hand the transport error back instead of null, so callers can
        // rely on a single "array or PEAR_Error" contract.
        $sent = $req->sendRequest();
        if (PEAR::isError($sent)) {
            return $sent;
        }

        $http_code = $req->getResponseCode();
        if ($http_code != 200) {
            return PEAR::raiseError('IMDB: return HTTP '.$http_code);
        }

        $data = $req->getResponseBody();
        if ($data == '') {
            return PEAR::raiseError('IMDB: empty HTTP response');
        }

        return array($http_code, $url, $data);
    }

    /**
     * Search for movie page url.
     *
     * @access public
     * @param  string $moviename Name of the movie to search for
     * @return string|PEAR_Error URL of the movie page (always ending in a
     *                           slash), or PEAR_Error on failure
     */
    public function searchIMDB($moviename)
    {
        // forge search url & download
        $url    = 'http://us.imdb.com/find?q='.urlencode($moviename);
        $result = $this->_download($url);
        if (PEAR::isError($result)) {
            // A PEAR_Error cannot be unpacked with list(); report it instead.
            return PEAR::raiseError('Cannot fetch '.$moviename.' ('.$url.'): '.$result->getMessage());
        }
        list($httpcode, $lasturl, $body) = $result;

        // Check if we landed on an overview of possible movies, or that we
        // hit the moviepage directly (a movie page mentions both words).
        $lower = strtolower($body);
        if (strpos($lower, 'usercomments') === false || strpos($lower, 'genre') === false) {
            // cleanup & drill down to: 'following results:'
            $buf    = strip_tags(utf8_decode(html_entity_decode($body, ENT_QUOTES)), '<a>,<b>');
            $marker = 'following results:';
            $pos    = strpos(strtolower($buf), $marker);
            if ($pos !== false) {
                // only cut when the marker exists; otherwise scan the whole page
                $buf = substr($buf, $pos + strlen($marker));
            }

            // take the link this pattern finds & add the imdb prefix to the url
            if (!preg_match('/<a.*href=\'([^\']+)/i', $buf, $match)) {
                return PEAR::raiseError('unable to find a movie link in the search results for: '.$moviename);
            }
            $url = 'http://us.imdb.com'.$match[1];

            // download the linked page to make sure it is reachable
            $result = $this->_download($url);
            if (PEAR::isError($result)) {
                return PEAR::raiseError('Cannot fetch '.$moviename.' ('.$url.'): '.$result->getMessage());
            }
            list($httpcode, $lasturl, $body) = $result;
        }

        // add a slash suffix if necessary
        if (substr($lasturl, -1, 1) != '/') {
            $lasturl .= '/';
        }

        return $lasturl;
    }

    /**
     * Parse a movie url and return all the interesting information.
     *
     * The result always carries 'coverpic' (null when no cover image was
     * found), 'imdb_url' and 'imdbtitle', plus every allowed key that
     * could be parsed from the page. The cast is stored under
     * 'cast_overview' as an array built from the cast table cells.
     *
     * @access public
     * @param  string $movieurl IMDB URI to start parsing
     * @return array|PEAR_Error
     */
    public function parseIMDB($movieurl)
    {
        $info    = array();
        $match   = null;
        $matches = null;

        // download the moviepage
        $result = $this->_download($movieurl);
        if (PEAR::isError($result)) {
            return PEAR::raiseError('Cannot fetch '.$movieurl.': '.$result->getMessage());
        }
        list($httpcode, $lasturl, $body) = $result;

        // save the coverpic URI before stripping all attributes
        $info['coverpic'] = preg_match('/<div class=\'photo\'>.*src=\'([^\']+?)\'/isU', $body, $match)
            ? $match[1]
            : null;
        $info['imdb_url'] = $lasturl;

        // throw away everything before the H1 tag
        $body = preg_replace('/(.*)(<h1)/isU', '$2', utf8_decode(html_entity_decode($body, ENT_QUOTES)));

        // relieve all html tags of their attributes
        $body = preg_replace('/(<\w+)(\s+.*)(>)/isU', '$1$3', $body);

        // save the imdb title of this movie
        $info['imdbtitle'] = preg_match('/<h1>(.*)<\/h1>/isU', $body, $match)
            ? trim(strip_tags($match[1]))
            : '';

        // cleanup tiny discrepancies caused by imdb layout changes, a little
        // dirty currently; the order of these replacements matters
        $body = str_replace('h5>', 'b>', $body);
        $body = str_replace('/div>', 'br>', $body);
        $body = str_replace('br/>', 'br>', $body);
        $body = preg_replace('/<\/b>\s*<br>/isU', '</b>', $body);
        $body = str_replace('<img><small>(', '<b>', $body);
        $body = str_replace(')</small><div>', '</b>', $body);

        // parse everything into raw arrays: [1] = labels, [2] = values
        preg_match_all('/<b>\s*(.+)\s*<\/b>\s*(.+)\s*<br>\n/isU', $body, $matches);

        // build a clean array
        foreach ($matches[1] as $i => $key_raw) {
            // general setup & cleanup of keys
            $key = strtolower(trim(strip_tags($key_raw)));
            $key = str_replace(' ', '_', $key);
            $key = str_replace(array(':', ','), '', $key);

            // skip garbage keys that are not in the allowed list
            if (!in_array($key, $this->_allowed_keys, true)) {
                continue;
            }

            // general setup & cleanup of values
            $val_raw = $matches[2][$i];
            $val_raw = str_replace(array('(more)', '(view trailer)'), '', $val_raw);
            $val     = trim(strip_tags($val_raw));
            $val     = preg_replace('/(\s)*\s/isU', ' ', $val);

            // fine-tuning for some specific keys
            if ($key === 'cast_overview_first_billed_only') {
                $key = 'cast_overview';
                $val = $this->_parseCast($val_raw);
            }

            // save final data
            if ($key && $val) {
                $info[$key] = $val;
            }
        }

        return $info;
    }

    /**
     * Turn the raw cast table markup into an array.
     *
     * Table cells are flattened to '|'-separated fields and '||'-separated
     * rows; in each row the first field becomes the key and the second the
     * value. Rows lacking either field are skipped.
     *
     * @access private
     * @param  string $val_raw Raw markup of the cast section
     * @return array Possibly empty
     */
    private function _parseCast($val_raw)
    {
        $val = strip_tags($val_raw, '<td>');
        $val = str_replace('</td>', '', $val);
        $val = str_replace('<td>', '|', $val);
        $val = str_replace(' ... |', '', $val);

        $cast = array();
        foreach (explode('||', $val) as $line) {
            $parts = explode('|', $line);
            // empty() also covers a row that has no second field at all
            if (!empty($parts[0]) && !empty($parts[1])) {
                $cast[trim($parts[0])] = trim($parts[1]);
            }
        }

        return $cast;
    }

    /**
     * Search IMDB for a movie, and parse the moviepage into a useful array.
     *
     * @access public
     * @param  string $moviename Name of the movie to search for
     * @return array|PEAR_Error Movie information as returned by
     *                          parseIMDB(), or PEAR_Error on failure
     */
    public function getMovie($moviename = '')
    {
        $url = $this->searchIMDB($moviename);
        if (PEAR::isError($url)) {
            // a PEAR_Error object is truthy, so it must be caught before
            // it is mistaken for a URL
            return $url;
        }
        if (!$url) {
            return PEAR::raiseError('unable to retrieve a valid url for: '.$moviename);
        }

        $info = $this->parseIMDB($url);
        if (PEAR::isError($info)) {
            return $info;
        }
        if (!$info) {
            return PEAR::raiseError('unable to parse valid movieinformation for: '.$moviename);
        }

        return $info;
    }
}
?>
No comments. Be the first!