summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFélix Sipma <gueux@gueux.org>2012-01-13 11:19:34 +0100
committerFélix Sipma <gueux@gueux.org>2012-01-13 11:19:34 +0100
commitd0502d684dab6da0d4cf5756d396be7c483263e7 (patch)
treeb9f82256b9ab15b5136151ab448930625eda0a5a
add imdb-ratings.py
-rwxr-xr-ximdb-ratings.py245
1 files changed, 245 insertions, 0 deletions
diff --git a/imdb-ratings.py b/imdb-ratings.py
new file mode 100755
index 0000000..cf3d8e9
--- /dev/null
+++ b/imdb-ratings.py
@@ -0,0 +1,245 @@
+#!/usr/bin/env python
+"""imdb-ratings.py
+Search IMDb to find the top movies that are not in VideoDB.
+Two modes:
+.TOP250 mode: use IMDb top 250 movies.
+.RANKFILE mode: use a rankings.list file (can be found on
+http://www.imdb.com/interfaces#plain ), currently it does not use VideoDB.
+"""
+
+################# CONFIGURATION #################################
+# Path of the configuration file.  When left empty, the default vdbpyrc
+# locations are searched instead.  Overridden by -c/--config.
+CONF_FILE = ''
+
+# Mode switches: 0 means disabled, 1 means enabled.  The values below are
+# the defaults used when the matching command-line option is not given.
+# TOP250 mode (enabled by -t/--top250).
+TOP250 = 0
+# RANKFILE mode (enabled by -r/--rankfile, which also supplies the file
+# name; RANKFILE itself is only assigned while parsing that option).
+RANK = 0
+# VERBOSE mode (enabled by --verbose).
+VERBOSE = 0
+# Minimum rank: a movie is listed only when its rating is strictly greater
+# than this value.  Overridden by -n.
+MINRANK = 8
+# Minimum number of votes: a movie is listed only when its vote count is
+# strictly greater than this value.  Overridden by -v.
+MINVOTES = 1000
+
+# VideoDB database parameters.  The db_server, db_user, db_password and
+# db_database entries are replaced by the values read from the [config]
+# section of the configuration file; db_charset is not read from the file
+# and is passed to the database connection as is.
+confset = { # Parameters to connect to the database.
+ 'db_server': '',
+ 'db_password': '',
+ 'db_database': '',
+ 'db_user': '',
+ 'db_charset': 'utf8'}
+
+#################################################################
+
+import os, sys, getopt, urllib, ConfigParser, re
+from types import UnicodeType, ListType, TupleType
+
+try:
+ import MySQLdb
+ import _mysql_exceptions
+except ImportError:
+ print 'Unable to import the MySQLdb module!'
+ sys.exit(1)
+
+try:
+ from imdb import IMDb, IMDbError
+except ImportError:
+ print 'Unable to import the IMDbPY package!'
+ sys.exit(1)
+
+
+# Usage text, printed for -h/--help and after a command-line parsing error.
+# The program name (sys.argv[0]) is substituted into the first usage line.
+# NOTE(review): this name shadows the built-in help(); it is referenced by
+# the option handling code, so renaming it needs a coordinated change.
+help = """imdb-ratings.py usage:
+ %s [OPTION]...
+
+ -c, --config=file select an alternate configuration file.
+ -t, --top250 print IMDb top 250 movies which are not in the VideoDB database.
+ -r, --rankfile=file print the movies of rankfile which rank and number of votes are superior to the values specified respectively with -n and -v
+ -n minimum rank (default 8)
+ -v minimum number of votes (default 1000)
+ --verbose verbose mode.
+ -h, --help show this help and exit.
+""" % sys.argv[0]
+
+# Set default out encoding
+out_encoding = sys.stdout.encoding or sys.getdefaultencoding()
+
+# Manage arguments list.
+try:
+ optlist, args = getopt.getopt(sys.argv[1:], 'c:tr:n:v:', ['help','top250','verbose','rankfile=','config='])
+except getopt.error, e:
+ print 'Troubles with arguments.'
+ print help
+ sys.exit(2)
+
+for opt in optlist:
+ if opt[0] in ('-c','--config'):
+ CONF_FILE = opt[1]
+ elif opt[0] in ('-t','--top250'):
+ TOP250 = 1
+ elif opt[0] in ('-r','--rankfile'):
+ RANK = 1
+ RANKFILE = opt[1]
+ elif opt[0] == '-n':
+ MINRANK = opt[1]
+ elif opt[0] == '-v':
+ MINVOTES = opt[1]
+ elif opt[0] in ('-h', '--help'):
+ print help
+ sys.exit(0)
+ elif opt[0] == '--verbose':
+ VERBOSE = 1
+
+# Everything else on the command line.
+args = args[1:]
+
+# Read rankfile
+if RANK:
+ rankfile = open(RANKFILE, 'r')
+
+# Read and parse the configuration file.
+conffile = ConfigParser.ConfigParser()
+
+if not CONF_FILE:
+ # ./vdbpyrc
+ CONF_FILE = ['vdbpyrc']
+ # /etc/vdbpyrc
+ CONF_FILE.append(os.path.join(os.sep, 'etc', 'vdbpyrc'))
+ if os.name != 'posix':
+ # C:\\Python24\etc\imdbpyweb.conf
+ CONF_FILE.append(os.path.join(sys.prefix, 'etc', 'vdbpyrc'))
+ # ~/.vdbpyrc
+ CONF_FILE.append(os.path.join(os.path.expanduser('~'), '.vdbpyrc'))
+
+try: conffile.read(CONF_FILE)
+except ConfigParser.Error, e:
+ print 'Unable to parse configuration file "%s".' % CONF_FILE
+ print str(e)
+ sys.exit(3)
+
+if not conffile.has_section('config'):
+ print 'Section [config] not present in the configuration file.'
+ sys.exit(3)
+
+for key in ('db_server', 'db_user', 'db_password', 'db_database'):
+ if not conffile.has_option('config', key):
+ print 'Required option "%s" not present in the [config] section.'
+ sys.exit(3)
+ confset[key] = conffile.get('config', key)
+
+if conffile.has_option('config', 'vdb_user'):
+ VDB_USER = conffile.get('config', 'vdb_user') or None
+
+if conffile.has_option('config', 'db_prefix'):
+ VDB_PREFIX = conffile.get('config', 'db_prefix') or None
+
+
+# ------- CONNECT TO THE DATABASE.
+
+def _(s):
+ if isinstance(s, (ListType, TupleType)):
+ s = [_(i) for i in s]
+ return s
+
+if VERBOSE:
+ print 'Connecting to the VideoDB database... ',
+ sys.stdout.flush()
+try:
+ db = MySQLdb.connect(db=confset['db_database'], host=confset['db_server'],
+ user=confset['db_user'], passwd=confset['db_password'],
+ charset=confset['db_charset'])
+ curs = db.cursor()
+except MySQLdb.Error, e:
+ print '\nUnable to connect to the database.'
+ print str(e)
+ sys.exit(4)
+##try: curs.execute('SET NAMES "latin1";')
+##except _mysql_exceptions.MySQLError: pass
+if VERBOSE:
+ print 'done!'
+
+# Collect information about the videodata table.
+if VERBOSE:
+ print 'Collecting information about the VideoDB database... ',
+ sys.stdout.flush()
+
+curs.execute('SELECT imdbID FROM ' + VDB_PREFIX + 'videodata;')
+vdb_movie_ids = [col[0].replace('imdb:','') for col in curs.fetchall()]
+
+
+############### Top250 mode
+# CONNECT TO IMDB AND GET MOVIE INFORMATION.
+try:
+ i = IMDb()
+except IMDbError, e:
+ print '\nError instancing the IMDb class.'
+ print str(e)
+ sys.exit(6)
+
+if VERBOSE:
+ print 'Fetching data from IMDb... ',
+ sys.stdout.flush()
+
+try:
+ top250 = i.get_top250_movies()
+except IMDbError, e:
+ print '\nError getting top 250 movies data from IMDb.'
+ print str(e)
+ sys.exit(6)
+
+if VERBOSE:
+ print 'done!'
+ sys.stdout.flush()
+
+# Select movies that are not in the VideoDB database
+top250 = filter(lambda x: x.movieID not in set(vdb_movie_ids), top250)
+
+if TOP250:
+ if VERBOSE:
+ print 'Top 250 movies that are not in VideoDB (with a minimum rank of %s and a minimum number of votes of %s:' % (MINVOTES, MINRANK)
+ for movie in top250:
+ if float(movie.get('rating')) > float(MINRANK):
+ if float(movie.get('votes')) > float(MINVOTES):
+ outl = u'%s\t%s\t%s\t%s' % (movie.get('rating'), movie.get('votes'),
+ movie.movieID, movie['long imdb title'])
+ print outl.encode(out_encoding, 'replace')
+
+
+############# RANK mode
+if RANK:
+ linenumber = 0
+ # We skip the beginning of the rankfile
+ for line in rankfile:
+ linenumber += 1
+ if 'MOVIE RATINGS REPORT' in line:
+ break
+
+ for line in rankfile:
+ linenumber += 1
+ # Beginning of the list of movies.
+ if 'New Distribution Votes Rank Title' in line:
+ break
+
+ for line in rankfile:
+ linenumber += 1
+ if not line.rstrip() == '':
+ # End of the list of movies.
+ if '-----------------------------------------------------------------------------' in line:
+ break
+ else:
+ try:
+ m_new, m_distrib, m_votes, m_rank, m_title = re.split('\s\s+', line.rstrip())
+ except:
+ sys.stderr.write('\nError on line number %s: %s' % (linenumber,line))
+ sys.exit(2)
+
+ if float(m_rank) > float(MINRANK):
+ if float(m_votes) > float(MINVOTES):
+ print "%s: %s: %s" % (m_rank, m_votes, m_title)
+
+
+ rankfile.close()
+
+
+sys.exit(0)
+