|
| 1 | +import csv |
| 2 | +from collections import defaultdict, namedtuple |
| 3 | +import os |
| 4 | +from urllib.request import urlretrieve |
| 5 | + |
# Remote directory holding the data set. The trailing slash matters because
# the file name is appended to it directly below.
BASE_URL = 'http://projects.bobbelderbos.com/pcc/movies/'
TMP = '/tmp'

fname = 'movie_metadata.csv'
# URLs always use forward slashes, so build the address by concatenation
# instead of os.path.join, which would insert the OS path separator.
remote = BASE_URL + fname
local = os.path.join(TMP, fname)
# Fetch the CSV into TMP every time this module is imported.
urlretrieve(remote, local)

MOVIE_DATA = local
MIN_MOVIES = 4
MIN_YEAR = 1960

# Lightweight record for one movie row of the CSV.
Movie = namedtuple('Movie', 'title year score')
| 19 | + |
| 20 | + |
def get_movies_by_director():
    """Read the movie CSV and group its movies by director.

    Rows are taken from MOVIE_DATA. A blank ``title_year`` is treated as 0,
    and only rows whose year is >= MIN_YEAR are kept.

    Returns:
        dict mapping each ``director_name`` value to a list of Movie
        namedtuples (title stripped of surrounding whitespace, year as int,
        score as float), in file order.

    Raises:
        ValueError: if a kept row has a non-numeric ``imdb_score`` or any
            row has a non-numeric, non-blank ``title_year``.
    """
    directors = {}
    # Explicit encoding keeps non-ASCII titles intact regardless of the
    # platform default; newline='' is what the csv module expects so that
    # quoted fields containing line breaks are parsed correctly.
    with open(MOVIE_DATA, encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            year = int(row['title_year'] or 0)
            if year < MIN_YEAR:
                continue
            movie = Movie(
                row['movie_title'].strip(),
                year,
                float(row['imdb_score']),
            )
            directors.setdefault(row['director_name'], []).append(movie)
    return directors
| 33 | + |
| 34 | + |
def calc_mean_score(movies):
    """Return the average ``score`` of the given Movie namedtuples.

    The arithmetic mean of every movie's score is computed and rounded to
    one decimal place. An empty input raises ZeroDivisionError.
    """
    scores = tuple(entry.score for entry in movies)
    mean = sum(scores) / len(scores)
    return round(mean, 1)
| 40 | + |
| 41 | + |
def get_average_scores(directors):
    """Rank directors by the mean score of their movies.

    Args:
        directors: dict mapping a director name to a list of Movie
            namedtuples (as returned by get_movies_by_director).

    Returns:
        A list of ``(director, average_score)`` tuples sorted by average
        score, highest first. Only directors with at least MIN_MOVIES
        movies are included.
    """
    # Use the module constant rather than a literal so the threshold stays
    # in sync with MIN_MOVIES and with the documented contract.
    averages = [
        (director, calc_mean_score(movies))
        for director, movies in directors.items()
        if len(movies) >= MIN_MOVIES
    ]
    return sorted(averages, key=lambda pair: pair[1], reverse=True)
0 commit comments