Skip to content

Commit 7c15e6e

Browse files
committed
day 5 movie data analysis script
1 parent ecfa546 commit 7c15e6e

File tree

2 files changed

+47
-0
lines changed

2 files changed

+47
-0
lines changed
Binary file not shown.
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import csv
2+
from collections import defaultdict, namedtuple
3+
import os
4+
from urllib.request import urlretrieve
5+
6+
# Where the dataset lives remotely and the directory it is downloaded into.
BASE_URL = 'http://projects.bobbelderbos.com/pcc/movies/'
TMP = '/tmp'

fname = 'movie_metadata.csv'
# URLs always use '/', so build the address by concatenation (BASE_URL already
# ends in '/'); os.path.join would insert a backslash on Windows.
remote = BASE_URL + fname
local = os.path.join(TMP, fname)
# Runs at import time and overwrites any previously downloaded copy.
urlretrieve(remote, local)

# Path of the CSV file to analyse.
MOVIE_DATA = local
# Analysis thresholds: minimum number of movies per director and the earliest
# release year a movie may have to be included.
MIN_MOVIES = 4
MIN_YEAR = 1960

# Lightweight record for one movie row.
Movie = namedtuple('Movie', 'title year score')
19+
20+
21+
def get_movies_by_director():
    """Group the movies listed in the CSV at MOVIE_DATA by director.

    Rows whose ``title_year`` is earlier than MIN_YEAR are skipped; a blank
    ``title_year`` is treated as year 0 for that comparison.

    Returns:
        A dict mapping each ``director_name`` value to a list of Movie
        namedtuples (title, year, score) in the order they appear in the file.

    Raises:
        ValueError: if a kept row has a non-numeric ``imdb_score`` or any row
            has a non-integer, non-blank ``title_year``.
    """
    movies_by_director = defaultdict(list)
    # Explicit encoding so parsing does not depend on the platform locale
    # (NOTE(review): the dataset is presumably UTF-8 -- confirm); newline=''
    # is what the csv module requires for correct handling of quoted newlines.
    with open(MOVIE_DATA, encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            # Parse the year once and reuse it for the filter and the record.
            year = int(row['title_year'] or 0)
            if year < MIN_YEAR:
                continue
            movie = Movie(
                row['movie_title'].strip(),
                year,
                float(row['imdb_score']),
            )
            movies_by_director[row['director_name']].append(movie)
    # Return a plain dict so looking up an unknown director still raises
    # KeyError instead of silently creating an empty entry.
    return dict(movies_by_director)
33+
34+
35+
def calc_mean_score(movies):
    """Return the average ``score`` of the given Movie namedtuples.

    The mean is rounded to one decimal place. An empty input raises
    ZeroDivisionError.
    """
    # Materialise the scores first so any iterable of movies can be counted.
    scores = tuple(entry.score for entry in movies)
    total = sum(scores)
    count = len(scores)
    return round(total / count, 1)
40+
41+
42+
def get_average_scores(directors):
    """Rank directors by the mean score of their movies.

    Args:
        directors: dict mapping a director name to a list of Movie
            namedtuples, as returned by get_movies_by_director.

    Returns:
        A list of ``(director, average_score)`` tuples sorted by average score
        in descending order. Directors with fewer than MIN_MOVIES movies are
        left out.
    """
    # Use the module constant rather than a literal so the threshold is
    # defined in exactly one place.
    averages = [
        (director, calc_mean_score(movies))
        for director, movies in directors.items()
        if len(movies) >= MIN_MOVIES
    ]
    # sorted() is stable (also with reverse=True), so directors with equal
    # averages keep the relative order they had in the input dict.
    return sorted(averages, key=lambda pair: pair[1], reverse=True)

0 commit comments

Comments
 (0)