Skip to content

Commit 706f52f

Browse files
committed
Bite 30. Movie data analysis
1 parent 4106944 commit 706f52f

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import csv
2+
from collections import defaultdict, namedtuple
3+
import os
4+
from urllib.request import urlretrieve
5+
import json
6+
7+
# Remote location of the dataset and the local directory it is saved to.
BASE_URL = 'http://projects.bobbelderbos.com/pcc/movies/'
TMP = '/tmp'

fname = 'movie_metadata.csv'
# os.path.join is meant for filesystem paths; a URL is assembled by plain
# concatenation instead (BASE_URL already ends with '/').
remote = BASE_URL + fname
local = os.path.join(TMP, fname)
# Download the CSV at import time so the functions below can read it.
urlretrieve(remote, local)

MOVIE_DATA = local  # path of the downloaded CSV
MIN_MOVIES = 4      # directors need at least this many movies to be ranked
MIN_YEAR = 1960     # movies are compared against this release year

# Record type for one movie row of the CSV.
Movie = namedtuple('Movie', 'title year score')
20+
21+
22+
def get_movies_by_director():
    """Group the movies in the downloaded CSV by director.

    Reads MOVIE_DATA with csv.DictReader and keeps the rows whose
    ``title_year`` is present and greater than MIN_YEAR.

    Returns:
        A defaultdict(list) mapping each ``director_name`` value to a list
        of Movie namedtuples. ``year`` and ``score`` hold the raw CSV
        strings (they are not converted to numbers here).
    """
    movies_by_director = defaultdict(list)
    with open(MOVIE_DATA, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            year = row["title_year"]
            # Rows without a release year cannot be compared to MIN_YEAR.
            # NOTE(review): the comparison is strict, so movies released in
            # MIN_YEAR itself are excluded -- confirm this is intended.
            if not year or int(year) <= MIN_YEAR:
                continue
            # Use the module-level Movie type so callers can rely on a
            # single shared class for every returned record.
            movie = Movie(
                title=row["movie_title"],
                year=year,
                score=row["imdb_score"],
            )
            movies_by_director[row["director_name"]].append(movie)
    return movies_by_director
37+
38+
39+
def calc_mean_score(movies):
    """Return the mean score of *movies*, rounded to one decimal place.

    Args:
        movies: Iterable of objects with a ``score`` attribute that
            ``float()`` accepts (for example Movie namedtuples).

    Returns:
        The arithmetic mean of the scores, rounded with ``round(..., 1)``.

    Raises:
        ZeroDivisionError: If *movies* is empty.
    """
    # Materialize the scores so one-shot iterables can be summed and counted.
    scores = [float(movie.score) for movie in movies]
    return round(sum(scores) / len(scores), 1)
50+
51+
52+
def get_average_scores(directors):
    """Rank directors by the mean score of their movies.

    Args:
        directors: Mapping of director name to a list of movies, as
            returned by get_movies_by_director.

    Returns:
        A list of ``(name, score)`` namedtuples, one per director with at
        least MIN_MOVIES movies, sorted by score in descending order.
    """
    # Define the record type once instead of on every loop iteration.
    director = namedtuple("director", "name, score")
    average_scores = [
        director(name=name, score=calc_mean_score(movies))
        for name, movies in directors.items()
        if len(movies) >= MIN_MOVIES
    ]
    ranked = sorted(average_scores, key=lambda entry: entry.score, reverse=True)
    # Kept from the original: the module-level call at the bottom of the
    # file relies on this print for its output.
    print(ranked)
    # Previously the ranking was only printed; return it as documented.
    return ranked
66+
67+
# Build the director -> movies mapping once at import time, then compute
# the per-director average scores from it.
director_movies = get_movies_by_director()
get_average_scores(director_movies)

0 commit comments

Comments
 (0)