Skip to content

Commit 2ebacb9

Browse files
authored
Merge pull request #2 from kokesak/chapter3
Complete day5/6 challenge
2 parents 1dc9aa1 + b3ffd18 commit 2ebacb9

File tree

2 files changed

+89
-0
lines changed

2 files changed

+89
-0
lines changed
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__author__ = 'Martin Litwora'
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
import csv
2+
from math import nan
3+
from numpy import NaN
4+
import pandas as pd
5+
from collections import defaultdict, namedtuple, Counter
6+
7+
# File name of the movie metadata CSV.
# NOTE(review): no function visible in this file reads this constant;
# confirm whether it is still needed.
MOVIE_DATA = 'movie_metadata.csv'
# Maximum number of directors shown in the printed ranking.
NUM_TOP_DIRECTORS = 20
# Directors with fewer movies than this are removed before averaging.
MIN_MOVIES = 4
# NOTE(review): presumably the earliest release year to consider, but no
# function visible in this file applies it - confirm the intended use.
MIN_YEAR = 1960

# Record describing a single movie: its title, release year and IMDB score.
Movie = namedtuple('Movie', 'title year score')
13+
14+
15+
def get_movies_by_director(csv_source=None):
    '''Load the movie metadata and group the movies by director.

    Args:
        csv_source: Optional path, URL or file-like object passed straight
            to ``pandas.read_csv``. When omitted, the copy of
            ``movie_metadata.csv`` hosted on GitHub is downloaded.

    Returns:
        A ``defaultdict(list)`` mapping each director name to a list of
        ``Movie`` namedtuples (title, year, score) in the order the rows
        appear in the CSV.
    '''
    if csv_source is None:
        csv_source = 'https://raw.githubusercontent.com/sundeepblue/movie_rating_prediction/master/movie_metadata.csv'
    data = pd.read_csv(csv_source)

    directors = defaultdict(list)
    for row in data.itertuples():
        # Rows whose director is not a string (e.g. the NaN pandas uses for
        # an empty cell) cannot be attributed to anyone, so they are skipped.
        if not isinstance(row.director_name, str):
            continue
        # Remove non-breaking spaces (\xa0) from the title.
        title = row.movie_title.replace('\xa0', '')
        # NOTE(review): title_year is stored exactly as pandas delivers it;
        # rows without a year presumably arrive as NaN - confirm whether
        # they should be kept.
        movie = Movie(title=title, year=row.title_year, score=row.imdb_score)
        directors[row.director_name].append(movie)
    return directors
28+
29+
30+
def get_average_scores(directors):
    '''Drop directors with fewer than MIN_MOVIES movies and average the rest.

    The mapping is modified in place: directors with too few movies are
    deleted, and for every remaining director the average score (a float)
    is appended as the LAST element of that director's movie list.

    Args:
        directors: Dict mapping a director name to a list of ``Movie``
            namedtuples.

    Returns:
        The same ``directors`` object that was passed in, after filtering
        and with the average appended to each remaining list.
    '''
    # Iterate over a snapshot of the items so that entries can be deleted
    # from the live dict while looping.
    for director, movies in list(directors.items()):
        if len(movies) < MIN_MOVIES:
            del directors[director]
        else:
            # Compute the mean before appending so the list still holds
            # only Movie tuples while it is being averaged.
            movies.append(_calc_mean(movies))
    return directors
43+
44+
45+
def _calc_mean(movies):
    '''Return the arithmetic mean of the ``score`` attribute of ``movies``.

    Args:
        movies: Sized iterable (e.g. a list) of objects exposing a numeric
            ``score`` attribute, such as ``Movie`` namedtuples.

    Returns:
        The mean score as a float.

    Raises:
        ZeroDivisionError: If ``movies`` is empty.
    '''
    # Imported locally so this helper does not depend on the module's
    # top-level import list.
    from math import fsum

    # fsum tracks the lost low-order bits, so the total does not drift the
    # way repeated ``+=`` on floats does (ten times 0.1 sums to exactly 1.0).
    return fsum(movie.score for movie in movies) / len(movies)
51+
52+
53+
def print_results(directors, num_directors=None):
    '''Print the best-rated directors, each followed by his/her movies.

    Directors are listed by highest average rating first; below each
    director the movies are listed by highest score first, followed by a
    separator line.
    See http://pybit.es/codechallenge13.html for example output.

    Args:
        directors: Dict mapping a director name to a list of ``Movie``
            namedtuples whose LAST element is the director's average score
            (a float), as produced by ``get_average_scores``.
        num_directors: Maximum number of directors to print. Defaults to
            the module-level ``NUM_TOP_DIRECTORS``.
    '''
    fmt_director_entry = '{counter:02}. {director:<52} {avg:.1f}'
    fmt_movie_entry = '{year:.0f}] {title:<50} {score}'
    sep_line = '-' * 60

    if num_directors is None:
        num_directors = NUM_TOP_DIRECTORS

    # The trailing element of every list is the average score; rank on it.
    # sorted() is stable, so directors with equal averages keep dict order.
    ranked = sorted(directors.items(),
                    key=lambda item: item[1][-1],
                    reverse=True)

    for position, (director, entries) in enumerate(ranked[:num_directors], start=1):
        print(fmt_director_entry.format(counter=position, director=director, avg=entries[-1]))
        # Leave out the float average so only Movie records remain.
        movies = [entry for entry in entries if not isinstance(entry, float)]
        # Highest rated movie first, as the docstring promises.
        for movie in sorted(movies, key=lambda m: m.score, reverse=True):
            print(fmt_movie_entry.format(year=movie.year, title=movie.title, score=movie.score))
        print(sep_line)
75+
76+
77+
def main():
    '''Load the movies, average the scores per director and print the ranking.'''
    directors = get_movies_by_director()
    # get_average_scores removes directors with too few movies and appends
    # each remaining director's average to the end of his/her movie list,
    # which is the shape print_results reads.
    directors = get_average_scores(directors)
    print_results(directors)
85+
86+
87+
# Run the report only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)