1
+ import csv
2
+ from collections import defaultdict , namedtuple
3
+ import os
4
+ from urllib .request import urlretrieve
5
+ import json
6
+
7
# Where the dataset lives remotely, and the local directory it is saved into.
BASE_URL = 'http://projects.bobbelderbos.com/pcc/movies/'
TMP = '/tmp'

# Download the CSV each time the module is executed.
fname = 'movie_metadata.csv'
local = os.path.join(TMP, fname)
remote = os.path.join(BASE_URL, fname)
urlretrieve(url=remote, filename=local)

# Path of the downloaded CSV file.
MOVIE_DATA = local
# A director needs at least this many movies to be ranked.
MIN_MOVIES = 4
# Only movies released after this year are kept.
MIN_YEAR = 1960

# Record type for a single movie row.
Movie = namedtuple('Movie', 'title year score')
20
+
21
+
22
def get_movies_by_director():
    """Group the movies found in the CSV file by their director.

    Reads the file at MOVIE_DATA with csv.DictReader and keeps only rows
    whose "title_year" column is non-empty and strictly greater than
    MIN_YEAR.

    Returns:
        A defaultdict(list) mapping each "director_name" value to a list of
        Movie namedtuples. The title, year and score fields hold the raw
        CSV strings (no numeric conversion is applied here).

    Raises:
        ValueError: if a non-empty "title_year" value is not an integer.
    """
    movies_by_director = defaultdict(list)
    # MOVIE_DATA is the same path as TMP + '/' + fname, built once at module
    # level; reuse it instead of re-assembling the path by hand.
    with open(MOVIE_DATA, newline='') as csv_file:
        for row in csv.DictReader(csv_file):
            year = row["title_year"]
            if year == "" or int(year) <= MIN_YEAR:
                continue
            # Use the module-level Movie type rather than creating a new
            # namedtuple class for every row.
            movie = Movie(
                title=row["movie_title"],
                year=year,
                score=row["imdb_score"],
            )
            movies_by_director[row["director_name"]].append(movie)
    return movies_by_director
37
+
38
+
39
def calc_mean_score(movies):
    """Return the average score of the given movies, rounded to 1 decimal.

    Every element of ``movies`` must expose a ``score`` attribute that
    float() can convert. An empty sequence raises ZeroDivisionError.
    """
    scores = [float(movie.score) for movie in movies]
    average = sum(scores) / len(scores)
    return round(average, 1)
50
+
51
+
52
def get_average_scores(directors):
    """Rank directors by the mean score of their movies.

    Args:
        directors: mapping of director name to a list of Movie namedtuples,
            as returned by get_movies_by_director.

    Returns:
        A list of (name, score) namedtuples sorted by score in descending
        order. Only directors with at least MIN_MOVIES movies are included.
        The same list is also printed to stdout.
    """
    # Create the result record type once instead of once per director.
    director = namedtuple("director", "name, score")
    qualifying = [
        director(name=name, score=calc_mean_score(movies))
        for name, movies in directors.items()
        if len(movies) >= MIN_MOVIES
    ]
    ranking = sorted(qualifying, key=lambda entry: entry.score, reverse=True)
    # Previously the ranking was only printed and the function returned None,
    # contradicting its documented contract. The print is kept so the
    # module-level call still produces output; the list is now returned too.
    print(ranking)
    return ranking
66
+
67
# Load the per-director movie lists from the downloaded CSV.
director_movies = get_movies_by_director()

# Disabled ad-hoc check for a single director:
# movies_sergio = director_movies['Sergio Leone']
# print(movies_sergio)
# calc_mean_score(movies_sergio)

# Print the directors ranked by their average score.
get_average_scores(directors=director_movies)
0 commit comments