|
| 1 | +import csv |
| 2 | +from collections import defaultdict, namedtuple |
| 3 | +import os |
| 4 | +from urllib.request import urlretrieve |
| 5 | + |
# Remote bucket that hosts the exercise data, and the local directory the
# downloaded copy is stored in.
BASE_URL = 'https://bites-data.s3.us-east-2.amazonaws.com/'
TMP = '/tmp'

fname = 'movie_metadata.csv'
# URLs always use forward slashes; os.path.join would insert the platform's
# path separator (a backslash on Windows), so the URL is built by plain
# concatenation instead. BASE_URL already ends with '/'.
remote = BASE_URL + fname
local = os.path.join(TMP, fname)
# The CSV is fetched at import time so the functions below can read it.
urlretrieve(remote, local)

MOVIE_DATA = local  # path of the downloaded CSV file
MIN_MOVIES = 4  # minimum number of movies a director needs to be ranked
MIN_YEAR = 1960  # release-year cutoff; earlier movies are ignored

# NOTE(review): get_movies_by_director fills the first field with the movie
# title and the second with a [title_year, imdb_score] list, which does not
# match the field names -- confirm whether 'title year score' was intended.
Movie = namedtuple('Movie', 'title_year score')
| 19 | + |
| 20 | + |
def get_movies_by_director():
    """Read the movie CSV and group its movies by director.

    Rows whose ``title_year`` is empty, or earlier than ``MIN_YEAR``, are
    skipped.

    Returns:
        A ``defaultdict(list)`` mapping each ``director_name`` to a list of
        ``Movie`` namedtuples. Each tuple holds the movie title in its first
        field and a ``[title_year, imdb_score]`` list (both still strings,
        exactly as read from the CSV) in its second field.

    Raises:
        ValueError: if a non-empty ``title_year`` is not an integer.
    """
    movies = defaultdict(list)
    # The context manager closes the file once every row has been consumed;
    # newline="" is what the csv module expects for files it parses.
    with open(MOVIE_DATA, encoding="utf-8", newline="") as csv_file:
        for row in csv.DictReader(csv_file):
            year = row["title_year"]
            if not year or int(year) < MIN_YEAR:
                continue
            # The payload shape (title, [year, score]) is kept as-is because
            # the scoring helpers read the score through ``movie.score[1]``.
            movies[row["director_name"]].append(
                Movie(row["movie_title"], [year, row["imdb_score"]])
            )
    return movies
| 35 | + |
| 36 | + |
def calc_mean_score(movies):
    """Return the mean score of *movies*, rounded to one decimal place.

    Args:
        movies: iterable of ``Movie`` namedtuples whose ``score`` field is a
            ``[title_year, imdb_score]`` pair; the score is read from
            ``movie.score[1]``.

    Returns:
        The rounded mean as a float, or ``0.0`` when no movie carries a
        usable score (including when *movies* is empty).
    """
    scores = []
    for movie in movies:
        try:
            scores.append(float(movie.score[1]))
        except (TypeError, ValueError):
            # Missing or non-numeric score (e.g. an empty CSV cell): leave
            # the movie out of the mean instead of aborting.
            continue
    if not scores:
        # Nothing to average; avoid dividing by zero.
        return 0.0
    return round(sum(scores) / len(scores), 1)
| 50 | + |
| 51 | + |
def get_average_scores(directors, min_movies=None):
    """Rank directors by the mean score of their movies.

    Args:
        directors: mapping of director name to a list of ``Movie``
            namedtuples (as returned by ``get_movies_by_director``); the
            score of each movie is read from ``movie.score[1]``. The mapping
            is not modified.
        min_movies: minimum number of movies a director needs in order to
            be included. Defaults to the module-level ``MIN_MOVIES``.

    Returns:
        A list of ``(director, average_score)`` tuples, with the average
        rounded to one decimal place, sorted by average in descending order.
        Directors with equal averages keep the order in which they appear in
        *directors*.

    Raises:
        ValueError: if a movie's score cannot be converted to ``float``.
    """
    if min_movies is None:
        # Resolved at call time so a later change to MIN_MOVIES is honoured.
        min_movies = MIN_MOVIES

    averages = []
    for director, movies in directors.items():
        # Filter instead of deleting entries, so the caller's dict survives
        # intact. Empty lists are skipped to avoid dividing by zero.
        if not movies or len(movies) < min_movies:
            continue
        mean = sum(float(movie.score[1]) for movie in movies) / len(movies)
        averages.append((director, round(mean, 1)))

    return sorted(averages, key=lambda pair: pair[1], reverse=True)
| 85 | + |
| 86 | + |
| 87 | + |
| 88 | + |
if __name__ == "__main__":
    # Ad-hoc smoke run. Guarded so that importing this module does not parse
    # the CSV again and dump its contents to stdout; the computed results are
    # printed instead of being discarded.
    director_movies = get_movies_by_director()
    print(get_average_scores(director_movies))
    print(director_movies['Peter Jackson'])
    print(calc_mean_score(director_movies['Sergio Leone']))
    print(director_movies.items())
0 commit comments