Skip to content

Commit b20df64

Browse files
author
Tomasz Janicki
committed
practice days 04-06
1 parent 0820b2c commit b20df64

File tree

8 files changed

+5437
-99
lines changed

8 files changed

+5437
-99
lines changed

days/04-06-collections/__init__.py

Whitespace-only changes.

days/04-06-collections/collections.ipynb

Lines changed: 186 additions & 99 deletions
Large diffs are not rendered by default.

days/04-06-collections/day06.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from collections import defaultdict
2+
import csv
3+
4+
# miasta = defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
5+
# Nested mapping: city name -> year -> placeholder value (always 0 here).
miasta = defaultdict(lambda: defaultdict(int))

# newline='' hands newline handling over to the csv module, as its
# documentation requires for file objects passed to a reader.
with open('uklad-komunikacyjny.csv', encoding='utf8', newline='') as file:
    reader = csv.DictReader(file, delimiter=';')
    for row in reader:
        # Only the (city, year) combination is recorded; the value is unused.
        miasta[row['miasto']][row['year']] = 0

# Print every (city, year) pair found in the file, grouped by city.
for miasto, lata in miasta.items():
    for rok in lata:
        print(f"{miasto} {rok}")

days/04-06-collections/directors.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import csv
2+
from collections import defaultdict, namedtuple
3+
import os
4+
from csv import DictReader
5+
from urllib.request import urlretrieve
6+
7+
BASE_URL = 'https://bites-data.s3.us-east-2.amazonaws.com/'
TMP = os.getenv("TMP", "/tmp")

fname = 'movie_metadata.csv'
# URLs always use forward slashes, so plain concatenation is used here:
# os.path.join would insert a backslash on Windows and break the URL.
remote = BASE_URL + fname
local = os.path.join(TMP, fname)
# NOTE: the download runs at import time and overwrites any existing copy.
urlretrieve(remote, local)

MOVIE_DATA = local  # path of the downloaded csv file
MIN_MOVIES = 4      # minimum number of movies for a director to be ranked
MIN_YEAR = 1960     # movies released before this year are ignored

Movie = namedtuple('Movie', 'title year score')
20+
21+
22+
def get_movies_by_director() -> dict[str, list]:
    """Group the movies listed in the MOVIE_DATA csv file by director.

    Rows whose ``title_year`` cannot be parsed as an integer are skipped,
    and movies released before MIN_YEAR are left out.

    Returns:
        A defaultdict mapping each director name to a list of Movie
        namedtuples (title, year, score).

    Raises:
        ValueError: if a row with a valid year has a non-numeric
            ``imdb_score``.
    """
    directors: dict[str, list] = defaultdict(list)

    # The data contains non-ASCII director names, so the encoding is given
    # explicitly instead of relying on the platform default; newline="" is
    # what the csv module expects for files handed to a reader.
    with open(MOVIE_DATA, encoding="utf-8", newline="") as f:
        reader = csv.DictReader(f)

        for row in reader:
            director = row['director_name']
            # Titles may carry non-breaking spaces; drop them.
            title = row['movie_title'].replace('\xa0', '')
            try:
                year = int(row['title_year'])
            except ValueError:
                # Without a usable year the row cannot be checked against
                # MIN_YEAR, so it is ignored.
                continue
            score = float(row['imdb_score'])

            if year >= MIN_YEAR:
                directors[director].append(Movie(title, year, score))

    return directors
45+
46+
47+
def calc_mean_score(movies: list[tuple]) -> float:
    """Return the mean score of a list of Movie namedtuples.

    Args:
        movies: Movie namedtuples (or any sequences whose item at index 2
            is the numeric score).

    Returns:
        The mean score rounded to 1 decimal place, or 0.0 when ``movies``
        is empty (instead of raising ZeroDivisionError).
    """
    # Index 2 is the ``score`` field of the Movie namedtuple.
    scores = [movie[2] for movie in movies]
    if not scores:
        return 0.0
    return round(sum(scores) / len(scores), 1)
56+
57+
58+
def get_average_scores(directors) -> list[tuple]:
    """Rank directors by the mean score of their movies.

    Takes the mapping produced by get_movies_by_director and returns a list
    of (director, average_score) tuples, highest average first. Directors
    with fewer than MIN_MOVIES movies are left out.
    """
    averages = [
        (name, calc_mean_score(films))
        for name, films in directors.items()
        if len(films) >= MIN_MOVIES
    ]
    # Stable sort: directors with equal averages keep their input order.
    averages.sort(key=lambda pair: pair[1], reverse=True)
    return averages
70+
71+
72+
if __name__ == "__main__":
    # Print the director ranking, best average score first.
    ranking = get_average_scores(get_movies_by_director())
    for director, score in ranking:
        print(f"{director}: {score}")

days/04-06-collections/movies.csv

Lines changed: 5043 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
year;rodzaj_biletow;przychody
2+
2017;jednorazowe ZKM;24805149
3+
2017;okresowe ZKM;33210442
4+
2017;metropolitalne wraz z rekompensatą MZKZG;6391592
5+
2018;jednorazowe ZKM;23447257
6+
2018;okresowe ZKM;30508314
7+
2018;metropolitalne wraz z rekompensatą MZKZG;7351578
8+
2019;jednorazowe ZKM;28635842.59
9+
2019;okresowe ZKM;20615848.66
10+
2019;metropolitalne wraz z rekompensatą MZKZG;8857405.94
11+
2020;jednorazowe ZKM;12823957.35
12+
2020;okresowe ZKM;17709274.44
13+
2020;metropolitalne wraz z rekompensatą MZKZG;8128979.13
14+
2021;jednorazowe ZKM;11519582
15+
2021;okresowe ZKM;21393508
16+
2021;metropolitalne wraz z rekompensatą MZKZG;12463994
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
from collections import defaultdict
2+
3+
import pytest
4+
5+
from directors import (get_movies_by_director, get_average_scores,
6+
calc_mean_score, Movie)
7+
8+
9+
@pytest.fixture(scope="module")
def movies():
    """Provide the director -> movies mapping, built once per test module."""
    movies_by_director = get_movies_by_director()
    return movies_by_director
12+
13+
14+
@pytest.fixture(scope="module")
def scores(movies):
    """Provide the ranked (director, average_score) list, built once per test module."""
    ranked = get_average_scores(movies)
    return ranked
17+
18+
19+
def test_get_movies_by_director(movies):
    """Check that known directors are present with the expected movie counts."""
    assert 'Sergio Leone' in movies
    for name, expected_count in (('Sergio Leone', 4), ('Peter Jackson', 12)):
        assert len(movies[name]) == expected_count
23+
24+
25+
def test_director_movies_data_structure(movies):
    """Check the container types: a dict of lists of Movie namedtuples."""
    # isinstance covers dict and its subclasses (defaultdict included),
    # which is the idiomatic replacement for comparing type() results.
    assert isinstance(movies, dict)
    jackson_movies = movies['Peter Jackson']
    assert isinstance(jackson_movies, list)
    assert isinstance(jackson_movies[0], Movie)
29+
30+
31+
def test_calc_mean_score(movies):
    """Check the rounded mean score for two well-known directors."""
    leone_films, nolan_films = movies['Sergio Leone'], movies['Christopher Nolan']
    assert calc_mean_score(leone_films) == 8.5
    assert calc_mean_score(nolan_films) == 8.4
36+
37+
38+
def test_get_average_scores_top_directors(scores):
    """Check the first eight entries of the ranking, in order."""
    expected = [
        ('Sergio Leone', 8.5),
        ('Christopher Nolan', 8.4),
        ('Quentin Tarantino', 8.2),
        ('Hayao Miyazaki', 8.2),
        ('Frank Darabont', 8.0),
        ('Stanley Kubrick', 8.0),
        ('James Cameron', 7.9),
        ('Joss Whedon', 7.9),
    ]
    top_eight = scores[:8]
    assert top_eight == expected
48+
49+
50+
@pytest.mark.parametrize(
    "director",
    [
        'Quentin Tarantino',
        'Hayao Miyazaki',
        'Frank Darabont',
        'Stanley Kubrick',
        'James Cameron',
        'Joss Whedon',
        'Alejandro G. Iñárritu',
    ],
)
def test_director_in_top_scores(director, scores):
    """Check that each listed director appears in ranking positions 3 to 13."""
    # The exact order and score can shift slightly with the way the mean is
    # computed, so only membership in the top slice is checked here.
    names_in_top = {entry[0] for entry in scores[2:13]}
    assert director in names_in_top
62+
63+
64+
def test_ignore_older_movies(movies):
    """Lowell Sherman's Black and White is from 1933 and should be skipped."""
    # .get() is used instead of indexing so the lookup neither inserts an
    # empty entry into the shared module-scoped defaultdict nor raises
    # KeyError when the mapping is a plain dict.
    assert len(movies.get("Lowell Sherman", [])) == 0
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
year;miasto;dlugosc_tras_autobusowych_w_granicach_miasta;dlugosc_tras_trolejbusowych_w_granicach_miasta;dlugosc_linii_autobusowych_w_granicach_miasta;dlugosc_linii_trolejbusowych_w_granicach_miasta;wozokm_autobusowe_w_granicach_miasta;wozokm_trolejbusowe_w_granicach_miasta
2+
2017;Gdynia;199;44.7;734;168;11760;5148
3+
2017;Rumia;47;0;77.6;0;1007;0
4+
2017;Sopot;37.65;3.7;49.5;7.7;684;116
5+
2017;Żukowo;20.7;0;20.7;0;231;0
6+
2017;Kosakowo;34.1;0;85;0;565;0
7+
2017;Wejherowo;7.6;0;7.6;0;91;0
8+
2017;Szemud;29.5;0;32.7;0;156;0
9+
2018;Gdynia;201.8;42.7;735;168;11656;5148
10+
2018;Rumia;47;0;77.6;0;1028;0
11+
2018;Sopot;40.2;3.7;49.5;9;687;121
12+
2018;Żukowo;20.7;0;20.7;0;230;0
13+
2018;Kosakowo;34.1;0;85;0;599;0
14+
2018;Wejherowo;7.6;0;7.6;0;91;0
15+
2018;Szemud;29.5;0;37.7;0;157;0
16+
2019;Gdynia;201.8;44.2;728;189;11782;5233
17+
2019;Rumia;47;0;77.6;0;1028;0
18+
2019;Sopot;40.2;6.1;49.5;9;689;149
19+
2019;Żukowo;20.7;0;20.7;0;240;0
20+
2019;Kosakowo;34.1;0;85;0;607;0
21+
2019;Wejherowo;7.6;0;7.6;0;91;0
22+
2019;Szemud;29.5;0;32.7;0;157;0
23+
2020;Gdynia;204;54.6;688;202;11571;5285
24+
2020;Rumia;47;0;77.4;0;964;0
25+
2020;Sopot;34;6.1;48.4;9;635;192
26+
2020;Żukowo;17;0;21.7;0;247;0
27+
2020;Kosakowo;41.1;0;85;0;607;0
28+
2020;Wejherowo;7.6;0;7.6;0;75;0
29+
2020;Szemud;31;0;32.7;0;147;0
30+
2021;Gdynia;204;51.6;660;202;11438;5598
31+
2021;Rumia;47;0;77.4;0;945;0
32+
2021;Sopot;34;6.1;48.4;9;584;246
33+
2021;Żukowo;17;0;21.7;0;250;0
34+
2021;Kosakowo;41.1;0;85;0;642;0
35+
2021;Wejherowo;7.6;0;7.6;0;76;0
36+
2021;Szemud;31;0;32.7;0;149;0

0 commit comments

Comments
 (0)