2
2
import httpx
3
3
import bs4
4
4
import datetime
5
import asyncio

# Module-level event loop used by main() to drive the async scraper.
# asyncio.get_event_loop() is deprecated when there is no running loop
# (Python 3.10+), so create one explicitly and install it as the current
# loop for this thread.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
5
8
6
9
7
10
def main():
    """Entry point: run the concurrent title scrape and report elapsed time."""
    print("Python async web scraper")

    t0 = datetime.datetime.now()
    try:
        # Drive the async scraper to completion on the module-level loop.
        loop.run_until_complete(get_titles())

        dt = datetime.datetime.now() - t0
        print(f"Finished in {dt.total_seconds():,.2f} seconds.")
    finally:
        # Close the loop even if scraping raised, so its resources are
        # released (the original closed it only on the success path).
        loop.close()
19
+
15
20
16
async def get_html(n: int) -> str:
    """Download and return the HTML text for talkpython.fm episode *n*."""
    print(Fore.YELLOW + f"Getting HTML for episode {n} ...", flush=True)

    url = f'https://talkpython.fm/{n}'

    # A short-lived client per request; "async with" guarantees every
    # connection it opened is closed on exit, error or not.
    async with httpx.AsyncClient() as client:
        response = await client.get(url)
        response.raise_for_status()

    return response.text
24
31
@@ -34,10 +41,24 @@ def get_title_from_html(n: int, html: str) -> str:
34
41
return header .text .strip ()
35
42
36
43
37
async def get_titles():
    """Fetch episodes 220-230 concurrently and print each title in order.

    Every download is started up front so the HTTP requests overlap; the
    results are then awaited in episode order so the output stays
    deterministic regardless of completion order.
    """
    episodes = range(220, 231)

    # asyncio.create_task schedules on the currently running loop, so this
    # coroutine no longer depends on the module-level `loop` global and works
    # under run_until_complete() and asyncio.run() alike.
    tasks = [(n, asyncio.create_task(get_html(n))) for n in episodes]

    for episode, task in tasks:
        html = await task
        title = get_title_from_html(episode, html)
        print(Fore.GREEN + title)
42
63
43
64
0 commit comments