1
1
from colorama import Fore
2
2
import httpx
3
3
import bs4
4
+ import datetime
5
+ from unsync import unsync
4
6
5
7
6
8
def main():
    """Run the scraper and report how long the whole run took.

    Prints a banner, starts ``get_titles()`` and waits on the object it
    returns via ``.result()``, then prints the elapsed wall-clock time in
    seconds.
    """
    print("Python async web scraper")

    t0 = datetime.datetime.now()
    # get_titles() hands back a future-like object; .result() waits for the
    # scraping run to finish so the timing below covers all of the work.
    get_titles().result()
    dt = datetime.datetime.now() - t0

    print(f"Finished in {dt.total_seconds():,.2f} seconds.")
10
15
11
16
12
@unsync
async def get_html(n: int) -> str:
    """Download the talkpython.fm page for episode *n* and return its text.

    Args:
        n: Episode number; it is appended to ``https://talkpython.fm/`` to
            form the request URL.

    Returns:
        The response body as text (``resp.text``).

    Raises:
        Whatever ``resp.raise_for_status()`` raises for an error response
        (``httpx.HTTPStatusError`` per the httpx documentation).
    """
    print(Fore.YELLOW + f"Getting HTML for episode {n}...", flush=True)
    url = f'https://talkpython.fm/{n}'

    # The "async with" syntax ensures that all active connections are closed on exit.
    async with httpx.AsyncClient() as client:
        resp = await client.get(url)
        resp.raise_for_status()

    return resp.text
20
28
21
29
30
+ # @unsync # <-- will run get_title_from_html() on a background thread.
31
+ # @unsync(cpu_bound=True) # <-- will run get_title_from_html() on a subprocess.
22
32
def get_title_from_html (n : int , html : str ) -> str :
23
33
print (Fore .CYAN + f"Getting TITLE for episode { n } ..." , flush = True )
24
34
@@ -30,10 +40,24 @@ def get_title_from_html(n: int, html: str) -> str:
30
40
return header .text .strip ()
31
41
32
42
33
# Sequential variant, kept for comparison: awaiting each download inside the
# loop means the next request is not issued until the previous one finishes.
# async def get_titles():
#     for n in range(220, 231):
#         html = await get_html(n)
#         title = get_title_from_html(n, html)
#         print(Fore.GREEN + title)


@unsync
async def get_titles():
    """Fetch episodes 220 through 230 and print each episode's title.

    Every ``get_html(n)`` call is issued up front and paired with its
    episode number; the results are then awaited in episode order, so the
    titles are printed in ascending order regardless of which download
    completes first.
    """
    # Issue all downloads before awaiting any of them so they can overlap.
    tasks = [(episode, get_html(episode)) for episode in range(220, 231)]

    for episode, task in tasks:
        html = await task
        title = get_title_from_html(episode, html)
        print(Fore.GREEN + title)
38
62
39
63
0 commit comments