Skip to content

Commit 332df01

Browse files
author
Plamen Milenkov
committed
Day 46
1 parent d80b5c2 commit 332df01

File tree

6 files changed

+44
-0
lines changed

6 files changed

+44
-0
lines changed

.DS_Store

0 Bytes
Binary file not shown.

days/.DS_Store

2 KB
Binary file not shown.

days/43-45-search-api/.DS_Store

0 Bytes
Binary file not shown.

days/46-48-beautifulsoup4/.DS_Store

8 KB
Binary file not shown.
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
requests
2+
bs4
3+
logbook
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import re
2+
from pprint import pprint
3+
import logbook
4+
5+
6+
import requests
7+
import bs4
8+
9+
10+
# Send every record at TRACE level or above to a rotating log file for the
# whole application, then create the named logger used by the rest of the
# script.
_file_handler = logbook.RotatingFileHandler('scrapper.log', level=logbook.TRACE)
_file_handler.push_application()
logger = logbook.Logger('scraper')
12+
13+
def get_site(URL, timeout=10):
    """Download *URL* and return the response body as text.

    Args:
        URL: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block forever on a stalled
            connection, so a finite default is used.

    Returns:
        The decoded body of the HTTP response (``response.text``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not respond within *timeout*.
    """
    logger.debug(f'Getting url {URL}')
    response = requests.get(URL, timeout=timeout)
    # Fail loudly on error statuses instead of parsing an error page.
    response.raise_for_status()
    logger.debug(f'Successfully retrieved url {URL}')
    return response.text
19+
20+
def main():
    """Scrape the challenges page and print the cleaned challenge titles.

    Fetches the page with ``get_site``, parses it with BeautifulSoup's
    ``html.parser``, selects every element matching the ``.challengeTitle``
    CSS selector, removes a leading "NN - " number prefix and any trailing
    whitespace from each element's text, then prints how many titles were
    found followed by the pretty-printed list.
    """
    logger.info('Start scraping...')
    site = get_site('http://codechalleng.es/challenges/')
    soup = bs4.BeautifulSoup(site, 'html.parser')

    css_class = '.challengeTitle'
    # f-string so the selector value is logged, not the literal placeholder.
    logger.debug(f'Looking for class {css_class}')
    challenges = soup.select(css_class)

    # Raw strings avoid the invalid "\s" escape warning; compiling once
    # hoists the pattern lookup out of the loop.
    number_prefix = re.compile(r"^\s*[0-9]{2} - ")
    trailing_space = re.compile(r"\s*$")

    challenges_names = []
    logger.info('Iterating over challenges...')
    for challenge in challenges:
        title = number_prefix.sub("", challenge.text)
        title = trailing_space.sub("", title)
        # Log the extracted title instead of an empty debug record.
        logger.debug(f'Extracted challenge title {title!r}')
        challenges_names.append(title)

    print(f'Found {len(challenges_names)} challenges.')
    pprint(challenges_names)
39+
40+
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)