Skip to content

Commit 41f6c11

Browse files
author
Plamen Milenkov
committed
Days 47 - 54
1 parent 332df01 commit 41f6c11

File tree

11 files changed

+15764
-0
lines changed

11 files changed

+15764
-0
lines changed

.DS_Store

0 Bytes
Binary file not shown.

days/.DS_Store

2 KB
Binary file not shown.
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from typing import List
2+
3+
import logbook
4+
import requests
5+
import bs4
6+
import os
7+
8+
from bs4 import Tag
9+
10+
11+
def file_exists(file_name):
    """Tell whether ``file_name`` refers to an existing regular file.

    Missing paths and directories both give ``False``.
    """
    is_regular_file = os.path.isfile(file_name)
    return is_regular_file
13+
14+
def download_site(url, file_name='sportal.html', timeout=30):
    """Download ``url`` into a local HTML file unless that file already exists.

    The saved copy works as a cache: when ``file_name`` is already present
    nothing is requested and a short notice is printed instead.

    Args:
        url: Address of the page to fetch.
        file_name: Path of the local copy; defaults to ``'sportal.html'``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems and timeouts.
    """
    if file_exists(file_name):
        print("Not downloading...")
        return

    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # The page holds Cyrillic text; an explicit encoding keeps the write from
    # depending on (or failing under) the platform's default codec.
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write(response.text)
23+
24+
25+
def get_top_news(file_name='sportal.html') -> List[Tag]:
    """Return every element with the ``orange_link`` CSS class from the saved page.

    Args:
        file_name: Path of the previously downloaded HTML file; defaults to
            ``'sportal.html'``.

    Returns:
        A list with the matching tags, in document order. The list is empty
        when nothing matches.

    Raises:
        FileNotFoundError: If the page has not been downloaded yet.
    """
    # Read the page as UTF-8 explicitly so the Cyrillic headlines are decoded
    # the same way on every platform instead of using the locale default.
    with open(file_name, encoding='utf-8') as f:
        soup = bs4.BeautifulSoup(f, 'html.parser')

    return list(soup.select('.orange_link'))
33+
34+
def look_for_pattern(news: List[Tag], pattern):
    """Print the title and link of every news item whose text contains ``pattern``.

    Args:
        news: Tags to inspect; each one must offer ``.text`` and ``.get("href")``.
        pattern: Substring searched for (case-sensitively) in the item text.

    The link is resolved against the site root, so relative hrefs become
    absolute URLs, already-absolute hrefs are printed unchanged, and an item
    without an href falls back to the site root instead of ``.../None``.
    """
    from urllib.parse import urljoin

    base_url = 'https://www.sportal.bg/'
    for item in news:
        if pattern in item.text:
            # urljoin avoids the doubled slash / mangled URL that plain string
            # concatenation produces for root-relative or absolute hrefs.
            link = urljoin(base_url, item.get("href") or "")
            print(f'Title {item.text}: link {link}')
38+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import logbook
2+
import api
3+
4+
# Install a rotating file handler for the whole application, then create the
# logger this script writes through.
_log_handler = logbook.RotatingFileHandler('sportal_scraper.log')
_log_handler.push_application()
logger = logbook.Logger('Main')
6+
7+
8+
def main():
    """Fetch the sportal.bg front page (if not cached) and print matching news.

    The top news items are searched once for each of the fixed patterns.
    """
    logger.info('Starting application...')
    api.download_site('https://www.sportal.bg')
    headlines = api.get_top_news()
    for wanted in ('Мадрид', 'Барселона'):
        api.look_for_pattern(headlines, wanted)


if __name__ == '__main__':
    main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
requests
2+
bs4
3+
logbook

days/46-48-beautifulsoup4/scrap_sportal/sportal.html

Lines changed: 15137 additions & 0 deletions
Large diffs are not rendered by default.

days/52-54-feedparser/my_code/my_file_day1.xml

Lines changed: 309 additions & 0 deletions
Large diffs are not rendered by default.

days/52-54-feedparser/my_code/my_file_day2.xml

Lines changed: 201 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import feedparser
2+
import logbook
3+
4+
# Install a rotating file handler for the whole application, then create the
# logger used by the functions below.
_log_handler = logbook.RotatingFileHandler('parser.log')
_log_handler.push_application()
logger = logbook.Logger('feed_parser')
6+
7+
def print_feed(feed, selected_tag):
    """Print tag, publication date, title and link of the matching feed entries.

    An entry matches when ``selected_tag`` is a substring of the term of its
    first tag. Entries that carry no tags, or whose first tag has no term,
    cannot match and are skipped instead of aborting the whole run.

    Args:
        feed: Parsed feed object exposing an ``entries`` sequence.
        selected_tag: Text to look for inside the first tag's term.
    """
    logger.debug("Trying to print 'published', 'title' and 'link'...")
    for entry in feed.entries:
        # getattr with a default covers entries that have no 'tags' at all;
        # the 'or' also covers an empty tag list.
        tags = getattr(entry, 'tags', None) or []
        term = getattr(tags[0], 'term', None) if tags else None
        if term is None or selected_tag not in term:
            continue

        try:
            print(f'[{term}] {entry.published} - {entry.title}: {entry.link}')
        except AttributeError as error:
            # A matching entry may still lack 'published', 'title' or 'link'.
            logger.error('Could not print some of the attributes of the feed.')
            print(f'Cannot find some of the attributes: {error}')
16+
17+
18+
def main():
    """Ask for a tag name and print the matching entries of the saved feed."""
    logger.info('Starting the application...')
    selected_tag = input('Preferred tag name:')
    # 'my_file_day1.xml' can be parsed the same way; only the second saved
    # feed is used here.
    parsed_feed = feedparser.parse('my_file_day2.xml')
    print_feed(parsed_feed, selected_tag)


if __name__ == '__main__':
    main()
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import requests
2+
3+
4+
def day1():
    """Download the Steam new-releases feed and save it as ``my_file_day1.xml``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems and timeouts.
    """
    url = "http://store.steampowered.com/feeds/newreleases.xml"
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    # Store the body exactly as received: re-encoding the decoded text with
    # the platform default codec could disagree with the encoding the XML
    # declares for itself.
    with open("my_file_day1.xml", "wb") as f:
        f.write(r.content)
11+
12+
13+
def day2():
    """Download the sportal.bg RSS feed and save it as ``my_file_day2.xml``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems and timeouts.
    """
    url = "https://www.sportal.bg/uploads/rss_category_0.xml"
    # A timeout keeps a stalled connection from hanging the script forever.
    r = requests.get(url, timeout=30)
    r.raise_for_status()

    # Write the raw bytes instead of encoding r.text as ISO-8859-1. That
    # round trip only reproduces the payload when requests itself decoded the
    # body as ISO-8859-1; with any other detected charset the Cyrillic text
    # cannot be encoded and the write raises UnicodeEncodeError.
    with open("my_file_day2.xml", "wb") as f:
        f.write(r.content)
20+
21+
22+
def main():
    """Run the feed download; only ``day2`` is enabled."""
    # day1() is the alternative download and is left switched off here.
    day2()


if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)