File tree Expand file tree Collapse file tree 2 files changed +44
-0
lines changed
days/073-076-webscraping/code Expand file tree Collapse file tree 2 files changed +44
-0
lines changed Original file line number Diff line number Diff line change
1
+ import newspaper
2
+
3
+ from newspaper import Article
4
+
5
# Demo: fetch a single news story with newspaper's Article class and show
# the fields it extracts.
URL = 'https://www.news.com.au/travel/travel-updates/the-sultan-of-brunei-created-a-30-million-gold-coast-real-estate-nightmare/news-story/8e4a4e1ee4137a9b014ce73563fda376'

article = Article(URL)

# download() fetches the raw HTML over the network; parse() has to run after
# it to fill in authors, publish_date, text, top_image and movies.
article.download()
article.parse()

# summary is only populated by nlp(); without this call article.summary
# stays an empty string. NOTE: nlp() needs NLTK's "punkt" tokenizer data to
# be installed, otherwise it raises.
article.nlp()

# A bare attribute expression only echoes inside a REPL, so print each field
# explicitly to make the script produce output when run from the shell.
# The raw HTML is large, so only its length is reported.
print(f"HTML length: {len(article.html)}")
print(f"Authors: {article.authors}")
print(f"Publish date: {article.publish_date}")
print(f"Top image: {article.top_image}")
print(f"Movies: {article.movies}")
print(f"Summary:\n{article.summary}")
print(f"Text:\n{article.text}")
Original file line number Diff line number Diff line change
1
+ import requests
2
+ import bs4
3
+
4
URL = "https://training.talkpython.fm/courses/all"

# Seconds to wait for the server before giving up. requests applies no
# timeout by default, so without this a stalled connection hangs forever.
REQUEST_TIMEOUT_SECONDS = 10

# Text of every <h3> element found by the most recent main() run.
header_list = []


def main():
    """Download the page at URL and print the text of each <h3> element.

    The extracted texts are also stored in the module-level ``header_list``.
    Its contents are replaced on every call, so calling main() more than
    once does not accumulate (and re-print) duplicate entries.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not respond within
            REQUEST_TIMEOUT_SECONDS.
    """
    raw_site_page = requests.get(URL, timeout=REQUEST_TIMEOUT_SECONDS)
    raw_site_page.raise_for_status()

    soup = bs4.BeautifulSoup(raw_site_page.text, 'html.parser')

    # Slice assignment mutates the existing list object in place, so any
    # code already holding a reference to header_list sees the new contents.
    header_list[:] = [tag.getText() for tag in soup.select('h3')]

    for header in header_list:
        print(header)


if __name__ == "__main__":
    main()
You can’t perform that action at this time.
0 commit comments