Skip to content

Commit ce65c4f

Browse files
committed
web scraping code push
1 parent 5b188af commit ce65c4f

File tree

2 files changed

+44
-0
lines changed

2 files changed

+44
-0
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import newspaper
2+
3+
from newspaper import Article
4+
5+
URL = 'https://www.news.com.au/travel/travel-updates/the-sultan-of-brunei-created-a-30-million-gold-coast-real-estate-nightmare/news-story/8e4a4e1ee4137a9b014ce73563fda376'
6+
7+
# Build the article wrapper for URL, then fetch and parse it.
# parse() only works on HTML that download() has already retrieved.
article = Article(URL)
article.download()
article.parse()

# summary stays empty until nlp() has run.
# NOTE(review): nlp() relies on NLTK tokenizer data being installed - confirm
# it is available in the target environment.
article.nlp()

# As bare expression statements these values were silently discarded when the
# file ran as a script; print them so the script actually reports something.
print(f"html: {len(article.html)} characters")
print(f"authors: {article.authors}")
print(f"publish_date: {article.publish_date}")
print(f"text: {article.text}")
print(f"top_image: {article.top_image}")
print(f"movies: {article.movies}")
print(f"summary: {article.summary}")
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import requests
2+
import bs4
3+
4+
URL = "https://training.talkpython.fm/courses/all"
5+
header_list = []
6+
7+
def main():
    """Fetch the page at URL and print the text of every <h3> element.

    The text of each <h3> is appended to the module-level ``header_list``,
    after which every entry in ``header_list`` is printed on its own line.

    Raises:
        requests.HTTPError: If the response has an error status code
            (raised by ``raise_for_status``).
        requests.Timeout: If the server does not respond within the timeout.
    """
    # requests applies no timeout unless asked, so without one a stalled
    # server would block this script forever.
    raw_site_page = requests.get(URL, timeout=10)
    raw_site_page.raise_for_status()

    soup = bs4.BeautifulSoup(raw_site_page.text, 'html.parser')
    header_list.extend(heading.get_text() for heading in soup.select('h3'))

    for header in header_list:
        print(header)
17+
18+
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)