web scraping code push

hobojoe1848 · hobojoe1848 · commit ce65c4fed1b3 · 2019-04-14T10:40:38.000+10:00
diff --git a/days/073-076-webscraping/code/newspaper3k_code.py b/days/073-076-webscraping/code/newspaper3k_code.py
@@ -0,0 +1,25 @@
+import newspaper
+
+from newspaper import Article
+
+URL = 'https://www.news.com.au/travel/travel-updates/the-sultan-of-brunei-created-a-30-million-gold-coast-real-estate-nightmare/news-story/8e4a4e1ee4137a9b014ce73563fda376'
+
+# The bare attribute expressions below echo a value only in an interactive session.
+article = Article(URL)
+
+# download() performs the HTTP fetch; the raw markup is then available as article.html.
+article.download()
+article.html
+
+# parse() has to come after download(); the attributes read below are filled in by it.
+article.parse()
+article.authors
+article.publish_date
+article.text
+article.top_image
+article.movies
+
+# summary remains an empty string until nlp() has run, so call it before reading.
+# nlp() relies on the NLTK 'punkt' tokenizer data being installed.
+article.nlp()
+article.summary
diff --git a/days/073-076-webscraping/code/talkpy_bs4.py b/days/073-076-webscraping/code/talkpy_bs4.py
@@ -0,0 +1,19 @@
+import requests
+import bs4
+
+URL = "https://training.talkpython.fm/courses/all"  # page whose <h3> headings are listed
+header_list = []  # shared accumulator: main() adds each heading's text here
+
+def main():
+    """Download the page at URL and print the text of each <h3> element on it."""
+    # Without a timeout the request can block forever if the server never answers.
+    response = requests.get(URL, timeout=10)
+    response.raise_for_status()  # raises requests.HTTPError on a 4xx/5xx reply
+    soup = bs4.BeautifulSoup(response.text, 'html.parser')
+    # Extend the module-level list in place so it keeps accumulating as before.
+    header_list.extend(tag.getText() for tag in soup.select('h3'))
+    for heading in header_list:
+        print(heading)
+
+if __name__ == "__main__":  # only scrape when run as a script, not when imported
+    main()