1
+ import re
2
+
3
# Sample inputs; each one is the default argument of a function below.

# Course outline. Adjacent literals are concatenated with no separator, so
# every section title directly follows the previous section's mm:ss timing.
COURSE = (
    'Introduction 1 Lecture 01:47'
    'The Basics 4 Lectures 32:03'
    'Getting Technical! 4 Lectures 41:51'
    'Challenge 2 Lectures 27:48'
    'Afterword 1 Lecture 05:02'
)

# Tweet text containing one link followed by two hashtags.
TWEET = (
    'New PyBites article: Module of the Week - Requests-cache '
    'for Repeated API Calls - http://pybit.es/requests-cache.html '
    '#python #APIs'
)

# Two back-to-back <p> elements; useful for exercising non-greedy matching.
HTML = (
    '<p>pybites != greedy</p>'
    '<p>not the same can be said REgarding ...</p>'
)
13
+
14
+
15
def extract_course_times(course=COURSE):
    """Return all mm:ss timings found in ``course``.

    A timing is two digits, a colon, and two more digits
    (minutes:seconds). Matches are returned in the order they appear.
    For the default COURSE string the result is:

        ['01:47', '32:03', '41:51', '27:48', '05:02']

    Args:
        course: Text to scan for timings.

    Returns:
        A list of the matched timing strings; empty if none are found.
    """
    return re.findall(r'\d\d:\d\d', course)
26
+
27
+
28
def get_all_hashtags_and_links(tweet=TWEET):
    """Return all hashtags and links found in ``tweet``.

    Matches are returned in the order they appear in the text. For the
    default TWEET string the result is:

        ['http://pybit.es/requests-cache.html',
         '#python',
         '#APIs']

    Args:
        tweet: Text to scan.

    Returns:
        A list of hashtag and link strings; empty if none are found.
    """
    # A hashtag is '#' plus word characters, so trailing punctuation such
    # as the comma in '#python,' is not captured. A link must start with
    # an explicit http:// or https:// scheme, so ordinary words that merely
    # begin with "http" are ignored. The link runs to the next whitespace,
    # which keeps any '#fragment' inside the URL attached to it.
    return re.findall(r'#\w+|https?://\S+', tweet)
41
+
42
+
43
def match_first_paragraph(html=HTML):
    """Return the content of the first ``<p>...</p>`` element in ``html``.

    For the default HTML string the result is 'pybites != greedy'.

    Args:
        html: Markup to search.

    Returns:
        The text between the first '<p>' and the nearest following
        '</p>', or None when ``html`` contains no such paragraph.
    """
    # Non-greedy '.*?' stops at the first closing tag instead of running on
    # to the last one; DOTALL lets a paragraph span several lines.
    first = re.search(r'<p>(.*?)</p>', html, re.DOTALL)
    if first is None:
        return None
    return first.group(1)
54
+
55
# Exercise each function once with its default sample input.
# The return values are intentionally discarded.
extract_course_times()
get_all_hashtags_and_links()
match_first_paragraph()
0 commit comments