Skip to content

Commit 684019c

Browse files
committed
feat: save raw data of lc problems
1 parent 8720418 commit 684019c

File tree

6 files changed

+4693
-4690
lines changed

6 files changed

+4693
-4690
lines changed

.gitignore

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
.idea/
2-
.DS_Store
3-
.vscode
4-
/node_modules
5-
/solution/result.json
6-
/lcof/lcof.json
7-
/lcof/lcof_list.json
8-
/lcci/lcci.json
1+
.idea/
2+
.DS_Store
3+
.vscode
4+
/node_modules
5+
/solution/result.json
6+
/solution/raw.json
7+
/lcof/lcof.json
8+
/lcof/lcof_list.json
9+
/lcci/lcci.json
10+
/solution/__pycache__

solution/main.py

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import os
3-
from urllib.parse import quote
3+
import re
4+
from urllib.parse import quote, unquote
45

56
from spider import Spider
67

@@ -130,16 +131,77 @@ def generate_summary(result):
130131
f.write(summary_en)
131132

132133

134+
# Patterns are compiled once at import time instead of once per call.
_SRC_ATTR = re.compile(r'src="(.*?)"')
_CN_SECTION = re.compile("<!-- 这里写题目描述 -->(.*?)## 解法", re.S)
_EN_SECTION = re.compile("## Description(.*?)## Solutions", re.S)


def _read_text(path):
    """Return the UTF-8 text stored at *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()


def _write_text(path, text):
    """Overwrite *path* with *text*, encoded as UTF-8."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(text)


def _replace_section(content, section_re, new_body, path):
    """Return *content* with the span captured by *section_re* replaced.

    Only the matched span is spliced. A plain ``str.replace`` of the captured
    text would also rewrite every other occurrence of that text, which wrecks
    the file when the old description is empty and the capture is just
    blank lines.

    When the markers are missing the content is returned unchanged and a
    message naming *path* is printed, so one malformed README does not abort
    the whole batch.
    """
    match = section_re.search(content)
    if match is None:
        print(f'skip description update, section markers not found: {path}')
        return content
    start, end = match.span(1)
    updated = content[:start] + "\n\n" + new_body + "\n\n" + content[end:]
    return updated.replace("\n\n <ul>", "\n <ul>")


def _rewrite_image_urls(content, image_base):
    """Return *content* with every ``src="..."`` URL pointed at *image_base*.

    The file name of each URL is kept, with ``.PNG``/``.JPG`` lower-cased.
    The substitution is anchored on each ``src`` attribute, so one URL that
    is a substring of another (``1.png`` vs ``11.png``) cannot corrupt it.
    Empty ``src`` attributes are left alone.
    """

    def to_cdn(match):
        url = match.group(1)
        if not url:
            return match.group(0)
        image_name = (
            os.path.basename(url).replace('.PNG', '.png').replace('.JPG', '.jpg')
        )
        return 'src="' + image_base + image_name + '"'

    return _SRC_ATTR.sub(to_cdn, content)


def refresh(result):
    """Refresh the problem description inside every problem README.

    For each question, the Chinese and English README files are read, the
    text between the description marker and the solution heading is replaced
    with the question's ``content_cn`` / ``content_en``, every ``src="..."``
    URL is rewritten to the jsDelivr CDN ``images/`` directory of the problem,
    and the files are written back in place.

    Args:
        result: iterable of question dicts providing the keys
            ``frontend_question_id``, ``relative_path_cn``,
            ``relative_path_en``, ``content_cn`` and ``content_en``.
            Paths are resolved relative to the current working directory
            after ``/solution`` is replaced by ``.`` and URL-unquoted.
    """
    for question in result:
        # Progress output, one id per processed question.
        print(question['frontend_question_id'])

        relative_path_cn = str(question['relative_path_cn'])
        relative_path_en = str(question['relative_path_en'])
        path_cn = unquote(relative_path_cn.replace("/solution", "."))
        path_en = unquote(relative_path_en.replace("/solution", "."))

        # Both languages use the image directory derived from the Chinese
        # README path (the original code did the same for the English file).
        image_base = (
            'https://cdn.jsdelivr.net/gh/doocs/leetcode@main'
            + relative_path_cn.replace("README.md", "images/")
        )

        # Read both files before writing either one, so a missing file is
        # detected before anything on disk has been modified.
        cn_content = _read_text(path_cn)
        en_content = _read_text(path_en)

        cn_content = _replace_section(
            cn_content, _CN_SECTION, question['content_cn'], path_cn
        )
        _write_text(path_cn, _rewrite_image_urls(cn_content, image_base))

        en_content = _replace_section(
            en_content, _EN_SECTION, question['content_en'], path_en
        )
        _write_text(path_en, _rewrite_image_urls(en_content, image_base))
193+
133194
def save(result):
    """Dump *result* as a JSON document to ``./result.json`` (UTF-8)."""
    with open('./result.json', 'w', encoding='utf-8') as out:
        payload = json.dumps(result)
        out.write(payload)
136197

137198

138199
if __name__ == '__main__':
    # Session cookies are account credentials and must never be hard-coded in
    # a tracked file. They are read from the environment instead; an unset
    # variable falls back to the empty string.
    # NOTE(review): any cookie value that was ever committed should be treated
    # as leaked and invalidated by signing out of the affected sessions.
    cookie_cn = os.environ.get('LEETCODE_COOKIE_CN', '')
    cookie_en = os.environ.get('LEETCODE_COOKIE_EN', '')
    spider = Spider(cookie_cn, cookie_en)
    res = spider.run()
    # Persist the crawl result right after the crawl finishes.
    save(res)
143205

144206
# with open('./result.json', 'r', encoding='utf-8') as f:
145207
# res = f.read()
@@ -148,4 +210,4 @@ def save(result):
148210
generate_readme(res)
149211
generate_question_readme(res)
150212
generate_summary(res)
151-
save(res)
213+
# refresh(res)

solution/refresh.py

Lines changed: 0 additions & 68 deletions
This file was deleted.

solution/spider.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def __init__(self, cookie_cn: str, cookie_en: str):
9090
self.cookie_cn = cookie_cn
9191
self.cookie_en = cookie_en
9292
self.session = requests.session()
93+
self.raw_data = {}
9394

9495
def get_all_questions(self) -> List:
9596
"""获取所有题目"""
@@ -192,6 +193,7 @@ def handle(self, question: dict) -> dict:
192193
url_cn = f'https://leetcode-cn.com/problems/{question_title_slug}'
193194
url_en = f'https://leetcode.com/problems/{question_title_slug}'
194195
frontend_question_id = str(question['stat']['frontend_question_id']).zfill(4)
196+
self.raw_data[frontend_question_id] = question_detail
195197
no = int(frontend_question_id) // 100
196198
question_title_en = question['stat']['question__title']
197199
question_title_en = re.sub(r'[\\/:*?"<>|]', '', question_title_en).strip()
@@ -251,6 +253,10 @@ def handle(self, question: dict) -> dict:
251253
item['md_table_row_en'] = [col1_en, col2_en, col3_en, col4_en, col5_en]
252254
return item
253255

256+
def save(self):
257+
with open('./raw.json', 'w', encoding='utf-8') as f:
258+
f.write(json.dumps(self.raw_data))
259+
254260
def run(self):
255261
questions = self.get_all_questions()
256262
details = [self.handle(question) for question in questions]
@@ -259,4 +265,5 @@ def run(self):
259265
]
260266
details += [self.handle(question) for question in failed_questions]
261267
details = [detail for detail in details if detail]
268+
self.save()
262269
return details

0 commit comments

Comments
 (0)