Skip to content

Commit 8cd1926

Browse files
committed
feat: add solutions to lc problem: No.1236
No.1236.Web Crawler
1 parent 76f10a2 commit 8cd1926

File tree

8 files changed

+419
-6
lines changed

8 files changed

+419
-6
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -255,13 +255,13 @@
255255

256256
## Stars 趋势
257257

258-
<a href="https://github.com/doocs/leetcode/stargazers" target="_blank"><img src="./images/starcharts.svg" alt="Stargazers over time" /></a>
258+
<a href="https://github.com/doocs/leetcode/stargazers" target="_blank"><img src="https://cdn.jsdelivr.net/gh/doocs/leetcode@main/images/starcharts.svg" alt="Stargazers over time" /></a>
259259

260260
## 贡献者
261261

262262
感谢以下所有朋友对本项目的贡献!
263263

264-
<a href="https://github.com/doocs/leetcode/graphs/contributors" target="_blank"><img src="./images/contributors.svg" /></a>
264+
<a href="https://github.com/doocs/leetcode/graphs/contributors" target="_blank"><img src="https://cdn.jsdelivr.net/gh/doocs/leetcode@main/images/contributors.svg" /></a>
265265

266266
## 赞助者
267267

README_EN.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -245,13 +245,13 @@ You can also contribute to [doocs/leetcode](https://github.com/doocs/leetcode) u
245245

246246
## Stargazers over time
247247

248-
<a href="https://github.com/doocs/leetcode/stargazers" target="_blank"><img src="./images/starcharts.svg" alt="Stargazers over time" /></a>
248+
<a href="https://github.com/doocs/leetcode/stargazers" target="_blank"><img src="https://cdn.jsdelivr.net/gh/doocs/leetcode@main/images/starcharts.svg" alt="Stargazers over time" /></a>
249249

250250
## Contributors
251251

252252
This project exists thanks to all the people who contribute.
253253

254-
<a href="https://github.com/doocs/leetcode/graphs/contributors" target="_blank"><img src="./images/contributors.svg" /></a>
254+
<a href="https://github.com/doocs/leetcode/graphs/contributors" target="_blank"><img src="https://cdn.jsdelivr.net/gh/doocs/leetcode@main/images/contributors.svg" /></a>
255255

256256
## Backers & Sponsors
257257

solution/1200-1299/1236.Web Crawler/README.md

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,22 +88,163 @@ startUrl = &quot;http://news.google.com&quot;
8888

8989
<!-- 这里可写通用的实现逻辑 -->
9090

91+
DFS（深度优先搜索）。从 startUrl 出发，利用 htmlParser 获取当前页面的所有链接，仅递归访问与 startUrl 域名相同的链接，并用哈希表记录已访问过的链接，避免重复爬取。
92+
9193
<!-- tabs:start -->
9294

9395
### **Python3**
9496

9597
<!-- 这里可写当前语言的特殊实现逻辑 -->
9698

9799
```python
98-
100+
# """
101+
# This is HtmlParser's API interface.
102+
# You should not implement it, or speculate about its implementation
103+
# """
104+
# class HtmlParser(object):
105+
# def getUrls(self, url):
106+
# """
107+
# :type url: str
108+
# :rtype List[str]
109+
# """
110+
111+
class Solution:
    def crawl(self, startUrl: str, htmlParser: 'HtmlParser') -> List[str]:
        """Return every URL reachable from startUrl with the same hostname.

        Performs a DFS over the link graph exposed by htmlParser,
        following only links whose hostname matches startUrl's.
        """

        def host(url: str) -> str:
            # Strip the fixed "http://" prefix (7 chars), then keep
            # everything up to the first '/' — the hostname.
            return url[7:].split('/')[0]

        def dfs(url: str) -> None:
            if url in ans:
                return
            ans.add(url)
            cur = host(url)  # hoisted: invariant across all neighbors
            for nxt in htmlParser.getUrls(url):
                if host(nxt) == cur:
                    dfs(nxt)

        ans = set()
        dfs(startUrl)
        return list(ans)
99128
```
100129

101130
### **Java**
102131

103132
<!-- 这里可写当前语言的特殊实现逻辑 -->
104133

105134
```java
135+
/**
136+
* // This is the HtmlParser's API interface.
137+
* // You should not implement it, or speculate about its implementation
138+
* interface HtmlParser {
139+
* public List<String> getUrls(String url) {}
140+
* }
141+
*/
142+
143+
class Solution {
144+
private Set<String> ans;
145+
146+
public List<String> crawl(String startUrl, HtmlParser htmlParser) {
147+
ans = new HashSet<>();
148+
dfs(startUrl, htmlParser);
149+
return new ArrayList<>(ans);
150+
}
151+
152+
private void dfs(String url, HtmlParser htmlParser) {
153+
if (ans.contains(url)) {
154+
return;
155+
}
156+
ans.add(url);
157+
for (String next : htmlParser.getUrls(url)) {
158+
if (host(next).equals(host(url))) {
159+
dfs(next, htmlParser);
160+
}
161+
}
162+
}
163+
164+
private String host(String url) {
165+
url = url.substring(7);
166+
return url.split("/")[0];
167+
}
168+
}
169+
```
170+
171+
### **C++**
172+
173+
```cpp
174+
/**
175+
* // This is the HtmlParser's API interface.
176+
* // You should not implement it, or speculate about its implementation
177+
* class HtmlParser {
178+
* public:
179+
* vector<string> getUrls(string url);
180+
* };
181+
*/
182+
183+
class Solution {
184+
public:
185+
vector<string> ans;
186+
unordered_set<string> vis;
187+
188+
vector<string> crawl(string startUrl, HtmlParser htmlParser) {
189+
dfs(startUrl, htmlParser);
190+
return ans;
191+
}
192+
193+
void dfs(string& url, HtmlParser& htmlParser) {
194+
if (vis.count(url)) return;
195+
vis.insert(url);
196+
ans.push_back(url);
197+
for (string next : htmlParser.getUrls(url))
198+
if (host(url) == host(next))
199+
dfs(next, htmlParser);
200+
}
201+
202+
string host(string url) {
203+
int i = 7;
204+
string res;
205+
for (; i < url.size(); ++i)
206+
{
207+
if (url[i] == '/') break;
208+
res += url[i];
209+
}
210+
return res;
211+
}
212+
};
213+
```
106214
215+
### **Go**
216+
217+
```go
218+
/**
219+
* // This is HtmlParser's API interface.
220+
* // You should not implement it, or speculate about its implementation
221+
* type HtmlParser struct {
222+
* func GetUrls(url string) []string {}
223+
* }
224+
*/
225+
226+
func crawl(startUrl string, htmlParser HtmlParser) []string {
227+
var ans []string
228+
vis := make(map[string]bool)
229+
var dfs func(url string)
230+
host := func(url string) string {
231+
return strings.Split(url[7:], "/")[0]
232+
}
233+
dfs = func(url string) {
234+
if vis[url] {
235+
return
236+
}
237+
vis[url] = true
238+
ans = append(ans, url)
239+
for _, next := range htmlParser.GetUrls(url) {
240+
if host(next) == host(url) {
241+
dfs(next)
242+
}
243+
}
244+
}
245+
dfs(startUrl)
246+
return ans
247+
}
107248
```
108249

109250
### **...**

solution/1200-1299/1236.Web Crawler/README_EN.md

Lines changed: 142 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,18 +87,159 @@ startUrl = &quot;http://news.google.com&quot;
8787

8888
## Solutions
8989

90+
DFS. Starting from `startUrl`, recursively follow the links returned by `htmlParser`, visiting only URLs that share `startUrl`'s hostname, and track visited URLs in a hash set so no page is crawled twice.
91+
9092
<!-- tabs:start -->
9193

9294
### **Python3**
9395

9496
```python
95-
97+
# """
98+
# This is HtmlParser's API interface.
99+
# You should not implement it, or speculate about its implementation
100+
# """
101+
# class HtmlParser(object):
102+
# def getUrls(self, url):
103+
# """
104+
# :type url: str
105+
# :rtype List[str]
106+
# """
107+
108+
class Solution:
    def crawl(self, startUrl: str, htmlParser: 'HtmlParser') -> List[str]:
        """Return every URL reachable from startUrl with the same hostname.

        Performs a DFS over the link graph exposed by htmlParser,
        following only links whose hostname matches startUrl's.
        """

        def host(url: str) -> str:
            # Strip the fixed "http://" prefix (7 chars), then keep
            # everything up to the first '/' — the hostname.
            return url[7:].split('/')[0]

        def dfs(url: str) -> None:
            if url in ans:
                return
            ans.add(url)
            cur = host(url)  # hoisted: invariant across all neighbors
            for nxt in htmlParser.getUrls(url):
                if host(nxt) == cur:
                    dfs(nxt)

        ans = set()
        dfs(startUrl)
        return list(ans)
96125
```
97126

98127
### **Java**
99128

100129
```java
130+
/**
131+
* // This is the HtmlParser's API interface.
132+
* // You should not implement it, or speculate about its implementation
133+
* interface HtmlParser {
134+
* public List<String> getUrls(String url) {}
135+
* }
136+
*/
137+
138+
class Solution {
139+
private Set<String> ans;
140+
141+
public List<String> crawl(String startUrl, HtmlParser htmlParser) {
142+
ans = new HashSet<>();
143+
dfs(startUrl, htmlParser);
144+
return new ArrayList<>(ans);
145+
}
146+
147+
private void dfs(String url, HtmlParser htmlParser) {
148+
if (ans.contains(url)) {
149+
return;
150+
}
151+
ans.add(url);
152+
for (String next : htmlParser.getUrls(url)) {
153+
if (host(next).equals(host(url))) {
154+
dfs(next, htmlParser);
155+
}
156+
}
157+
}
158+
159+
private String host(String url) {
160+
url = url.substring(7);
161+
return url.split("/")[0];
162+
}
163+
}
164+
```
165+
166+
### **C++**
167+
168+
```cpp
169+
/**
170+
* // This is the HtmlParser's API interface.
171+
* // You should not implement it, or speculate about its implementation
172+
* class HtmlParser {
173+
* public:
174+
* vector<string> getUrls(string url);
175+
* };
176+
*/
177+
178+
class Solution {
179+
public:
180+
vector<string> ans;
181+
unordered_set<string> vis;
182+
183+
vector<string> crawl(string startUrl, HtmlParser htmlParser) {
184+
dfs(startUrl, htmlParser);
185+
return ans;
186+
}
187+
188+
void dfs(string& url, HtmlParser& htmlParser) {
189+
if (vis.count(url)) return;
190+
vis.insert(url);
191+
ans.push_back(url);
192+
for (string next : htmlParser.getUrls(url))
193+
if (host(url) == host(next))
194+
dfs(next, htmlParser);
195+
}
196+
197+
string host(string url) {
198+
int i = 7;
199+
string res;
200+
for (; i < url.size(); ++i)
201+
{
202+
if (url[i] == '/') break;
203+
res += url[i];
204+
}
205+
return res;
206+
}
207+
};
208+
```
101209
210+
### **Go**
211+
212+
```go
213+
/**
214+
* // This is HtmlParser's API interface.
215+
* // You should not implement it, or speculate about its implementation
216+
* type HtmlParser struct {
217+
* func GetUrls(url string) []string {}
218+
* }
219+
*/
220+
221+
func crawl(startUrl string, htmlParser HtmlParser) []string {
222+
var ans []string
223+
vis := make(map[string]bool)
224+
var dfs func(url string)
225+
host := func(url string) string {
226+
return strings.Split(url[7:], "/")[0]
227+
}
228+
dfs = func(url string) {
229+
if vis[url] {
230+
return
231+
}
232+
vis[url] = true
233+
ans = append(ans, url)
234+
for _, next := range htmlParser.GetUrls(url) {
235+
if host(next) == host(url) {
236+
dfs(next)
237+
}
238+
}
239+
}
240+
dfs(startUrl)
241+
return ans
242+
}
102243
```
103244

104245
### **...**
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/**
2+
* // This is the HtmlParser's API interface.
3+
* // You should not implement it, or speculate about its implementation
4+
* class HtmlParser {
5+
* public:
6+
* vector<string> getUrls(string url);
7+
* };
8+
*/
9+
10+
class Solution {
11+
public:
12+
vector<string> ans;
13+
unordered_set<string> vis;
14+
15+
vector<string> crawl(string startUrl, HtmlParser htmlParser) {
16+
dfs(startUrl, htmlParser);
17+
return ans;
18+
}
19+
20+
void dfs(string& url, HtmlParser& htmlParser) {
21+
if (vis.count(url)) return;
22+
vis.insert(url);
23+
ans.push_back(url);
24+
for (string next : htmlParser.getUrls(url))
25+
if (host(url) == host(next))
26+
dfs(next, htmlParser);
27+
}
28+
29+
string host(string url) {
30+
int i = 7;
31+
string res;
32+
for (; i < url.size(); ++i)
33+
{
34+
if (url[i] == '/') break;
35+
res += url[i];
36+
}
37+
return res;
38+
}
39+
};

0 commit comments

Comments
 (0)