Skip to content

Commit 540f091

Browse files
author
Evgheni C
committed
Combine Unknown and Unsupported protocols
1 parent adb8894 commit 540f091

File tree

1 file changed

+50
-52
lines changed

1 file changed

+50
-52
lines changed

crawling-daemon.js

Lines changed: 50 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,10 @@ process.on("message", function(data)
6969
{
7070
sitemap_content +=
7171
'<url>' +
72-
'<loc>'+ doc.url +'</loc>' +
73-
// '<lastmod>---</lastmod>' +
74-
// '<changefreq>---</changefreq>' +
75-
'<priority>0.7</priority>' +
72+
'<loc>'+ doc.url +'</loc>' +
73+
// '<lastmod>---</lastmod>' +
74+
// '<changefreq>---</changefreq>' +
75+
'<priority>0.7</priority>' +
7676
'</url>';
7777
})
7878

@@ -302,37 +302,35 @@ function checkUrl()
302302
* @return {String|Boolean} The revised link, or false when the link is rejected
303303
*/
304304
function check_link(link, parent_link) {
    // Validates a link and normalizes it to an absolute URL.
    //
    // link        - the raw href value to check
    // parent_link - URL used as the base when resolving relative links
    //
    // Returns the absolute link as a string, or false when the link is
    // empty, uses a protocol other than http/https, or points at a host
    // other than scrapeHost (a variable defined outside this function).

    // Reject "empty" links. Non-string values (undefined, null, ...) are
    // rejected here as well so url.parse() is never handed something it
    // would throw on.
    if (typeof link !== 'string' || link === '' || link === '#') {
        return false;
    }

    // Declared locally: assigning to an undeclared `parts` would create an
    // implicit global (and throws in strict mode).
    var parts = url.parse(link);

    // url.parse() reports the protocol lower-cased and WITH the trailing
    // colon ("http:"), so the comparison has to include it.
    if (parts.protocol === 'http:' || parts.protocol === 'https:') {
        // make sure host is our domain
        if (parts.host !== scrapeHost) {
            return false;
        }
        return link;
    }

    if (link.indexOf('//') === 0) {
        // Schema-less link ("//host/path"). Compare the parsed host rather
        // than a string prefix so "//<scrapeHost>.evil.org" is not mistaken
        // for our own host.
        var absolute = 'http:' + link;
        if (url.parse(absolute).host !== scrapeHost) {
            return false;
        }
        return absolute;
    }

    if (parts.protocol) {
        // unknown or unsupported protocol (mailto:, javascript:, ftp:, ...)
        return false;
    }

    // relative link
    return url.resolve(parent_link, link);
}
337335

338336
function make_request(protocol, host, path, depth, callback)
@@ -397,27 +395,27 @@ function randomString(len)
397395

398396
function bytesToSize(bytes, precision)
{
    // Formats a byte count as a human-readable string using 1024-based
    // units (B, KB, MB, GB, TB).
    //
    // bytes     - the byte count to format
    // precision - number of decimals passed to toFixed() for KB and above
    //
    // Values below one kilobyte, and anything that matches no unit
    // threshold (e.g. negative numbers), are returned unscaled with ' B'.
    var KB = 1024;
    var MB = KB * 1024;
    var GB = MB * 1024;
    var TB = GB * 1024;

    // Largest unit first, so the first threshold the value reaches wins.
    var scales = [
        [TB, ' TB'],
        [GB, ' GB'],
        [MB, ' MB'],
        [KB, ' KB']
    ];

    for (var i = 0; i < scales.length; i++) {
        var threshold = scales[i][0];
        if (bytes >= threshold) {
            return (bytes / threshold).toFixed(precision) + scales[i][1];
        }
    }

    return bytes + ' B';
}

0 commit comments

Comments
 (0)