Commit 2ecb3f2d authored by Mehmet Kozan's avatar Mehmet Kozan

v2.0.0 released.

parent facc3f9f
Pipeline #15599254 passed with stages
in 3 minutes and 50 seconds
package-lock=false
{
}
\ No newline at end of file
......@@ -17,14 +17,76 @@
```js
const cup = require('crawler-url-parser');
//// parse(current_url,base_url)
let url = cup.parse("../ddd","http://question.stackoverflow.com/aaa/bbb/ccc/");
console.log(url.normalized);//http://question.stackoverflow.com/aaa/bbb/ddd
console.log(url.host); // question.stackoverflow.com
console.log(url.domain); // stackoverflow.com
console.log(url.subdomain); // question
console.log(url.protocol); // http:
console.log(url.path); // /aaa/bbb/ddd
//// parse(current_url[,base_url])
let result = cup.parse("http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
console.log(result.url);
// http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2
console.log(result.baseurl);
// null
console.log(result.normalized);
// http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2
console.log(result.host);
// question.stackoverflow.com
console.log(result.domain);
// stackoverflow.com
console.log(result.subdomain);
// question
console.log(result.protocol);
// http:
console.log(result.path);
// /aaa/bbb/ddd
console.log(result.search);
// q1=query1&q2=query2
console.log(result.querycount);
// 2
```
### Parse with baseURL
```js
const cup = require('crawler-url-parser');
//// parse(current_url[,base_url])
let result = cup.parse("../ddd?q1=query1&q2=query2","http://question.stackoverflow.com/aaa/bbb/ccc/");
console.log(result.url);
// http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2
console.log(result.baseurl);
// http://question.stackoverflow.com/aaa/bbb/ccc
console.log(result.normalized);
// http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2
console.log(result.host);
// question.stackoverflow.com
console.log(result.domain);
// stackoverflow.com
console.log(result.subdomain);
// question
console.log(result.protocol);
// http:
console.log(result.path);
// /aaa/bbb/ddd
console.log(result.search);
// q1=query1&q2=query2
console.log(result.querycount);
// 2
```
### Extract
......@@ -32,50 +94,55 @@ console.log(url.path); // /aaa/bbb/ddd
const cup = require('crawler-url-parser');
//// extract(html_str,current_url);
let htmlStr=
'<html> \
<body> \
<a href="http://www.stackoverflow.com/internal-1">test-link-4</a><br /> \
<a href="http://www.stackoverflow.com/internal-2">test-link-5</a><br /> \
<a href="http://www.stackoverflow.com/internal-2">test-link-6</a><br /> \
<a href="http://faq.stackoverflow.com/subdomain-1">test-link-7</a><br /> \
<a href="http://faq.stackoverflow.com/subdomain-2">test-link-8</a><br /> \
<a href="http://faq.stackoverflow.com/subdomain-2">test-link-9</a><br /> \
<a href="http://www.google.com/external-1">test-link-10</a><br /> \
<a href="http://www.google.com/external-2">test-link-11</a><br /> \
<a href="http://www.google.com/external-2">test-link-12</a><br /> \
</body> \
</html>';
let currentUrl= "http://www.stackoverflow.com/aaa/bbb/ccc";
let htmlStr='<html><body> \
<a href="http://best.question.stackoverflow.com">subdomain</a><br /> \
<a href="http://faq.stackoverflow.com">subdomain</a><br /> \
<a href="http://stackoverflow.com">updomain</a><br /> \
<a href="http://www.google.com">external</a><br /> \
<a href="http://www.facebook.com">external</a><br /> \
<a href="http://question.stackoverflow.com/aaa/bbb/ccc">sublevel</a><br /> \
<a href="http://question.stackoverflow.com/aaa/bbb/zzz">sublevel</a><br /> \
<a href="http://question.stackoverflow.com/aaa/">uplevel</a><br /> \
<a href="http://question.stackoverflow.com/aaa/ddd">samelevel</a><br /> \
<a href="http://question.stackoverflow.com/aaa/eee">samelevel</a><br /> \
<a href="http://question.stackoverflow.com/aaa/ddd/eee">internal</a><br /> \
<a href="http://question.stackoverflow.com/zzz">internal</a><br /> \
</body></html>';
let currentUrl= "http://question.stackoverflow.com/aaa/bbb";
let urls = cup.extract(htmlStr,currentUrl);
console.log(urls.length); // 12
console.log(urls[0].type); //subdomain
console.log(urls[1].type); //subdomain
console.log(urls[2].type); //updomain
console.log(urls[3].type); //external
console.log(urls[4].type); //external
console.log(urls[5].type); //sublevel
console.log(urls[6].type); //sublevel
console.log(urls[7].type); //uplevel
console.log(urls[8].type); //samelevel
console.log(urls[9].type); //samelevel
console.log(urls[10].type); //internal
console.log(urls[11].type); //internal
```
### Level
```js
const cup = require('crawler-url-parser');
//// getlevel(current_url,base_url);
let level = cup.getlevel("sub.domain.com/aaa/bbb/","sub.domain.com/aaa/bbb/ccc");
//// gettype(current_url,base_url);
let level = cup.gettype("sub.domain.com/aaa/bbb/","sub.domain.com/aaa/bbb/ccc");
console.log(level); //sublevel
level = cup.getlevel("sub.domain.com/aaa/bbb/ccc/ddd","sub.domain.com/aaa/bbb/ccc");
level = cup.gettype("sub.domain.com/aaa/bbb/ccc/ddd","sub.domain.com/aaa/bbb/ccc");
console.log(level); //uplevel
level = cup.getlevel("sub.domain.com/aaa/bbb/eee","sub.domain.com/aaa/bbb/ccc");
level = cup.gettype("sub.domain.com/aaa/bbb/eee","sub.domain.com/aaa/bbb/ccc");
console.log(level); //samelevel
level = cup.getlevel("sub.domain.com/aaa/bbb/eee","sub.anotherdomain.com/aaa/bbb/ccc");
console.log(level); //null
```
### Query
```js
const cup = require('crawler-url-parser');
//// querycount(url)
let count = cup.querycount("sub.domain.com/aaa/bbb?q1=data1&q2=data2&q3=data3");
console.log(count); //3
level = cup.gettype("sub.domain.com/aaa/bbb/eee","sub.anotherdomain.com/aaa/bbb/ccc");
console.log(level); //external
```
## Test
......
const url = require('url');
const URL = require('url');
const psl = require('psl');
const normalizeUrl = require('normalize-url');
const cleanUrl = require('url-clean');
const cheerio = require('cheerio');
const normalize_options={
removeDirectoryIndex:false,
removeTrailingSlash:false,
stripWWW:false,
stripFragment:true,
normalizeHttps:false,
normalizeProtocol:true,
removeQueryParameters: [/^utm_\w+/i, 'ref']
}
const result_normalize_options={
removeDirectoryIndex:true,
removeTrailingSlash:true,
stripWWW:true,
stripFragment:true,
normalizeHttps:false,
normalizeProtocol:true,
removeQueryParameters: [/^utm_\w+/i, 'ref']
}
function parse(currentUrlStr,baseUrlStr)
{
let ret = {url:null,normalized:null,protocol:null,host:null,domain:null,subdomain:null,path:null}
let ret = {url:null,baseurl:null,normalized:null,protocol:null,host:null,domain:null,subdomain:null,path:null,search:null,querycount:0}
if(typeof currentUrlStr === 'undefined') return null;
let currentNormUrlStr = cleanUrl(currentUrlStr,normalize_options);
if(currentNormUrlStr === "") return null;
//check if currentUrlStr format like "aaa"
let currentNormUrlStr = normalizeUrl(currentUrlStr);
//test for normalised url like "http://aaa"
if(/^http:\/\/[^.]+$/.test(currentNormUrlStr)){
currentNormUrlStr = currentNormUrlStr.replace("http://","");
currentNormUrlStr = currentNormUrlStr.replace("/?","?");
}
let parsedUrl = url.parse(currentNormUrlStr,true,true);
let parsedUrl = URL.parse(currentNormUrlStr,true,true);
if(parsedUrl.protocol !='http:' && parsedUrl.protocol !='https:' && parsedUrl.protocol != null) return null;
//current url is relative like "abc", "/abc" or "../abc"
if(parsedUrl.host == null && typeof baseUrlStr !== "undefined")
let normalizedBaseUrl = null;
if(parsedUrl.host == null && typeof baseUrlStr !== "undefined" && baseUrlStr != null)
{
let normalizedBaseUrl = normalizeUrl(baseUrlStr,{removeTrailingSlash: false});
let parsedBaseUrl = url.parse(normalizedBaseUrl,{removeTrailingSlash: false});
let absoluteUrl = url.parse(url.resolve(parsedBaseUrl,parsedUrl));
currentUrlStr = url.format(absoluteUrl);
normalizedBaseUrl = cleanUrl(baseUrlStr,normalize_options);
ret.baseurl = normalizedBaseUrl;
let parsedBaseUrl = URL.parse(normalizedBaseUrl,normalize_options);
let absoluteUrl = URL.parse(URL.resolve(parsedBaseUrl,parsedUrl));
currentUrlStr = URL.format(absoluteUrl);
}
ret.url = currentUrlStr;
ret.normalized = normalizeUrl(currentUrlStr);
ret.normalized = cleanUrl(currentUrlStr,result_normalize_options);
if(/^http:\/\/[^.]+$/.test(ret.normalized)){
ret.normalized = ret.normalized.replace("http://","");
ret.normalized = ret.normalized.replace("/?","?");
}
parsedUrl = url.parse(ret.normalized,true,true);
parsedUrl = URL.parse(ret.normalized,true,true);
ret.protocol = parsedUrl.protocol;
ret.host = parsedUrl.host;
......@@ -50,6 +62,11 @@ function parse(currentUrlStr,baseUrlStr)
ret.subdomain = parsedHost.subdomain;
}
ret.search=parsedUrl.search;
ret.querycount = parsedUrl.search ? parsedUrl.search.split("=").length -1 : 0;
//ret.type = normalizedBaseUrl ? gettype(ret.normalized,normalizedBaseUrl):"none";
return ret;
}
......@@ -60,106 +77,92 @@ function extract(data,sourceUrl){
let $ = typeof data === "string" ? cheerio.load(data) : data;
let embedBaseUrlStr = $('base').attr('href');
let embedBaseUrl = parse(embedBaseUrlStr);
baseUrl = embedBaseUrl ? embedBaseUrl : baseUrl;
let baseUrlStr = baseUrl ? baseUrl.normalized : null;
$('a').each(function(i, el) {
let href = $(this).attr('href');
let text = $(this).text();
if(typeof href == "undefined" && href.length < 3 && /^(javascript|mailto:|ftp:)/ig.test(href)) return;
let text = $(this).text().trim();
//href = href.replace(/;.*$/g,"");
if(typeof href == "undefined" || href.length < 3 || /^(javascript|mailto:|ftp:)/ig.test(href)) return;
let currentUrl = embedBaseUrl == null ? parse(href,baseUrl.normalized) : parse(href,embedBaseUrl.normalized);
if(currentUrl == null) return;
//let currentUrl = embedBaseUrl == null ? parse(href,baseUrl.normalized) : parse(href,embedBaseUrl.normalized);
let currentUrl = parse(href,baseUrlStr);
if(!urlMap.has(currentUrl.normalized)){
urlMap.set(currentUrl.normalized,{url:currentUrl,text:text});
currentUrl.text = text == null ? "": text;
currentUrl.baseurl = baseUrlStr;
urlMap.set(currentUrl.normalized,currentUrl);
}
else{
let tmpUrl = urlMap.get(currentUrl.normalized);
if(! tmpUrl.text.includes(text)){
tmpUrl.text += ` ${text}`;
tmpUrl.text = `${tmpUrl.text} ${text}`;
}
}
});
//remove base url
urlMap.delete(baseUrl.normalized);
if(embedBaseUrl!=null){
urlMap.delete(embedBaseUrl.normalized);
}
urlMap.delete(baseUrlStr);
for (let currentUrl of urlMap.values()) {
if(baseUrl.host == currentUrl.host){
//internal
currentUrl.type="internal";
}
else if(baseUrl.domain == currentUrl.domain){
//subdomain
currentUrl.type="subdomain";
}
else{
//external
currentUrl.type="external";
}
currentUrl.type = gettype(currentUrl,baseUrl);
}
return Array.from(urlMap.values());
let retArr = Array.from(urlMap.values());
retArr = retArr.map(function(el) {
return {url:el.normalized, text:el.text, type:el.type}
});
return retArr;
}
/**
 * Classify how `linkurl` relates to `pageurl`.
 *
 * Returns one of:
 *   "samelevel" - same host, same path depth, same parent path
 *   "sublevel"  - same host, exactly one path segment deeper, under the page path
 *   "uplevel"   - same host, exactly one path segment shallower, a prefix of the page path
 *   "internal"  - same host, but none of the level relations above
 *   "updomain"  - same registrable domain, link subdomain is shorter (closer to root)
 *   "subdomain" - same registrable domain, different host, not an updomain
 *   "external"  - different registrable domain
 *
 * @param {string|Object} linkurl - URL string (parsed via parse()) or an already-parsed
 *                                  result object with host/domain/subdomain/path fields.
 * @param {string|Object} pageurl - same as linkurl, for the page being compared against.
 * @returns {string} one of the type labels listed above.
 */
function gettype(linkurl,pageurl)
{
	// Accept raw strings for convenience; normalize to parsed objects.
	if(typeof linkurl == "string") linkurl = parse(linkurl);
	if(typeof pageurl == "string") pageurl = parse(pageurl);
	// subdomain may be null (e.g. bare "domain.com"); treat as zero length.
	let linkurl_subdomain_len = linkurl.subdomain ? linkurl.subdomain.length : 0;
	let pageurl_subdomain_len = pageurl.subdomain ? pageurl.subdomain.length : 0;
	let linkurl_path = linkurl.path ? linkurl.path : "";
	let pageurl_path = pageurl.path ? pageurl.path : "";
	// Split paths into non-empty segments so trailing slashes do not skew depth.
	let linkurl_parts = linkurl_path.split("/").filter(function(elem, index, array){ return elem.length > 0});
	let pageurl_parts = pageurl_path.split("/").filter(function(elem, index, array){ return elem.length > 0});
	if(pageurl.host == linkurl.host){
		let part_count_diff = linkurl_parts.length - pageurl_parts.length;
		if(part_count_diff == 0){
			// Same depth: siblings when the parent paths (path minus last segment) match.
			let linkurl_without_last_part = linkurl_path.replace(/(\/[^\/]*)[\/]?$/,"");
			let pageurl_without_last_part = pageurl_path.replace(/(\/[^\/]*)[\/]?$/,"");
			if(linkurl_without_last_part == pageurl_without_last_part) return "samelevel";
		}
		else if(part_count_diff == 1){
			// One segment deeper AND under the page's path -> direct child.
			if(linkurl_path.includes(pageurl_path)) return "sublevel";
		}
		else if(part_count_diff == -1){
			// One segment shallower AND a prefix of the page's path -> direct parent.
			if(pageurl_path.includes(linkurl_path)) return "uplevel";
		}
		// Same host but no direct level relation.
		return "internal";
	}
	else if(linkurl.domain == pageurl.domain){
		// Same registrable domain, different host: compare subdomain lengths to
		// decide whether the link climbs toward the apex domain.
		if(linkurl_subdomain_len < pageurl_subdomain_len) return "updomain";
		return "subdomain";
	}
	return "external";
}
// Public API (v2.0.0): getlevel/querycount were removed — level classification
// moved to gettype() and query counting into parse()'s `querycount` field.
// Exporting the removed identifiers would throw a ReferenceError at load time.
module.exports.parse = parse;
module.exports.extract = extract;
module.exports.gettype = gettype;
//for testing purpose
if (!module.parent){
......@@ -169,5 +172,14 @@ if (!module.parent){
//let res2 = getlevel("sub.domain.com/aaa/bbb/ccc/ddd","sub.domain.com/aaa/bbb/ccc");
//let res3 = getlevel("sub.domain.com/aaa/bbb/eee","sub.domain.com/aaa/bbb/ccc");
//debugger;
process.exit();
//let res = parse("ddd","http://www.stackoverflow.com/aaa/bbb/ccc/");
let page = 'http://journals.tubitak.gov.tr/';
let link = 'http://journals.tubitak.gov.tr/genel/telifhakki.pdf';
let res = gettype(link,page);
debugger
res = gettype(page,link);
debugger
//process.exit();
}
\ No newline at end of file
This diff is collapsed.
{
"name": "crawler-url-parser",
"version": "1.5.1",
"version": "2.0.0",
"description": "An `URL` parser for crawling purpose.",
"main": "crawler-url-parser.js",
"keywords": [
......@@ -35,13 +35,14 @@
],
"dependencies": {
"cheerio": "^1.0.0-rc.2",
"normalize-url": "^2.0.0",
"psl": "^1.1.20",
"url": "^0.11.0"
"url": "^0.11.0",
"url-clean": "1.0.2"
},
"devDependencies": {
"mocha": "^4.0.1",
"path": "^0.12.7"
"path": "^0.12.7",
"crawler-request": "^1.1.3"
},
"scripts": {
"start": "node crawler-url-parser.js",
......
......@@ -278,7 +278,17 @@ describe('parse paths with subdomain "https://www.google.com"', function() {
});
});
describe('parse paths with invalid protocol "ftp://www.google.com"', function() {
it('should be null "ftp://www.google.com"', function() {
let res = cup.parse("ftp://www.google.com");
assert.equal(res,null);
});
});
describe('parse paths with invalid protocol "htp://www.google.com"', function() {
it('should be null "htp://www.google.com"', function() {
let res = cup.parse("htp://www.google.com");
assert.equal(res,null);
......
const assert = require('assert');
const cup = require("../");
describe('getlevel url as samelevel, sublevel, uplevel', function() {
it('should getlevel sublevel urls', function() {
let res = cup.getlevel("sub.domain.com/aaa/bbb/","sub.domain.com/aaa/bbb/ccc");
assert.equal(res,"sublevel");
describe('gettype url as samelevel, sublevel, uplevel', function() {
it('should gettype sublevel urls', function() {
let res = cup.gettype("sub.domain.com/aaa/bbb/","sub.domain.com/aaa/bbb/ccc");
assert.equal(res,"uplevel");
});
it('should getlevel uplevel urls', function() {
let res = cup.getlevel("sub.domain.com/aaa/bbb/ccc/ddd","sub.domain.com/aaa/bbb/ccc");
assert.equal(res,"uplevel");
it('should gettype uplevel urls', function() {
let res = cup.gettype("sub.domain.com/aaa/bbb/ccc/ddd","sub.domain.com/aaa/bbb/ccc");
assert.equal(res,"sublevel");
});
it('should getlevel samelevel urls', function() {
let res = cup.getlevel("sub.domain.com/aaa/bbb/eee","sub.domain.com/aaa/bbb/ccc");
it('should gettype samelevel urls', function() {
let res = cup.gettype("sub.domain.com/aaa/bbb/eee","sub.domain.com/aaa/bbb/ccc");
assert.equal(res,"samelevel");
});
it('should handle unvalid urls', function() {
let res = cup.getlevel("sub.domain.com/aaa/bbb/eee","sub.anotherdomain.com/aaa/bbb/ccc");
assert.equal(res,null);
it('should handle invalid urls', function() {
let res = cup.gettype("sub.domain.com/aaa/bbb/eee","sub.anotherdomain.com/aaa/bbb/ccc");
assert.equal(res,"external");
});
});
......@@ -3,27 +3,27 @@ const cup = require("../");
describe('querycount url', function() {
it('should calculate urls query-0', function() {
let res = cup.querycount("sub.domain.com/aaa/bbb");
assert.equal(res,0);
let res = cup.parse("sub.domain.com/aaa/bbb");
assert.equal(res.querycount,0);
});
it('should calculate urls query-1', function() {
let res = cup.querycount("sub.domain.com/aaa/bbb?q1=data1");
assert.equal(res,1);
let res = cup.parse("sub.domain.com/aaa/bbb?q1=data1");
assert.equal(res.querycount,1);
});
it('should calculate urls query-2', function() {
let res = cup.querycount("sub.domain.com/aaa/bbb?q1=data1&q2=data2");
assert.equal(res,2);
let res = cup.parse("sub.domain.com/aaa/bbb?q1=data1&q2=data2");
assert.equal(res.querycount,2);
});
it('should calculate urls query-3', function() {
let res = cup.querycount("sub.domain.com/aaa/bbb?q1=data1&q2=data2&q3=data3");
assert.equal(res,3);
let res = cup.parse("sub.domain.com/aaa/bbb?q1=data1&q2=data2&q3=data3");
assert.equal(res.querycount,3);
});
it('should calculate urls query-4', function() {
let res = cup.querycount("sub.domain.com/aaa/bbb?q1=data1&q2=data2&q3=data3&q4=data4");
assert.equal(res,4);
let res = cup.parse("sub.domain.com/aaa/bbb?q1=data1&q2=data2&q3=data3&q4=data4");
assert.equal(res.querycount,4);
});
});
const assert = require('assert');
const cup = require("../");
var CrawlerRequest = require('crawler-request');
const fs = require('fs');
const path = require('path');
describe('05 static html', function() {
it('should pass 05 static html', function() {
let htmlPath = path.resolve(__dirname,'05_tubitak.html');
let htmlString = fs.readFileSync(htmlPath,'utf-8');
let result = cup.extract(htmlString,"http://journals.tubitak.gov.tr/");
let suplevelArr = result.filter((el, index, arr) => el.type == "sublevel");
let uplevelArr = result.filter((el, index, arr) => el.type == "uplevel");
let samelevelArr = result.filter((el, index, arr) => el.type == "samelevel");
let internalArr = result.filter((el, index, arr) => el.type == "internal");
let subdomainArr = result.filter((el, index, arr) => el.type == "subdomain");
let updomainArr = result.filter((el, index, arr) => el.type == "updomain");
let externalArr = result.filter((el, index, arr) => el.type == "external");
assert.equal(result.length,33);
assert.equal(suplevelArr.length+uplevelArr.length+samelevelArr.length+internalArr.length+subdomainArr.length+updomainArr.length+externalArr.length,33);
assert.equal(suplevelArr.length,12);
assert.equal(uplevelArr.length,0);
assert.equal(samelevelArr.length,0);
assert.equal(internalArr.length,13);
assert.equal(subdomainArr.length,2);
assert.equal(updomainArr.length,1);
assert.equal(externalArr.length,5);
});
});
This diff is collapsed.
<html>
<head>
<base href="http://question.stackoverflow.com/aaa/bbb" target="_blank">
</head>
<body>
<a href="http://best.question.stackoverflow.com">subdomain</a><br />
<a href="http://faq.stackoverflow.com">subdomain</a><br />
<a href="http://stackoverflow.com">updomain</a><br />
<a href="http://www.google.com">external</a><br />
<a href="http://www.facebook.com">external</a><br />
<a href="http://question.stackoverflow.com/aaa/bbb/ccc">sublevel</a><br />
<a href="http://question.stackoverflow.com/aaa/bbb/zzz">sublevel</a><br />
<a href="http://question.stackoverflow.com/aaa/">uplevel</a><br />
<a href="http://question.stackoverflow.com/aaa/ddd">samelevel</a><br />
<a href="http://question.stackoverflow.com/aaa/eee">samelevel</a><br />
<a href="http://question.stackoverflow.com/aaa/ddd/eee">internal</a><br />
<a href="http://question.stackoverflow.com/zzz">internal</a><br />
</body>
</html>
\ No newline at end of file
const assert = require('assert');
const cup = require("../");
const fs = require('fs');
const path = require('path');
describe('06 static html type test', function() {
it('should pass 06 static html type test', function() {
let htmlPath = path.resolve(__dirname,'06_type-test.html');
let htmlString = fs.readFileSync(htmlPath,'utf-8');
let result = cup.extract(htmlString);
let suplevelArr = result.filter((el, index, arr) => el.type == "sublevel");
let uplevelArr = result.filter((el, index, arr) => el.type == "uplevel");
let samelevelArr = result.filter((el, index, arr) => el.type == "samelevel");
let internalArr = result.filter((el, index, arr) => el.type == "internal");
let subdomainArr = result.filter((el, index, arr) => el.type == "subdomain");
let updomainArr = result.filter((el, index, arr) => el.type == "updomain");
let externalArr = result.filter((el, index, arr) => el.type == "external");
assert.equal(result.length,12);
assert.equal(suplevelArr.length+uplevelArr.length+samelevelArr.length+internalArr.length+subdomainArr.length+updomainArr.length+externalArr.length,12);
assert.equal(suplevelArr.length,2);
assert.equal(uplevelArr.length,1);
assert.equal(samelevelArr.length,2);
assert.equal(internalArr.length,2);
assert.equal(subdomainArr.length,1);
assert.equal(updomainArr.length,2);
assert.equal(externalArr.length,2);
});
});
const assert = require('assert');
const cup = require("../");
describe('07 readme parse test', function() {
it('should pass 07 readme parse test 01', function() {
let result = cup.parse("http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.baseurl,null);
assert.equal(result.domain,"stackoverflow.com");
assert.equal(result.host,"question.stackoverflow.com");
assert.equal(result.normalized,"http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
assert.equal(result.path,"/aaa/bbb/ddd");
assert.equal(result.protocol,"http:");
assert.equal(result.querycount,2);
assert.equal(result.search,"?q1=query1&q2=query2");
assert.equal(result.subdomain,"question");
assert.equal(result.url,"http://question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");
});
it('should pass 07 readme parse test 02', function() {
let result = cup.parse("http://www.question.stackoverflow.com/aaa/bbb/ddd?q1=query1&q2=query2");