Commit 4a781219 authored by Mehmet Kozan

minor bugs fixed.

parent 2ecb3f2d
Pipeline #15603992 passed with stages in 2 minutes and 28 seconds
root = true
[*]
indent_style = tab
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.yml]
indent_style = space
indent_size = 2
{
	"cSpell.words": [
		"gettype",
		"mailto",
		"npmjs",
		"querycount",
		"samelevel",
		"subdomain",
		"updomain",
		"uplevel"
	]
}
{
	"name": "crawler-url-parser",
-	"version": "2.0.0",
+	"version": "2.0.1",
	"description": "An `URL` parser for crawling purpose.",
	"main": "crawler-url-parser.js",
	"keywords": [
		"crawler-url-parser",
		"url-parser",
		"extract-url",
		"url-parse",
		"is-parent-url",
		"is-child-url",
		"url",
		"parser",
		"parse",
		"crawler",
		"extract",
		"extractor",
		"absolute",
		"relative",
		"child",
		"parent",
		"normalize",
		"normalization",
		"uri",
		"href",
		"trim",
		"crawling",
		"subdomain",
		"external",
		"internal",
		"parse-url",
		"urlparser",
		"fast-url-parser"
	],
	"dependencies": {
		"cheerio": "^1.0.0-rc.2",
		"psl": "^1.1.20",
		"url": "^0.11.0",
		"url-clean": "1.0.2"
	},
	"devDependencies": {
		"mocha": "^4.0.1",
		"path": "^0.12.7",
		"crawler-request": "^1.1.3"
	},
	"scripts": {
		"start": "node crawler-url-parser.js",
		"test": "mocha --recursive"
	},
	"bugs": {
		"url": "https://gitlab.com/autokent/crawler-url-parser/issues",
		"email": "[email protected]"
	},
	"homepage": "https://gitlab.com/autokent/crawler-url-parser",
	"repository": {
		"type": "git",
		"url": "https://gitlab.com/autokent/crawler-url-parser.git"
	},
	"author": "Mehmet Kozan <[email protected]>",
	"license": "MIT",
	"engines": {
		"node": ">=6.8.1"
	}
}
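For orientation, here is a minimal usage sketch of the package's two entry points, inferred from the calls the test suite below makes (parse(url, baseUrl) returning an object with a normalized field, and extract(html, baseUrl) returning an array of URLs); the example URLs are hypothetical:

const cup = require("crawler-url-parser");

// Resolve a (possibly relative) link against the page it was found on.
let res = cup.parse("../section/page.html", "https://example.com/a/b/index.html");
console.log(res.normalized); // the cleaned absolute URL

// Extract every link from an HTML snippet, resolved against the page URL.
let urls = cup.extract('<a href="/about">About</a>', "https://example.com/");
console.log(urls.length); // number of URLs found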
const assert = require('assert');
const cup = require("../");
const cr = require("crawler-request");

describe('crawler test 01', function () {
	this.timeout(10000);
	let url = 'https://github.com/Microsoft';
	it(`should pass for ${url}`, function () {
		// A fragment-only link should normalize to the page URL itself.
		let res = cup.parse("#start-of-content", url);
		assert.equal(res.normalized, "https://github.com/Microsoft");
	});
});

describe('crawler test 02', function () {
	this.timeout(10000);
	let url = 'http://journals.tubitak.gov.tr';
	it(`should pass for ${url}`, function () {
		// Fetch the live page and check that at least one URL is extracted.
		return cr(url).then(function (result) {
			let urls = cup.extract(result.html, url);
			assert.notEqual(urls.length, 0);
		});
	});
});

describe('crawler test 03', function () {
	this.timeout(10000);
	let url = 'http://journals.tubitak.gov.tr/';
	it(`should pass for ${url}`, function () {
		return cr(url).then(function (result) {
			let urls = cup.extract(result.html, url);
			assert.notEqual(urls.length, 0);
		});
	});
});

describe('crawler test 04', function () {
	this.timeout(10000);
	let url = 'https://github.com/Microsoft';
	it(`should pass for ${url}`, function () {
		return cr(url).then(function (result) {
			let urls = cup.extract(result.html, url);
			assert.notEqual(urls.length, 0);
		});
	});
});

describe('crawler test 05', function () {
	this.timeout(10000);
	let url = 'https://github.com/Microsoft/';
	it(`should pass for ${url}`, function () {
		return cr(url).then(function (result) {
			let urls = cup.extract(result.html, url);
			assert.notEqual(urls.length, 0);
		});
	});
});

describe('crawler test 06', function () {
	this.timeout(10000);
	let url = 'https://www.npmjs.com/package/electron-window-manager';
	it(`should pass for ${url}`, function () {
		return cr(url).then(function (result) {
			let urls = cup.extract(result.html, url);
			assert.notEqual(urls.length, 0);
		});
	});
});
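The whole suite runs via the package's test script shown above (npm test, i.e. mocha --recursive). Note that tests 02 through 06 fetch live pages through crawler-request, so they need network access, which is why each describe block raises the timeout to 10 seconds.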