From 09bd2058a92c4e8e8db57bb769bc5c457724254b Mon Sep 17 00:00:00 2001
From: unknown
Date: Fri, 31 Jul 2015 15:35:00 +0530
Subject: [PATCH 1/3] Added Simple_HTTP

---
 test/Simple_Http.js | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 test/Simple_Http.js

diff --git a/test/Simple_Http.js b/test/Simple_Http.js
new file mode 100644
index 0000000..5ee307e
--- /dev/null
+++ b/test/Simple_Http.js
@@ -0,0 +1,37 @@
+// Demo server: on every request, scrape kernel.org with jscrap and reply
+// with the latest kernel version and its release date as plain text.
+var jscrap = require('jscrap');
+var http = require('http');
+
+var port = 3000;
+var ip = "127.0.0.1";
+
+http.createServer(function (req, res) {
+
+    // Send `body` as a plain-text response with the given HTTP status.
+    function echoData(status, body) {
+        res.writeHead(status, {
+            'Content-Type': 'text/plain'
+        });
+        res.end(body);
+    }
+
+    jscrap.scrap("https://www.kernel.org/", {
+        debug: true
+    }, function (err, $) {
+        // Check the error first: `$` may not be usable when the scrape fails,
+        // and the client still needs a response so the request does not hang.
+        if (err) {
+            console.log(err);
+            echoData(502, "Unable to fetch https://www.kernel.org/");
+            return;
+        }
+
+        var text1 = "Latest Linux Kernel: " + $("article #latest_link > a").text();
+        var text2 = "Released: " + $("article #releases tr:first-child td:nth-child(3)").text();
+
+        echoData(200, text1 + " | " + text2);
+    });
+
+}).listen(port, ip);
+console.log('Server running at http://' + ip + ':' + port + '/');
\ No newline at end of file
From 4a3c52566debdde0de6d4cb9c97e1c04868caf91 Mon Sep 17 00:00:00 2001
From: unknown
Date: Fri, 31 Jul 2015 16:19:23 +0530
Subject: [PATCH 2/3] Added .gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b59f7e3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+test/
\ No newline at end of file
From 73431bf139857d06fd8b8ede622ae0dc43687d11 Mon Sep 17 00:00:00 2001
From: unknown
Date: Fri, 31 Jul 2015 19:44:46 +0530
Subject: [PATCH 3/3] Updated README

---
 README.md | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 028ea88..2e10878 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,16 @@ -# jscrap: A very easy-to-use and lighweight web scrapper +# jscrap : A very easy-to-use and lightweight web scrapper + `jscrap` is a very fast and easy-to-use web scrapper for node.js -# Installing +### Installing +```npm npm install jscrap +``` -# Having fun - +### Example: +```javascript var jscrap = require('jscrap'); @@ -15,16 +18,18 @@ console.log("Latest Linux Kernel: ",$("article #latest_link > a").text().trim()); console.log("Released: ",$("article #releases tr:first-child td:nth-child(3)").text()); }); - -# Supported selectors: +``` +### Supported selectors: `jscrap` supports all the [zcsel](https://www.npmjs.org/package/zcsel) selectors and functions. Watch out [zcsel](https://www.npmjs.org/package/zcsel) documentation. -# Options +### Options -The `scrap()` function supports these options: +The __`scrap()`__ function supports these options: -`debug` : Activates the debug mode. Defaults to `false`. -`followRedirects` : Number of redirects to follow. Defaults to `3`. -`charsetEncoding` : Document charset. Default to `utf-8`. +* __`debug`__ : Activates the debug mode. Defaults to `false`. +* __`followRedirects`__ : Number of redirects to follow. Defaults to `3`. +* __`charsetEncoding`__ : Document charset. Defaults to `utf-8`. +* __`headers`__ : Headers to pass with the request. Not set by default. +* __`timeout`__ : Timeout for the request. Defaults to `null`. \ No newline at end of file