diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b59f7e3 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +test/ \ No newline at end of file diff --git a/README.md b/README.md index 028ea88..2e10878 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,16 @@ -# jscrap: A very easy-to-use and lighweight web scrapper +# jscrap : A very easy-to-use and lightweight web scrapper + `jscrap` is a very fast and easy-to-use web scrapper for node.js -# Installing +### Installing +```npm npm install jscrap +``` -# Having fun - +### Example: +```javascript var jscrap = require('jscrap'); @@ -15,16 +18,18 @@ console.log("Latest Linux Kernel: ",$("article #latest_link > a").text().trim()); console.log("Released: ",$("article #releases tr:first-child td:nth-child(3)").text()); }); - -# Supported selectors: +``` +### Supported selectors: `jscrap` supports all the [zcsel](https://www.npmjs.org/package/zcsel) selectors and functions. Watch out [zcsel](https://www.npmjs.org/package/zcsel) documentation. -# Options +### Options -The `scrap()` function supports these options: +The __`scrap()`__ function supports these options: -`debug` : Activates the debug mode. Defaults to `false`. -`followRedirects` : Number of redirects to follow. Defaults to `3`. -`charsetEncoding` : Document charset. Default to `utf-8`. +* __`debug`__ : Activates the debug mode. Defaults to `false`. +* __`followRedirects`__ : Number of redirects to follow. Defaults to `3`. +* __`charsetEncoding`__ : Document charset. Defaults to `utf-8`. +* __`headers`__ : Headers to pass with request. Not set by default. +* __`timeout`__ : Timeout for request. Defaults to `null`.
\ No newline at end of file
diff --git a/test/Simple_Http.js b/test/Simple_Http.js
new file mode 100644
index 0000000..5ee307e
--- /dev/null
+++ b/test/Simple_Http.js
@@ -0,0 +1,46 @@
+// Manual smoke test: serves the latest Linux kernel release, scraped from
+// kernel.org with jscrap, over a local HTTP server.
+var http = require('http');
+var jscrap = require('jscrap');
+
+var port = 3000;
+var ip = "127.0.0.1";
+
+/**
+ * Scrapes https://www.kernel.org/ and reports the result Node-style.
+ *
+ * @param {function(*, string=, string=)} callback - Called as
+ *     `callback(err)` on failure or `callback(null, text1, text2)` on success.
+ */
+function scrapData(callback) {
+    jscrap.scrap("https://www.kernel.org/", {
+        debug: true
+    }, function (err, $) {
+        // Check the error first: `$` may not be callable when the scrape failed.
+        if (err) {
+            callback(err);
+            return;
+        }
+
+        var text1 = "Latest Linux Kernel: " + $("article #latest_link > a").text();
+        var text2 = "Released: " + $("article #releases tr:first-child td:nth-child(3)").text();
+        callback(null, text1, text2);
+    });
+}
+
+// HTTP server setup
+http.createServer(function (req, res) {
+    scrapData(function (err, text1, text2) {
+        if (err) {
+            console.log(err);
+            res.writeHead(500, { 'Content-Type': 'text/plain' });
+            res.end("Scrape failed: " + (err.message || err));
+            return;
+        }
+
+        // Send the status line only once the outcome is known.
+        res.writeHead(200, { 'Content-Type': 'text/plain' });
+        res.end(text1 + " | " + text2);
+    });
+}).listen(port, ip);
+console.log('Server running at http://' + ip + ':' + port + '/');
\ No newline at end of file