CasperJS in 40 minutes

The Ultimate Guidebook :)

Twitter: @krzkot

Github: lis2

Email: kotlarek.krzysztof@gmail.com

What are we going to do?


Css and Xpath


      $$("")
      



  • p + a
  • p > a
  • a[href*="casper"]
  • a[href^="casper"]
  • a[href$="casper"]
  • p a:nth-child(3)

When Xpath?

Contract Type
x("//span[.='Contract Type']/parent::td/following-sibling::td//input")
Contract Type

Homework

Css selectors

  1. http://www.w3schools.com/cssref/css_selectors.asp
  2. http://code.tutsplus.com/tutorials/the-30-css-selectors-you-must-memorize--net-16048
  3. https://developer.mozilla.org/en-US/docs/Web/Guide/CSS/Getting_started/Selectors

Xpath selectors

  1. http://en.wikipedia.org/wiki/XPath
  2. http://www.w3schools.com/xpath/

Practice!

Js promise


/**
 * Asks SonService for the weather and plans the day from the answer:
 * a 'good' forecast means a fishing trip, anything else (including a
 * failed weather lookup) means Sunday roast dinner.
 */
var makePromiseWithSon = function() {
    // Promise kept: pick the activity from the forecast.
    function onForecast(weather) {
        if (weather.forecast !== 'good') {
            prepareSundayRoastDinner();
            return;
        }
        prepareFishingTrip();
    }

    // Promise broken: the reason is deliberately ignored, dinner is the fallback.
    function onBrokenPromise(reason) {
        prepareSundayRoastDinner();
    }

    SonService.getWeather().then(onForecast, onBrokenPromise);
};
        

var request = require('request');

// Naive attempt at returning the page body from a callback-based request.
// It does not work: scrapeGoogle() has no return statement of its own,
// so the caller below always receives undefined.
function scrapeGoogle() {
  request('http://www.google.com.au', function (error, response, body) {
    if (!error && response.statusCode == 200) {
      // This return only leaves the callback handed to request(); the value
      // is never passed on to whoever called scrapeGoogle().
      return body;
    }
  })
}

// content is undefined here, which is what gets logged.
var content = scrapeGoogle();
console.log(content);
        

~/p/casperjs git:master ✱ ◼ ❯❯❯ node promise.js
undefined
        

var request = require('request');

// Naive attempt at returning the page body from a callback-based request.
// It does not work: scrapeGoogle() has no return statement of its own,
// so the caller below always receives undefined.
function scrapeGoogle() {
  request('http://www.google.com.au', function (error, response, body) {
    if (!error && response.statusCode == 200) {
      // This return only leaves the callback handed to request(); the value
      // is never passed on to whoever called scrapeGoogle().
      return body;
    }
  })
}

// content is undefined here, which is what gets logged.
var content = scrapeGoogle();
console.log(content);
        

var request = require('request');
var Q = require('q');

/**
 * Fetches the Google home page and exposes the outcome as a Q promise.
 *
 * @returns {Object} promise resolved with the response body when the request
 *   finishes without error and with HTTP status 200; rejected with the string
 *   "Something went wrong" otherwise.
 */
function scrapeGoogle() {
  var deferred = Q.defer();
  request('http://www.google.com.au', function (error, response, body) {
    if (!error && response.statusCode === 200) {
      deferred.resolve(body);
    } else {
      deferred.reject("Something went wrong");
    }
  });
  // Hand back the promise immediately; the callback above settles it later.
  return deferred.promise;
}

scrapeGoogle().then(function(body) {
  console.log(body);
});
      

var request = require('request');
var Q = require('q');

/**
 * Same promise-based scraper, pointed at a host name that is misspelled on
 * purpose so the rejection path can be demonstrated.
 *
 * @returns {Object} promise resolved with the response body when the request
 *   finishes without error and with HTTP status 200; rejected with the string
 *   "Something went wrong" otherwise.
 */
function scrapeGoogle() {
  var deferred = Q.defer();
  request('http://www.goooooooooooooooogle.com.au', function (error, response, body) {
    if (!error && response.statusCode === 200) {
      deferred.resolve(body);
    } else {
      deferred.reject("Something went wrong");
    }
  });
  // Hand back the promise immediately; the callback above settles it later.
  return deferred.promise;
}

// The second argument to then() handles the rejection.
scrapeGoogle().then(function(body) {
  console.log(body);
}, function(error) {
  console.log(error);
});
      

~/p/casperjs git:master ⏎ ✱ ◼ ❯❯❯ node promise.js
Something went wrong
      

Homework

  1. http://andyshora.com/promises-angularjs-explained-as-cartoon.html
  2. https://www.npmjs.com/package/q

CasperJS


var casper = require('casper').create();

// Step callback: print the title of the page that was just opened.
function printTitle() {
    this.echo(this.getTitle());
}

// Open Google, print its title, then execute the queued steps.
casper.start('http://google.com.au/', printTitle);
casper.run();
      

~/p/casperjs git:master ✱ ◼ ❯❯❯ casperjs casperjs.js
Google
    


var casper = require('casper').create();

casper.start('http://google.com.au/', function() {
    this.echo(this.getTitle());
});

// Fill the search form's "q" field; the third argument (false) leaves the
// form unsubmitted.
casper.then(function() {
  // Declared with var so the field map stays local instead of leaking a global.
  var f = { q: 'Jn Solutions' };
  this.fill('form[action="/search"]', f, false);
});

casper.run();
    

var casper = require('casper').create();
var fs = require('fs');

casper.start('http://google.com.au/', function() {
    this.echo(this.getTitle());
});

// Fill the search form without submitting it, then dump the page so the
// filled-in state can be inspected.
casper.then(function() {
  // Declared with var so the field map stays local instead of leaking a global.
  var f = { q: 'Jn Solutions' };
  this.fill('form[action="/search"]', f, false);

  // Save the current markup and a screenshot next to the script.
  fs.write('google.html', this.getHTML());
  this.capture('google.png');
});

casper.run();
    

var casper = require('casper').create();

casper.start('http://google.com.au/', function() {
    this.echo(this.getTitle());
});

// Fill the search form and submit it (third argument true), then take a
// screenshot.
casper.then(function() {
  // Declared with var so the field map stays local instead of leaking a global.
  var f = { q: 'Jn Solutions' };
  this.fill('form[action="/search"]', f, true);

  this.capture('google2.png');
});

casper.run();
    

// Step 1: fill the search form and submit it.
casper.then(function() {
  // Declared with var so the field map stays local instead of leaking a global.
  var f = { q: 'Jn Solutions' };
  this.fill('form[action="/search"]', f, true);
});

// Step 2: click the link matched by "td.b a" and take a screenshot right away.
casper.then(function() {
  this.click("td.b a");
  this.capture("second_page.png");
});
    

...

// Step 1: fill the search form and submit it.
casper.then(function() {
  // Declared with var so the field map stays local instead of leaking a global.
  var f = { q: 'Jn Solutions' };
  this.fill('form[action="/search"]', f, true);
});

// Step 2: click the link matched by "td.b a", then wait up to 4000 ms for
// "#ires" before capturing.
casper.then(function() {
  this.click("td.b a");

  this.waitForSelector("#ires", function() {
    this.capture("final.png");
  }, function() {
    // Timeout callback: the selector never appeared, so abort the run.
    this.die("Something went wrong");
  }, 4000);
});
    

// Step 1: fill the search form and submit it.
casper.then(function() {
  // Declared with var so the field map stays local instead of leaking a global.
  var f = { q: 'Jn Solutions' };
  this.fill('form[action="/search"]', f, true);
});

// Step 2: wait for a selector that is bogus on purpose, to show the timeout
// callback firing after 4000 ms.
casper.then(function() {
  this.click("td.b a");

  this.waitForSelector("#iresTESTTESTTEST", function() {
    this.capture("final.png");
  }, function() {
    // Timeout callback: the selector never appeared, so abort the run.
    this.die("Something went wrong");
  }, 4000);
});
    

~/p/casperjs git:master ⏎ ✱ ◼ ❯❯❯ casperjs casperjs.js
Something went wrong
    

evaluate()

Evaluates an expression in the current page DOM context

As a reminder, think of the evaluate() method as a gate between the CasperJS environment and that of the page you have opened; every time you pass a closure to evaluate(), you’re entering the page and executing code as if you were using the browser console.

/**
 * Collects the href attribute of every element matching 'h3.r a' in the
 * current document.
 *
 * @returns {string[]} one entry per matched element, each href followed by '\r\n'.
 */
function getLinks() {
    var anchors = document.querySelectorAll('h3.r a');
    var hrefs = [];
    for (var i = 0; i < anchors.length; i++) {
        hrefs.push(anchors[i].getAttribute('href') + '\r\n');
    }
    return hrefs;
}

// Click the link matched by "td.b a", wait up to 4000 ms for "#ires", then
// print the links that getLinks collects inside the page.
casper.then(function() {
  // Runs once "#ires" is present: evaluate getLinks in the page and echo the result.
  var printLinks = function() {
    this.echo(this.evaluate(getLinks));
  };

  // Runs if the wait times out.
  var giveUp = function() {
    this.die("Something went wrong")
  };

  this.click("td.b a");
  this.waitForSelector("#ires", printLinks, giveUp, 4000);
});
    

~/p/casperjs git:master ✱ ◼ ❯❯❯ casperjs casperjs.js
/url?q=http://www.jnsolutions.com/&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CBsQFjAAOAo&usg=AFQjCNEzy8cP86l_d_zPiE_TBHK92tQ6Ug
,/url?q=http://www.jnsolutions.com/solut.htm&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CCEQFjABOAo&usg=AFQjCNEFi3Oz5AnBwhgZf2T-L7FxuRecmA
,/url?q=http://www.jnsolutions.com/about.htm&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CCYQFjACOAo&usg=AFQjCNHKVZoIQ6KbYZmzx3B0S7sVP1DyBg
,/url?q=https://www.facebook.com/pages/JN-Solutions-Inc/114192285259833&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CCwQFjADOAo&usg=AFQjCNGQbCHC-62jafDgg2m_WYKPU8LW_w
,/url?q=http://www.yelp.com.au/biz/jn-solutions-skaelskor&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CDEQFjAEOAo&usg=AFQjCNHSOq5KUcKfczl5nR1ROm4gBDWQBA
,/url?q=http://www.aroundyou.com.au/place/businesses/jn-solutions&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CDYQFjAFOAo&usg=AFQjCNE8m9x2NN0dpuTweev1xgaQIVuOTg
,/url?q=https://twitter.com/jnsolutions&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CDsQFjAGOAo&usg=AFQjCNFDdJRsZS6h_GdPMaTB7_mVj5BWHw
,/url?q=https://github.com/JN-Solutions&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CEAQFjAHOAo&usg=AFQjCNFMeZVN-pSlgM2wmp9SQDHKPa7qSg
,/url?q=https://local.yahoo.com/info-163733501-jn-solutions-incorporated-caledonia&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CEUQFjAIOAo&usg=AFQjCNHSRuGsXc9X9aNVZePhkUZ3V3mKDQ
,/url?q=http://www.hotfrog.com.au/Companies/Jn-Solutions_2758142&sa=U&ei=9SEHVd_RI4H6PL6TgYgC&ved=0CEoQFjAJOAo&usg=AFQjCNErW1wUNwTcH6BCXnQbBPrYxCsATQ
    

Google scraper

https://gist.github.com/anonymous/c87b335e1cf09a0e4443

Command Line Options


  // Dispatch on command-line options: log in first, then run whichever of
  // --subscribe / --delete / --list was passed.
  // NOTE(review): `c` is not defined in this snippet; presumably an alias for
  // `casper`. Confirm against the full script.
  var username = c.cli.options["username"];
  var password = c.cli.options["password"];

  esr_pages.login(username, password);

  if (casper.cli.options["subscribe"]) {
    createSubscription(c.cli.options["contract"], c.cli.options["subscribe"]);
  }

  if (casper.cli.options["delete"]) {
    deleteSubscription(c.cli.options["contract"], c.cli.options["delete"]);
  }

  // The original had an empty `{}` body here, so the listing ran
  // unconditionally and the closing brace was unbalanced.
  if (casper.cli.options["list"]) {
    listSubscriptions();
  }

  casper.run();
    

casperjs ./script/esr.coffee --subscribe='JSC01' --password='secr3t' --username='login'
    

/**
 * Runs the CasperJS ESR script to create a subscription for a job and reports
 * the outcome through a Q promise.
 *
 * Must be called with a receiver that provides get_username_and_password()
 * (the original used CoffeeScript's `@`, i.e. `this.`).
 *
 * @param {Object} job - has `data` (a JSON string containing `subscription`)
 *   and `organisation_id`.
 * @returns {Object} promise resolved with { status: "Success", response_data }
 *   when the command exits with code 0, rejected with
 *   { status: "Error", response_data } otherwise.
 */
function esr_subscribe(job) {
  var deferred = Q.defer();
  var data = JSON.parse(job.data);
  var login_details = this.get_username_and_password(job.organisation_id);
  // Built by concatenation: "#{...}" is CoffeeScript interpolation and would
  // be passed through literally in a JavaScript string.
  var cmd = "casperjs ./script/esr.coffee" +
    " --subscribe=" + shell_quote(data.subscription) +
    " --password=" + shell_quote(login_details.password) +
    " --username=" + shell_quote(login_details.username);

  shell.exec(cmd, function(code, output) {
    if (code === 0) {
      deferred.resolve({ status: "Success", "response_data": output });
    } else {
      deferred.reject({ status: "Error", "response_data": output });
    }
  });
  return deferred.promise;
}

// Wraps a value in single quotes for the shell, escaping embedded single
// quotes so the value cannot terminate the quoting early.
function shell_quote(value) {
  return "'" + String(value).replace(/'/g, "'\\''") + "'";
}
    

Homework

CasperJS
  1. http://victorwyee.com/js/webscraping-with-casperjs-phantomjs/
  2. http://casperjs.readthedocs.org/en/latest/quickstart.html
Modules
  1. http://www.sitepoint.com/understanding-module-exports-exports-node-js/

Useful modules

  1. Q
  2. Request
  3. fs

Useful modules

  1. Q
  2. Request
  3. fs
  4. ShellJS

Useful modules

  1. Q
  2. Request
  3. fs
  4. ShellJS
  5. mysql

Useful modules

  1. Q
  2. Request
  3. fs
  4. ShellJS
  5. mysql
  6. config

Useful modules

  1. Q
  2. Request
  3. fs
  4. ShellJS
  5. mysql
  6. config
  7. Keystone/Swift Awesome JNS Module

Thank you