howetown翻译(How to scrape web pages with PhantomJS and jQuery JavaScript phantomjs, scrape, jquery)
导读:How to scrape web pages with PhantomJS and jQuery...
How to scrape web pages with PhantomJS and jQuery
This is an example of how to scrape the web using PhantomJS and jQuery:
/*
 * Two-step scraping script for PhantomJS: submit a search form, then print
 * the links found on the result page. jQuery is injected into every loaded
 * page so the in-page code can use `$`.
 *
 * NOTE(review): written against the early PhantomJS API (global `WebPage`
 * constructor, `phantom.state`); ES5 syntax is kept on purpose because the
 * bundled JavaScript engine may not support newer syntax -- confirm against
 * the PhantomJS version in use.
 */
var page = new WebPage(),
    url = 'http://localhost/a-search-form',
    stepIndex = 0;

/**
 * From PhantomJS documentation:
 * This callback is invoked when there is a JavaScript console. The callback may accept up to three arguments:
 * the string for the message, the line number, and the source identifier.
 */
page.onConsoleMessage = function (msg, line, source) {
    console.log('console> ' + msg);
};

/**
 * From PhantomJS documentation:
 * This callback is invoked when there is a JavaScript alert. The only argument passed to the callback is the string for the message.
 */
page.onAlert = function (msg) {
    console.log('alert!!> ' + msg);
};

// Callback is executed each time a page is loaded...
page.open(url, function (status) {
    if (status !== 'success') {
        // Without an explicit exit the PhantomJS process keeps running
        // forever after a failed load, so report the failure and stop.
        console.log('Failed to load ' + url + ' (status: ' + status + ')');
        phantom.exit(1);
        return;
    }

    // State is initially empty. State is persisted between page loads and can be used for identifying which page we're on.
    console.log('============================================');
    console.log('Step "' + stepIndex + '"');
    console.log('============================================');

    // Inject jQuery for scraping (you need to save jquery-1.6.1.min.js in the same folder as this file)
    page.injectJs('jquery-1.6.1.min.js');

    // Our "event loop": run the first step when no handler has been stored
    // yet, otherwise run whichever step the previous one stored.
    if (!phantom.state) {
        initialize();
    } else {
        phantom.state();
    }

    // Save screenshot for debugging purposes
    page.render('step' + stepIndex++ + '.png');
});

/**
 * Step 1: fill in the search form inside the page and submit it, then store
 * `parseResults` as the handler for the next page load.
 */
function initialize() {
    page.evaluate(function () {
        $('form#search input.query').val('Jebus saves');
        $('form#search').submit();
        console.log('Searching...');
    });
    // Phantom state doesn't change between page reloads
    // We use the state to store the search result handler, ie. the next step
    phantom.state = parseResults;
}

/**
 * Step 2: log the href of every link inside `#search-result`, then exit
 * PhantomJS.
 */
function parseResults() {
    page.evaluate(function () {
        $('#search-result a').each(function (index, link) {
            console.log($(link).attr('href'));
        });
        console.log('Parsed results');
    });
    // If there was a 3rd step we could point to another function
    // but we would have to reload the page for the callback to be called again
    phantom.exit();
}
声明:本站所有文章,如无特殊说明或标注,均为本站原创发布。任何个人或组织,在未征得本站同意时,禁止复制、盗用、采集、发布本站内容到任何网站、书籍等各类媒体平台。如若本站内容侵犯了原著者的合法权益,可联系我们进行处理。
创心域SEO版权声明:以上内容作者已申请原创保护,未经允许不得转载,侵权必究!授权事宜、对本内容有异议或投诉,敬请联系网站管理员,我们将尽快回复您,谢谢合作!