scrapingbee node
v1.7.5
ScrapingBee 是一個網頁抓取 API,可以處理無頭瀏覽器並為您輪換代理。 Node SDK 讓與 ScrapingBee 的 API 互動變得更加容易。
您可以使用 npm 安裝 ScrapingBee Node SDK。
npm install scrapingbee
ScrapingBee Node SDK 是 axios 函式庫的包裝器。 ScrapingBee 支援 GET 和 POST 請求。
註冊 ScrapingBee 以取得您的 API 金鑰和一些免費積分以開始使用。
const scrapingbee = require ( 'scrapingbee' ) ;
/**
 * Fetches `url` through the ScrapingBee client using an explicit set of
 * request options, then prints the decoded response body with `console.log`.
 *
 * @param {string} url - The URL you want to scrape.
 * @returns {Promise<void>} Settles once the body has been printed; rejects if
 *   the client call rejects.
 */
async function get(url) {
  const client = new scrapingbee.ScrapingBeeClient('REPLACE-WITH-YOUR-API-KEY');
  const response = await client.get({
    // The URL you want to scrape
    url,
    params: {
      // Block ads on the page you want to scrape
      block_ads: false,
      // Block images and CSS on the page you want to scrape
      block_resources: true,
      // Premium proxy geolocation
      country_code: '',
      // Control the device the request will be sent from
      device: 'desktop',
      // Use some data extraction rules
      extract_rules: { title: 'h1' },
      // Wrap response in JSON
      json_response: false,
      // JavaScript scenario to execute (clicking on button, scrolling ...)
      js_scenario: {
        instructions: [
          { wait_for: '#slow_button' },
          { click: '#slow_button' },
          { scroll_x: 1000 },
          { wait: 1000 },
          { scroll_x: 1000 },
          { wait: 1000 },
        ],
      },
      // Use premium proxies to bypass difficult to scrape websites (10-25 credits/request)
      premium_proxy: false,
      // Execute JavaScript code with a Headless Browser (5 credits/request)
      render_js: true,
      // Return the original HTML before the JavaScript rendering
      return_page_source: false,
      // Return page screenshot as a png image
      screenshot: false,
      // Take a full page screenshot without the window limitation
      screenshot_full_page: false,
      // Transparently return the same HTTP code of the page requested.
      transparent_status_code: false,
      // Wait, in milliseconds, before returning the response
      wait: 0,
      // Wait for CSS selector before returning the response, ex ".title"
      wait_for: '',
      // Set the browser window width in pixel
      window_width: 1920,
      // Set the browser window height in pixel
      window_height: 1080,
    },
    headers: {
      // Forward custom headers to the target website
      key: 'value',
    },
    cookies: {
      // Forward custom cookies to the target website
      name: 'value',
    },
    // `timeout` specifies the number of milliseconds before the request times out.
    // If the request takes longer than `timeout`, the request will be aborted.
    timeout: 10000, // here 10sec, default is `0` (no timeout)
  });

  // `response.data` is handed to TextDecoder, so it is presumably raw bytes
  // (Buffer/ArrayBuffer) - confirm against the SDK. Default encoding is UTF-8.
  const decoder = new TextDecoder();
  const text = decoder.decode(response.data);
  console.log(text);
}
// Run the example; a rejected request is reported on stdout instead of crashing.
get('https://httpbin-scrapingbee.cleverapps.io/html').catch((error) => {
  console.log('A problem occurs : ' + error.message);
});
/* -- output
<!DOCTYPE html><html lang="en"><head>...
*/
ScrapingBee 支援多種參數,可用來渲染 JavaScript、執行自訂 JavaScript 腳本、使用特定地理位置的高級代理(premium proxy)等等。
您可以在 ScrapingBee 的文檔中找到所有支援的參數。
您可以發送自訂 cookie 和標頭,就像平常使用 HTTP 請求函式庫(例如 axios)時一樣。
以下是一個小範例,示範如何以行動裝置解析度擷取並儲存 ScrapingBee 部落格的螢幕截圖。
const fs = require ( 'fs' ) ;
const scrapingbee = require ( 'scrapingbee' ) ;
/**
 * Requests a full-page screenshot of `url` from ScrapingBee with a
 * 375-pixel-wide browser window and writes the returned data to `path`.
 *
 * @param {string} url - Page to capture.
 * @param {string} path - Destination file path for the image data.
 * @returns {Promise<void>} Settles once the file has been written; rejects if
 *   the request or the write fails.
 */
async function screenshot(url, path) {
  const client = new scrapingbee.ScrapingBeeClient('REPLACE-WITH-YOUR-API-KEY');
  const response = await client.get({
    url,
    params: {
      screenshot: true, // Take a screenshot
      screenshot_full_page: true, // Specify that we need the full height
      window_width: 375, // Specify a mobile width in pixel
    },
  });
  // Await the promise-based write so the event loop is not blocked while the
  // image is saved; failures still surface as a rejection of this function.
  await fs.promises.writeFile(path, response.data);
}
// Capture the example page into ./httpbin.png; report any failure on stdout.
screenshot('https://httpbin-scrapingbee.cleverapps.io/html', './httpbin.png').catch((error) => {
  console.log('A problem occurs : ' + error.message);
});
客戶端包含針對 5XX 回應的重試機制。
const spb = require ( 'scrapingbee' ) ;
/**
 * Fetches `url` through ScrapingBee with `render_js` turned off and
 * `retries: 5` passed to the client, then prints the decoded response body.
 *
 * @param {string} url - The URL to scrape.
 * @returns {Promise<void>} Settles once the body has been printed.
 */
async function get(url) {
  const api = new spb.ScrapingBeeClient('REPLACE-WITH-YOUR-API-KEY');
  const requestOptions = {
    url,
    params: { render_js: false },
    retries: 5,
  };
  const { data } = await api.get(requestOptions);
  // Decode the response payload with TextDecoder's default encoding (UTF-8).
  console.log(new TextDecoder().decode(data));
}
// Run the retry example; a rejected request is reported on stdout.
get('https://httpbin-scrapingbee.cleverapps.io/html').catch((error) => {
  console.log('A problem occured: ' + error.message);
});