puppeteer
安装
国内加速源:
PUPPETEER_DOWNLOAD_HOST=https://storage.googleapis.com.cnpmjs.org PUPPETEER_DOWNLOAD_HOST=https://npm.taobao.org/mirrors
npm config set puppeteer_download_host=https://npm.taobao.org/mirrors
爬虫
特征隐藏
特征隐藏:
../../javascript/puppeteer/test-browser/puppeteerUtils.js
const puppeteer = require('puppeteer'); exports.disguise = async (page) => { await page.evaluate(() => Object.defineProperties(navigator, { webdriver:{ get: () => false } })); // Pass the Webdriver Test. await page.evaluateOnNewDocument(() => { Object.defineProperty(navigator, 'webdriver', { get: () => false}); }); // Pass the Chrome Test. await page.evaluateOnNewDocument(() => { // We can mock this in as much depth as we need for the test. window.navigator.chrome = { runtime: {} }; }); // Pass the Plugins Length Test. await page.evaluateOnNewDocument(() => { // Overwrite the `plugins` property to use a custom getter. Object.defineProperty(navigator, 'plugins', { // This just needs to have `length > 0` for the current test, // but we could mock the plugins too if necessary. get: () => [1, 2, 3, 4, 5], }); }); // Pass the Languages Test. await page.evaluateOnNewDocument(() => { // Overwrite the `plugins` property to use a custom getter. Object.defineProperty(navigator, 'languages', { get: () => ['zh-cn', 'en'], }); }); };
浏览器特征测试
测试无头浏览器: 测试无头浏览器特征
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>测试浏览器特征</title>
  <style>
    .test-table {
      width: 800px;
      height: 800px;
      margin: 0 auto;
    }
    td:last-child {
      background-color: #c8d86d;
      max-width:300px;
      word-wrap:break-word;
    }
    td.failed {
      background-color: #f45159;
    }
    table, th, td {
      border: 1px solid black;
    }
  </style>
</head>
<body>
<h1>h5s.club </h1>
<h2> 此为浏览器特征值及是否被自动化及无头化测试页</h2>
<h2>测试页:</h2>
<!-- The opening tag was missing: the page had a stray </div> and an unused .test-table rule. -->
<div class="test-table">
  <table>
    <tr> <th>测试特征</th> <th>特征结果</th> </tr>
    <tr> <td>User Agent</td> <td id="user-agent"></td> </tr>
    <tr> <td>WebDriver</td> <td id="Webdriver"></td> </tr>
    <tr> <td>Permissions</td> <td id="permissions-result"></td> </tr>
    <tr> <td>Chrome.runtime</td> <td id="chrome"></td> </tr>
    <tr> <td>Plugins Length</td> <td id="plugins-length"></td> </tr>
    <tr> <td>outerWidth</td> <td id="outerWidth"></td> </tr>
    <tr> <td>outerHeight</td> <td id="outerHeight"></td> </tr>
    <tr> <td>screen.width</td> <td id="screenWidth"></td> </tr>
    <tr> <td>screen.height</td> <td id="screenHeight"></td> </tr>
    <tr> <td>innerWidth</td> <td id="innerWidth"></td> </tr>
    <tr> <td>innerHeight</td> <td id="innerHeight"></td> </tr>
    <tr> <td>Languages</td> <td id="languages"></td> </tr>
    <tr> <td>Chromiun PDF Plugin</td> <td id="Chromiun-PDF"></td> </tr>
    <tr> <td>Private Evn</td> <td id="Private-Evn"></td> </tr>
    <tr> <td>WebGL Vendor</td> <td id="webgl-vendor"></td> </tr>
    <tr> <td>WebGL Renderer</td> <td id="webgl-renderer"></td> </tr>
  </table>
</div>
<script>
  /**
   * Write one probe result into its table cell.
   * @param {string} id - id of the result cell.
   * @param {*} value - probed value; shown as String(value).
   * @param {boolean} failed - true when the value looks automated/headless.
   */
  function report(id, value, failed) {
    const cell = document.getElementById(id);
    // textContent: probed values are displayed as text, never parsed as HTML.
    cell.textContent = String(value);
    if (failed) {
      cell.classList.add('failed');
    }
  }

  // User-Agent Test
  report('user-agent', window.navigator.userAgent, /HeadlessChrome/.test(window.navigator.userAgent));

  // Webdriver Test
  report('Webdriver', window.navigator.webdriver, Boolean(navigator.webdriver));

  // Chrome Test
  // Guard `window.chrome` itself: when it is missing, reading `.runtime`
  // directly throws and would abort every test below.
  const chromeRuntime = window.chrome && window.chrome.runtime;
  if (chromeRuntime) {
    report('chrome', chromeRuntime, false);
  } else {
    report('chrome', 'none runtime', true);
  }

  // Notification Test (kept for now)
  if ("Notification" in window && navigator.permissions) {
    (async () => {
      try {
        const permissionStatus = await navigator.permissions.query({ name: 'notifications' });
        report(
          'permissions-result',
          permissionStatus.state + ' && ' + Notification.permission,
          Notification.permission === 'denied' && permissionStatus.state === 'prompt'
        );
      } catch (err) {
        // A rejected query must not surface as an unhandled rejection.
        console.error(err);
      }
    })();
  }

  // Plugins Length Test
  report('plugins-length', navigator.plugins.length, navigator.plugins.length === 0);

  // Window/screen size tests: 800x600 is flagged as the failing size.
  report('outerWidth', window.outerWidth, window.outerWidth === 800);
  report('outerHeight', window.outerHeight, window.outerHeight === 600);
  report('screenWidth', screen.width, screen.width === 800);
  report('screenHeight', screen.height, screen.height === 600);
  report('innerWidth', window.innerWidth, window.innerWidth === 800);
  report('innerHeight', window.innerHeight, window.innerHeight === 600);

  // Languages Test
  report('languages', navigator.languages, !navigator.languages || navigator.languages.length === 0);

  // Chromium PDF Plugin Test: the presence of this plugin is flagged.
  const chromiumPdfPlugin = navigator.plugins["Chromium PDF Plugin"];
  report('Chromiun-PDF', chromiumPdfPlugin, Boolean(chromiumPdfPlugin));

  // WebGL Tests
  const canvas = document.createElement('canvas');
  // 'experimental-webgl' is the legacy context id; 'webgl-experimental' never matched.
  const gl = canvas.getContext('webgl') || canvas.getContext('experimental-webgl');
  if (gl) {
    // getExtension returns null when the extension is unavailable.
    const debugInfo = gl.getExtension('WEBGL_debug_renderer_info');
    if (debugInfo) {
      // WebGL Vendor Test
      const vendor = gl.getParameter(debugInfo.UNMASKED_VENDOR_WEBGL);
      report('webgl-vendor', vendor, vendor === 'Brian Paul');

      // WebGL Renderer Test
      const renderer = gl.getParameter(debugInfo.UNMASKED_RENDERER_WEBGL);
      report('webgl-renderer', renderer, renderer === 'Mesa OffScreen');
    }
  }

  // Private mode Test
  /**
   * Detect private/incognito mode with per-engine heuristics.
   * @param {function(boolean)} cb - called with true when private mode is detected.
   */
  function detectPrivateMode(cb) {
    const on = cb.bind(null, true);
    const off = cb.bind(null, false);

    function tryls() {
      let isPrivate = false;
      try {
        window.openDatabase(null, null, null, null);
      } catch (e) {
        isPrivate = true;
      }
      if (isPrivate) {
        on();
      } else {
        off();
      }
    }

    // Must stay a regular function: `this` is the IndexedDB request.
    function errorIndexDb(event) {
      event.preventDefault();
      if (this.error && this.error.name === 'InvalidStateError') {
        on();
      } else {
        off();
      }
    }

    if (window.webkitRequestFileSystem) {
      // Blink (chrome & opera)
      window.webkitRequestFileSystem(0, 0, off, on);
    } else if ("MozAppearance" in document.documentElement.style) {
      // FF
      const db = indexedDB.open("test");
      db.onerror = errorIndexDb;
      db.onsuccess = off;
    } else if (/constructor/i.test(window.HTMLElement) || window.safari) {
      // Safari
      tryls();
    } else if (!window.indexedDB && (window.PointerEvent || window.MSPointerEvent)) {
      // IE10+ & edge
      on();
    } else {
      // Rest
      off();
    }
  }

  detectPrivateMode(function (isPrivateMode) {
    report('Private-Evn', isPrivateMode ? 'private' : 'no private', isPrivateMode);
  });
</script>
</body>
</html>
puppeteer性能测试
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>puppetteer性能角度分析</title>
  <style> </style>
</head>
<body>
<button id="myBtn">测试性能</button>
<p id="totalTime"></p>
<div class="wrap"></div>
<script>
  // Clicking the button runs a DOM-heavy workload and prints how many
  // milliseconds it took.
  window.onload = function () {
    const container = document.querySelector('.wrap');
    const trigger = document.getElementById('myBtn');
    const elapsedOutput = document.getElementById('totalTime');
    // Persists across clicks, so each click starts from a longer string.
    let accumulatedItems = "";

    function growContainer() {
      const { style } = container;
      style.width = '500px';
      style.height = '500px';
      style.background = 'red';

      // Each round appends the whole accumulated string, so the appended
      // markup grows every round.
      // NOTE(review): presumably intentional as the load being measured — confirm.
      let round = 0;
      while (round < 500) {
        accumulatedItems += "<li></li>";
        container.innerHTML += accumulatedItems;
        round += 1;
      }
    }

    trigger.addEventListener('click', () => {
      const startedAt = Date.now();
      growContainer();
      elapsedOutput.innerText = Date.now() - startedAt;
    });
  };
</script>
</body>
</html>
测试PhantomTest特征
<html> <head> <meta charset="utf-8"> <title>测试PhantomTest性能</title> </head> <body> <div><img src="ph.jpg" alt=""></div> <div><p>test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\</p></div> <p>outerWidth是:</p><p id="outerWidth"></p> <p>outerHeight是:</p><p id="outerHeight"></p> <p>screen.width是:</p><p id="screenWidth"></p> <p>screen.height:</p><p id="screenHeight"></p> <p>window.innerWidth是:</p> <p id="innerWidth"></p> <p>window.innerHeight是:</p> <p id="innerHeight"></p> <script> //outerWidth Test const outerWidthElement = document.getElementById('outerWidth'); outerWidthElement.innerHTML = window.outerWidth; // //outerHeight Test const outerHeightElement = document.getElementById('outerHeight'); outerHeightElement.innerHTML = window.outerHeight; // //screen.width Test const screenWidthElement = document.getElementById('screenWidth'); screenWidthElement.innerHTML = screen.width; // //screen.height Test const screenHeightElement = document.getElementById('screenHeight'); screenHeightElement.innerHTML = screen.height; // innerWidth Test const innerWidthElement = document.getElementById('innerWidth'); innerWidthElement.innerHTML = window.innerWidth; // innerHeight Test const innerHeightElement = document.getElementById('innerHeight'); innerHeightElement.innerHTML = window.innerHeight; </script> </body> </html>
截图限制
https://zxc0328.github.io/2018/02/12/hdchrome-long-capture/
// Capture a very tall page in slices and stitch the slices into one image.
await page.setViewport({ width: 1440, height: 1024 });
const { contentSize } = await page._client.send('Page.getLayoutMetrics');

// Maximum height of a single slice. The value comes from the original notes,
// which warn against changing it.
// NOTE(review): presumably tied to a screenshot height limit — confirm before touching.
const maxScreenshotHeight = 7000;

if (contentSize.height >= maxScreenshotHeight) {
  let stitched;
  let ypos = 0;
  while (ypos < contentSize.height) {
    // The last slice may be shorter than the maximum.
    const height = Math.min(contentSize.height - ypos, maxScreenshotHeight);
    const slice = await page.screenshot({
      clip: { x: 0, y: ypos, width: contentSize.width, height },
    });

    // First slice seeds the image; later slices grow the canvas downward by
    // their own height and are pasted at their vertical offset.
    const canvas = ypos === 0
      ? sharp(slice)
      : sharp(stitched)
        .extend({ top: 0, bottom: height, left: 0, right: 0 })
        .overlayWith(slice, { top: ypos, left: 0 });
    stitched = await canvas.toBuffer();

    ypos += maxScreenshotHeight;
  }
  fileData = stitched;
}
启动浏览器
- 开启浏览器界面:headless: false
- 开启开发者控制台:devtools: true
- 自定义浏览器宽高:page.setViewport
-
产生两个tab页
-
官方打开页面:
await browser.newPage()
,会产生两个tab页,一个是目标tab页,一个是blank页 -
修改后:
(await browser.pages())[0]
,仅打开目标tab页
-
官方打开页面:
async init() { await this.openPage(); await this.createCer(); } async openPage() { // 打开浏览器 browser = await puppeteer.launch({ headless: false, // 开启界面, devtools: true, // 开启开发者控制台 }); // 打开一个空白页 page = (await browser.pages())[0]; try { // 设置 浏览器视窗 await page.setViewport({ width: 1300, height: 938, }); // 跳转 目的页 await page.goto("http://127.0.0.1/demo.html"); } catch (error) { await this.openPage(); throw new Error('请求页面超时,尝试重新连接'); } }
操作页面
-
为了能够获取目标节点,当遇到页面跳转、点击下拉等操作时,可先等待随机秒数:
await page.waitFor(utilFun.random(1000, 3000));
-
想获取元素的属性:
page.$eval()
-
想操作dom元素:
page.evaluate()
为了能够准确获取dom元素,可使用setTimeout延时若干秒后,再进行相应操作 -
正则中若想含有变量:
let reg = new RegExp(`${username}`);
async createCer() { const type = this.type; const Development = "#ios-nav > li:nth-child(1) ul > li:nth-child(3)"; const Production = await page.$("#ios-nav > li:nth-child(1) ul > li:nth-child(4)"); switch (type) { case "dev": await this.addIosCertificates(Development); break; case "dis": await this.addIosCertificates(Production); break; default: break } } async addIosCertificates(ele) { // 点击 侧边栏 类型 await page.waitFor(utilFun.random(1000, 3000)); await page.click(ele); // 点击 add 添加IOS证书 await page.waitFor(utilFun.random(1000, 3000)); await page.click(".toolbar-button.add"); // 判断 radio 是否能点击 await page.waitFor(utilFun.random(1000, 3000)); const radioDisabled = await page.$eval("#type-development-0", async el => { return el.disabled; }); // 如果证书数量满额,先删除,后增加 if (radioDisabled) { // 点击 侧边栏 类型 await page.waitFor(utilFun.random(1000, 3000)); await page.click(`${ele}`); // 删除 IOS证书 await page.waitFor(utilFun.random(1000, 3000)); await this.deleteCer(); } else { // 增加 IOS证书 await this.addCer(); } } async deleteCer() { await page.evaluate(async (username) => { let tableInfo = ""; let reg = new RegExp(`${username}`); const table = document.querySelectorAll(".data-table")[1].querySelector("tbody"); for (let i = 0; i < table.rows.length; i++) { for (let j = 0; j < table.rows[i].cells.length; j++) { tableInfo = table.rows[i].cells[j].innerText; if (reg.test(tableInfo) && (i % 2 == 0)) { // 名字 let name = table.rows[i].cells[j].innerText; // 类型 let type = table.rows[i].cells[j + 1].innerText; // 期限 let expires = table.rows[i].cells[j + 2].innerText; // 点击 下拉 table.rows[i].click(); // 点击 Revoke setTimeout(() => { document.querySelector(".button-no-padding-top.small.revoke-button").click(); }, 1000); // 点击 弹窗 Revoke setTimeout(() => { document.querySelector(".ui-dialog-content.ui-widget-content .button.small.red.ok").click(); }, 3000); } } } }, username); }
取得页面的节点$$
// Collect the text of every matching <span> into recArr as { name } records.
let recArr = [];
const elements = await page.$$('.do-xxkwo span');
for (const el of elements) {
  // The callback parameter was misspelled ("apan"), so `span` was undefined
  // and the evaluation threw a ReferenceError.
  const name = await el.evaluate((span) => span.textContent);
  recArr.push({ name });
}
取得节点作为参数传递执行$
// Fetch the node.
const element = await page.$('button[aria-label="Next Page"]');
// Pass the previously fetched node back into the page context as an argument.
// hasAttribute is used because getAttribute returns "" (falsy) for a boolean
// attribute such as `disabled`; the legacy `disable` name is still accepted.
// page.$ yields null when nothing matches, so a missing button is not disabled.
const isDisable = element
  ? await page.evaluate(
      (el) => el.hasAttribute('disabled') || el.hasAttribute('disable'),
      element,
    )
  : false;
if (isDisable) {
  console.log('next page is disableed');
}
模拟点击动作
// Click the "Next Page" button, then wait until the network is idle.
const nextPageSelector = 'button[aria-label="Next Page"]';
await page.click(nextPageSelector);
await page.waitForNetworkIdle();
滚动页面
/**
 * Scroll the page down step by step until the scrolled distance reaches the
 * document height. The height is re-read on every step, so content that
 * loads while scrolling extends the run.
 *
 * @param {object} page - Puppeteer page to scroll.
 * @param {number} [distance=100] - pixels scrolled per step.
 * @param {number} [interval=100] - milliseconds between steps.
 * @returns {Promise<void>} Resolves once the bottom has been reached.
 */
async function autoScroll(page, distance = 100, interval = 100) {
  await page.evaluate(
    (step, delay) =>
      new Promise((resolve) => {
        let totalHeight = 0;
        const timer = setInterval(() => {
          const { scrollHeight } = document.body;
          window.scrollBy(0, step);
          totalHeight += step;
          if (totalHeight >= scrollHeight) {
            // Was `clearinterval(timmer)`: both names were misspelled, so the
            // callback threw and the promise never resolved.
            clearInterval(timer);
            resolve();
          }
        }, delay);
      }),
    distance,
    interval,
  );
}
文件上传下载
上传文件
// Pause for a random interval, then hand the file to the page's file input.
await page.waitFor(utilFun.random(1000, 3000));
const fileInput = await page.$("input[type=file]");
await fileInput.uploadFile("你的文件路径");
文件下载
// Download the iOS certificate.
// NOTE(review): downloadFile is defined elsewhere; presumably it prepares the
// download target before the button is clicked — confirm.
const downloadButtonSelector = ".button.small.blue";
await this.downloadFile("你的文件路径");
await page.waitFor(utilFun.random(1000, 3000));
await page.click(downloadButtonSelector);
请求修改
// Intercept requests: answer the price endpoint from taskData, and either
// mock or capture the secondary-data endpoint.
await page.setRequestInterception(true);
new Promise((resolve, reject) => {
  // Registered once. It used to be added inside the request handler, which
  // stacked one more listener for every matching request.
  page.on('response', (response) => {
    if (!response.url().includes('/api/GetSecondaryData?')) {
      return;
    }
    if (taskData.hotelInfo) {
      // The request was answered with the mocked body; nothing to capture.
      return;
    }
    response
      .text()
      .then((text) => {
        // Resolve with the raw body, then keep the price-adjusted version in
        // `results` (same order as before).
        results = text;
        resolve(results);
        results = replacePriceFloor(taskData, results);
      })
      .catch(reject);
  });

  page.on('request', (request) => {
    const url = request.url();
    if (url.includes('/pageparams/property?')) {
      console.log("match price url : " + url);
      request.respond({
        status: 200,
        contentType: 'application/json; charset=utf-8',
        body: taskData.priceData,
      });
    } else if (url.includes('/api/GetSecondaryData?')) {
      console.log("match info url : " + url);
      if (taskData.hotelInfo) {
        request.respond({
          status: 200,
          contentType: 'application/json; charset=utf-8',
          body: taskData.hotelInfo,
        });
      } else {
        request.continue();
      }
    } else {
      request.continue();
    }
  });
}).catch((e) => {
  // Was `console.err`, which does not exist and would itself throw.
  console.error(e);
});
Cookie
// Rewrite the language/currency fields of the "agoda.version.03" cookie and
// write the cookies back to the page.
const cookies = await page.cookies();

// Replace every match of `pattern` in `value` with `param`, or append `param`
// when there is no match. A function replacement keeps `$` in `param` literal.
const upsertParam = (value, pattern, param) =>
  value.search(pattern) > -1 ? value.replace(pattern, () => param) : value + param;

const newCookies = cookies.map((cookie) => {
  const { name, domain } = cookie;
  let { value } = cookie;
  console.log("-----------------------------");
  console.log(domain);
  console.log(name);
  console.log(value);
  console.log("-----------------------------");
  if (name === "agoda.version.03") {
    value = upsertParam(value, /&DLang=[-A-Za-z]*/g, "&DLang=" + taskData.langName);
    value = upsertParam(value, /&CuLang=[0-9]*/g, "&CuLang=" + taskData.langNum);
    value = upsertParam(value, /&CurLabel=[A-Za-z]*/g, "&CurLabel=" + taskData.currencyCode);
    value = upsertParam(value, /&CuCur=[0-9]*/g, "&CuCur=" + taskData.currencyNum);
    console.log(value);
  }
  // Keep the remaining cookie attributes and replace only the value.
  return { ...cookie, value };
});

// Write back the rewritten cookies. The unmodified `cookies` array used to be
// set here, which silently discarded every change made above.
if (newCookies.length > 0) {
  await page.setCookie(...newCookies);
}
常见问题
清磁盘缓存
rem Frees disk space by force-deleting temp, log, cache and history files.
rem WARNING: destructive. The %systemdrive% lines recurse through the whole
rem system drive (/s) and delete every file matching the pattern, quietly (/q).
@echo off
echo clearing tmpfile
rem Per-user temp folders.
del /f /s /q "%userprofile%\Local Settings\Temp\"
del /f /s /q "%userprofile%\AppData\Local\Temp\"
rem Log files under c:\var\crawl\log.
del /f /s /q c:\var\crawl\log\*.log
rem Leftover files by extension, anywhere on the system drive.
del /f /s /q %systemdrive%\*.tmp
del /f /s /q %systemdrive%\*._mp
del /f /s /q %systemdrive%\*.log
del /f /s /q %systemdrive%\*.gid
del /f /s /q %systemdrive%\*.chk
del /f /s /q %systemdrive%\*.old
rem Contents of the "recycled" folder on the system drive.
del /f /s /q %systemdrive%\recycled\*.*
rem Windows directory: backups, prefetch data, and a recreated temp folder.
del /f /s /q %windir%\*.bak
del /f /s /q %windir%\prefetch\*.*
rd /s /q %windir%\temp & md %windir%\temp
rem Per-user cookies, recent-documents list and internet cache.
del /f /q %userprofile%\cookies\*.*
del /f /q %userprofile%\recent\*.*
del /f /s /q "%userprofile%\Local Settings\Temporary Internet Files\*.*"
del /f /s /q "%userprofile%\Local Settings\Temp\*.*"
del /f /s /q "%userprofile%\recent\*.*"
打开页面卡死
Stalls on browser.newPage()
running on node.js:
issues1543
禁用沙盒,显示浏览器并把IO Dump出来看错误在哪里:
// Launch configuration for diagnosing a stalled browser.newPage().
// The original notes listed three alternative launches, each adding one
// option; they redeclared `const browser` and could not run together, so
// they are merged here. Remove options from the top to step back.
const browser = await puppeteer.launch({
  // Step 3: show the browser window.
  headless: false,
  // Step 2: dump the browser's IO so its errors can be read.
  dumpio: true,
  // Step 1: disable the sandbox.
  args: ['--no-sandbox', '--disable-setuid-sandbox'],
});