Jade Dungeon

puppeteer

安装

国内加速源:

PUPPETEER_DOWNLOAD_HOST=https://storage.googleapis.com.cnpmjs.org
PUPPETEER_DOWNLOAD_HOST=https://npm.taobao.org/mirrors
npm config set puppeteer_download_host=https://npm.taobao.org/mirrors

爬虫

特征隐藏

特征隐藏: ../../javascript/puppeteer/test-browser/puppeteerUtils.js

const puppeteer = require('puppeteer'); 

exports.disguise = async (page) => {
	await page.evaluate(() => Object.defineProperties(navigator, {
			webdriver:{ get: () => false }
		}));

	// Pass the Webdriver Test.
	await page.evaluateOnNewDocument(() => {
		Object.defineProperty(navigator, 'webdriver', { get: () => false});
	}); 

	// Pass the Chrome Test.
	await page.evaluateOnNewDocument(() => {
		// We can mock this in as much depth as we need for the test.
		window.navigator.chrome = { runtime: {} };
	});
	
	
	// Pass the Plugins Length Test.
	await page.evaluateOnNewDocument(() => {
		// Overwrite the `plugins` property to use a custom getter.
		Object.defineProperty(navigator, 'plugins', {
		// This just needs to have `length > 0` for the current test,
		// but we could mock the plugins too if necessary.
		get: () => [1, 2, 3, 4, 5],
		});
	});
	
	// Pass the Languages Test.
	await page.evaluateOnNewDocument(() => {
		// Overwrite the `plugins` property to use a custom getter.
		Object.defineProperty(navigator, 'languages', {
		get: () => ['zh-cn', 'en'],
		});
	});
};

浏览器特征测试

测试无头浏览器: 测试无头浏览器特征

<!DOCTYPE html>
<html lang="en">
	<head>
		<meta charset="UTF-8">
		<title>测试浏览器特征</title>
		<style>
.test-table{ width: 800px; height: 800px; margin: 0 auto; }
td:last-child { background-color: #c8d86d; max-width:300px; word-wrap:break-word; }
td.failed { background-color: #f45159; }
table, th, td { border: 1px solid black; }
		</style>
	</head>
	<body>
		<h1>h5s.club </h1>
		<h2> 此为浏览器特征值及是否被自动化及无头化测试页</h2>
		<h2>测试页:</h2>
		<table>
			<tr> <th>测试特征</th> <th>特征结果</th> </tr> 
			<tr> <td>User Agent</td> <td id="user-agent"></td> </tr>
			<tr> <td>WebDriver</td> <td id="Webdriver"></td> </tr>
			<tr> <td>Permissions</td> <td id="permissions-result"></td> </tr>
			<tr> <td>Chrome.runtime</td> <td id="chrome"></td> </tr>
			<tr> <td>Plugins Length</td> <td id="plugins-length"></td> </tr>
			<tr> <td>outerWidth</td> <td id="outerWidth"></td> </tr>
			<tr> <td>outerHeight</td> <td id="outerHeight"></td> </tr>
			<tr> <td>screen.width</td> <td id="screenWidth"></td> </tr>
			<tr> <td>screen.height</td> <td id="screenHeight"></td> </tr>
			<tr> <td>innerWidth</td> <td id="innerWidth"></td> </tr>
			<tr> <td>innerHeight</td> <td id="innerHeight"></td> </tr>
			<tr> <td>Languages</td> <td id="languages"></td> </tr>
			<tr> <td>Chromium PDF Plugin</td> <td id="Chromiun-PDF"></td> </tr>
			<tr> <td>Private Env</td> <td id="Private-Evn"></td> </tr>
			<tr> <td>WebGL Vendor</td> <td id="webgl-vendor"></td> </tr>
			<tr> <td>WebGL Renderer</td> <td id="webgl-renderer"></td> </tr>
		</table>
		<script>
// User-Agent Test: headless Chrome advertises "HeadlessChrome" in its UA string.
const userAgentElement = document.getElementById('user-agent');
userAgentElement.innerHTML = window.navigator.userAgent;
if (/HeadlessChrome/.test(window.navigator.userAgent)) {
	userAgentElement.classList.add('failed');
}

// Webdriver Test: a truthy `navigator.webdriver` is flagged.
const webdriverElement = document.getElementById('Webdriver');
webdriverElement.innerHTML = window.navigator.webdriver;
if (navigator.webdriver) {
	webdriverElement.classList.add('failed');
}

// Chrome Test: `window.chrome` itself may be missing, so guard the lookup;
// otherwise a TypeError here would abort every probe that follows.
const chromeElement = document.getElementById('chrome');
const chromeRuntime = window.chrome ? window.chrome.runtime : undefined;
chromeElement.innerHTML = chromeRuntime;
if (!chromeRuntime) {
	chromeElement.innerHTML = 'none runtime';
	chromeElement.classList.add('failed');
}


// Notification Test (kept for now).
// Flags the combination Notification.permission === 'denied' together with a
// Permissions API state of 'prompt'.
if ('Notification' in window) {
	const resultCell = document.getElementById('permissions-result');
	const checkNotificationPermission = async () => {
		const status = await navigator.permissions.query({ name: 'notifications' });
		resultCell.innerHTML = `${status.state} && ${Notification.permission}`;
		const inconsistent = Notification.permission === 'denied' && status.state === 'prompt';
		if (inconsistent) {
			resultCell.classList.add('failed');
		}
	};
	checkNotificationPermission();
}


// Simple property probes, evaluated in table order.
// Each row is [result cell id, value to display, whether the value is flagged].
const propertyProbes = [
	// Plugins Length Test
	['plugins-length', navigator.plugins.length, navigator.plugins.length === 0],
	// outerWidth / outerHeight Tests
	['outerWidth', window.outerWidth, window.outerWidth === 800],
	['outerHeight', window.outerHeight, window.outerHeight === 600],
	// screen.width / screen.height Tests
	['screenWidth', screen.width, screen.width === 800],
	['screenHeight', screen.height, screen.height === 600],
	// innerWidth / innerHeight Tests
	['innerWidth', window.innerWidth, window.innerWidth === 800],
	['innerHeight', window.innerHeight, window.innerHeight === 600],
	// Languages Test
	['languages', navigator.languages, !navigator.languages || navigator.languages.length === 0],
	// Chromium PDF Plugin Test (flagged when the plugin entry is present)
	['Chromiun-PDF', navigator.plugins["Chromium PDF Plugin"], Boolean(navigator.plugins["Chromium PDF Plugin"])],
];

for (const [cellId, shownValue, flagged] of propertyProbes) {
	const cell = document.getElementById(cellId);
	cell.innerHTML = shownValue;
	if (flagged) {
		cell.classList.add('failed');
	}
}

// WebGL Tests: report the GPU vendor/renderer and flag the software
// rasterizer strings 'Brian Paul' / 'Mesa OffScreen'.
const canvas = document.createElement('canvas');
// 'experimental-webgl' is the legacy context id.
const gl = canvas.getContext('webgl') || canvas.getContext('experimental-webgl');
if (gl) {
	// getExtension() returns null when the extension is unavailable; fall back
	// to the masked VENDOR / RENDERER parameters instead of throwing.
	const debugInfo = gl.getExtension('WEBGL_debug_renderer_info');

	// WebGL Vendor Test
	const webGLVendorElement = document.getElementById('webgl-vendor');
	const vendor = gl.getParameter(debugInfo ? debugInfo.UNMASKED_VENDOR_WEBGL : gl.VENDOR);
	webGLVendorElement.innerHTML = vendor;
	if (vendor === 'Brian Paul') {
		webGLVendorElement.classList.add('failed');
	}

	// WebGL Renderer Test
	const webGLRendererElement = document.getElementById('webgl-renderer');
	const renderer = gl.getParameter(debugInfo ? debugInfo.UNMASKED_RENDERER_WEBGL : gl.RENDERER);
	webGLRendererElement.innerHTML = renderer;
	if (renderer === 'Mesa OffScreen') {
		webGLRendererElement.classList.add('failed');
	}
}


// Private-mode Test
const PrivateEvn = document.getElementById('Private-Evn');
/**
 * Probe for a private/incognito browsing session and report through `cb`.
 *
 * @param {function(boolean): void} cb - Called with `true` when a probe
 *   indicates private mode and `false` otherwise. Depending on the branch taken
 *   it is invoked synchronously or from a browser callback.
 */
function detectPrivateMode(cb) {
	// `on` reports private mode (cb(true)); `off` reports normal mode (cb(false)).
	var db,
		on = cb.bind(null, true),
		off = cb.bind(null, false)

	// Probe used by the Safari branch: an exception from openDatabase() is
	// treated as private mode.
	function tryls() {
		var isPrivate = false;
		try {
			window.openDatabase(null,null,null,null);
		} catch (e) {
			isPrivate = true;
		}
		isPrivate ? on() : off()
	}
	// indexedDB error handler used by the Firefox branch; `this` is the open request.
	function errorIndexDb(event) {
		event.preventDefault()
		// Only an InvalidStateError counts as private mode; any other error does not.
		if (this.error && this.error.name === 'InvalidStateError') {
			on()
		} else {
			off()
		}
	}

	// One ternary chain; the first matching browser probe wins.
	// Blink (chrome & opera): success callback -> off, error callback -> on
	window.webkitRequestFileSystem ? webkitRequestFileSystem(0, 0, off, on)
	// FF
	// : "MozAppearance" in document.documentElement.style ? (db = indexedDB.open("test"), db.onerror = on, db.onsuccess = off)
		: "MozAppearance" in document.documentElement.style ? (db = indexedDB.open("test"), db.onerror = errorIndexDb, db.onsuccess =off)
	// Safari
		: /constructor/i.test(window.HTMLElement) || window.safari ? tryls()
	// IE10+ & edge
		: !window.indexedDB && (window.PointerEvent || window.MSPointerEvent) ? on()
	// Rest
		: off()
}


// Render the verdict into the "Private-Evn" cell; private mode is flagged.
detectPrivateMode(function (isPrivateMode) {
	if (isPrivateMode) {
		PrivateEvn.innerHTML= 'private';
		PrivateEvn.classList.add('failed');
	}else{
		PrivateEvn.innerHTML= 'no private'
	}
})
		</script>
	</body>
</html>

puppeteer性能测试

puppeteer性能测试

<!DOCTYPE html>
<html lang="en">
	<head>
		<meta charset="UTF-8">
		<title>puppeteer性能角度分析</title>
		<style> </style>
	</head>
	<body>
		<button id="myBtn">测试性能</button>
		<p id="totalTime"></p>
		<div class="wrap"></div>
		<script>
// Wire up the DOM-churn benchmark once the page has finished loading.
window.onload = function () {
	const container = document.querySelector('.wrap');
	const trigger = document.getElementById('myBtn');
	const elapsedLabel = document.getElementById('totalTime');
	// Shared across clicks: grows by one <li> on every loop iteration.
	let liHtml = "";

	// Resize/recolor the container, then append the growing <li> string 500 times.
	function changeWrap() {
		Object.assign(container.style, { width: '500px', height: '500px', background: 'red' });

		let remaining = 500;
		while (remaining-- > 0) {
			liHtml = liHtml + "<li></li>";
			container.innerHTML = container.innerHTML + liHtml;
		}
	}

	// Show the elapsed wall-clock milliseconds of one changeWrap() run.
	trigger.addEventListener('click', function () {
		const startedAt = Date.now();
		changeWrap();
		elapsedLabel.innerText = Date.now() - startedAt;
	});
};
		</script>
	</body>
</html>

测试PhantomTest特征

测试PhantomTest特征

<html>
	<head>
		<meta charset="utf-8">
		<title>测试PhantomTest性能</title>
	</head>
	<body>
		<div><img src="ph.jpg" alt=""></div>
		<div><p>test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\test\</p></div>
		<p>outerWidth是:</p><p id="outerWidth"></p>
		<p>outerHeight是:</p><p id="outerHeight"></p>
		<p>screen.width是:</p><p id="screenWidth"></p>
		<p>screen.height:</p><p id="screenHeight"></p>
		<p>window.innerWidth是:</p> <p id="innerWidth"></p>
		<p>window.innerHeight是:</p> <p id="innerHeight"></p>
		<script>
// Print each window/screen dimension into the element whose id matches the row.
const dimensionReadings = [
	['outerWidth', window.outerWidth],
	['outerHeight', window.outerHeight],
	['screenWidth', screen.width],
	['screenHeight', screen.height],
	['innerWidth', window.innerWidth],
	['innerHeight', window.innerHeight],
];

for (const [targetId, reading] of dimensionReadings) {
	document.getElementById(targetId).innerHTML = reading;
}
		</script>
	</body>
</html>


截图限制

https://zxc0328.github.io/2018/02/12/hdchrome-long-capture/

// Capture pages taller than the per-screenshot limit as slices and stitch
// the slices together with sharp.
await page.setViewport({ width: 1440, height: 1024});
const {contentSize} = await page._client.send('Page.getLayoutMetrics');
// MAGIC NUMBER, DO NOT MODIFY THIS OR YOU WILL BE FIRED
const maxScreenshotHeight = 7000;
if (contentSize.height >= maxScreenshotHeight) {
	// Image accumulated so far, as an encoded buffer.
	let stitched;

	for (let offset = 0; offset < contentSize.height; offset += maxScreenshotHeight) {
		const sliceHeight = Math.min(contentSize.height - offset, maxScreenshotHeight);
		const slice = await page.screenshot({
			clip: { x: 0, y: offset, width: contentSize.width, height: sliceHeight },
		});

		// The first slice starts the image; later slices extend the canvas
		// downwards and are overlaid at their vertical offset.
		const composed = offset === 0
			? sharp(slice)
			: sharp(stitched)
				.extend({top: 0, bottom: sliceHeight, left: 0, right: 0})
				.overlayWith(slice, {top: offset, left:0});
		stitched = await composed.toBuffer();
	}
	fileData = stitched;
}

启动浏览器

  • 开启浏览器界面:headless: false
  • 开启开发者控制台:devtools: true
  • 自定义浏览器宽高:page.setViewport
  • 产生两个tab页
    • 官方打开页面:await browser.pages(),会产生两个tab页,一个是目标tab页,一个是blank页
    • 修改后:(await browser.pages())[0],仅打开目标tab页
/**
 * Entry point: runs openPage() to completion, then createCer().
 *
 * @returns {Promise<void>}
 */
async init() {
    await this.openPage();
    await this.createCer();
}
 
async openPage() {
 
    // 打开浏览器
    browser = await puppeteer.launch({
        headless: false, // 开启界面,
        devtools: true,  // 开启开发者控制台   
    });
 
    // 打开一个空白页
    page = (await browser.pages())[0];
 
    try {
 
        // 设置 浏览器视窗
        await page.setViewport({
            width: 1300,
            height: 938,
        });
 
        // 跳转 目的页
        await page.goto("http://127.0.0.1/demo.html");
 
    } catch (error) {
        await this.openPage();
        throw new Error('请求页面超时,尝试重新连接');
    }
}

操作页面

  • 为了能够获取目标节点,当遇到页面跳转、点击下拉时,可先等待随机秒数:await page.waitFor(utilFun.random(1000, 3000));
  • 想获取元素的属性:page.$eval()
  • 想操作dom元素:page.evaluate();为了能够准确获取dom元素,可使用setTimeout延时若干秒后,再进行相应操作
  • 正则中若想含有变量:let reg = new RegExp(`${username}`);
async createCer() {
   
    const type = this.type;
 
    const Development = "#ios-nav > li:nth-child(1) ul > li:nth-child(3)";
    const Production = await page.$("#ios-nav > li:nth-child(1) ul > li:nth-child(4)");
 
    switch (type) {
        case "dev":
            await this.addIosCertificates(Development);
            break;
        case "dis":
            await this.addIosCertificates(Production);
            break;
        default:
            break
    }
}
 
async addIosCertificates(ele) {
 
    // 点击 侧边栏 类型
    await page.waitFor(utilFun.random(1000, 3000));
    await page.click(ele);
 
    // 点击 add 添加IOS证书
    await page.waitFor(utilFun.random(1000, 3000));
    await page.click(".toolbar-button.add");
 
    // 判断 radio 是否能点击
    await page.waitFor(utilFun.random(1000, 3000));
    const radioDisabled = await page.$eval("#type-development-0", async el => {
        return el.disabled;
    });
 
    // 如果证书数量满额,先删除,后增加
    if (radioDisabled) {
 
        // 点击 侧边栏 类型
        await page.waitFor(utilFun.random(1000, 3000));
        await page.click(`${ele}`);
 
        // 删除 IOS证书
        await page.waitFor(utilFun.random(1000, 3000));
        await this.deleteCer();
    } else {
        // 增加 IOS证书
        await this.addCer();
    }
    
}
 
async deleteCer() {
    await page.evaluate(async (username) => {
        let tableInfo = "";
        let reg = new RegExp(`${username}`);
        const table = document.querySelectorAll(".data-table")[1].querySelector("tbody");
 
        for (let i = 0; i < table.rows.length; i++) {
            for (let j = 0; j < table.rows[i].cells.length; j++) {
                tableInfo = table.rows[i].cells[j].innerText;
                if (reg.test(tableInfo) && (i % 2 == 0)) {
                    // 名字
                    let name = table.rows[i].cells[j].innerText;
                    // 类型
                    let type = table.rows[i].cells[j + 1].innerText;
                    // 期限
                    let expires = table.rows[i].cells[j + 2].innerText;
 
                    // 点击 下拉
                    table.rows[i].click();
 
                    // 点击 Revoke
                    setTimeout(() => {
                        document.querySelector(".button-no-padding-top.small.revoke-button").click();
                    }, 1000);
 
                    // 点击 弹窗 Revoke
                    setTimeout(() => {
                        document.querySelector(".ui-dialog-content.ui-widget-content .button.small.red.ok").click();
                    }, 3000);
 
                }
            }
        }
 
    }, username);
}

取得页面的节点$$

// Collect the text content of every matching <span> as { name } records.
let recArr = [];
const elements = await page.$$('.do-xxkwo span');
if (elements && elements.length) {
	for (const el of elements) {
		// The callback parameter must be the name used in its body.
		const name = await el.evaluate((span) => span.textContent);
		recArr.push({name});
	}
}

取得节点作为参数传递执行$

// Get the element handle (null when no element matches).
const element = await page.$('button[aria-label="Next Page"]');

// Pass the handle back into the page context as an argument.
// hasAttribute() is used because getAttribute('disabled') returns "" (falsy)
// for a present boolean attribute.
const isDisable = await page.evaluate(
	(el) => el !== null && el.hasAttribute('disabled'),
	element
);
if (isDisable) {
	console.log('next page is disabled');
}

模拟点击动作

// Click the "Next Page" button, then wait for the network to become idle.
await page.click('button[aria-label="Next Page"]');
await page.waitForNetworkIdle();

滚动页面

/**
 * Scroll the page down in fixed steps until the scrolled distance reaches the
 * document body's scroll height.
 *
 * The height is re-read on every tick, so content that grows while scrolling
 * keeps the loop going.
 *
 * @param {object} page - Object exposing `evaluate(fn, ...args)` (a Puppeteer Page).
 * @param {number} [distance=100] - Pixels scrolled per tick.
 * @param {number} [delay=100] - Milliseconds between ticks.
 * @returns {Promise<void>} Resolves once scrolling has finished.
 */
async function autoScroll(page, distance = 100, delay = 100) {
	await page.evaluate(async (step, interval) => {
		await new Promise((resolve) => {
			let totalHeight = 0;
			const timer = setInterval(() => {
				const scrollHeight = document.body.scrollHeight;
				window.scrollBy(0, step);
				totalHeight += step;
				if (totalHeight >= scrollHeight) {
					clearInterval(timer);
					resolve();
				}
			}, interval);
		});
	}, distance, delay);
}

文件上传下载

上传文件

    // Choose a file: after a random 1-3 s pause, take the first
    // input[type=file] element and hand it the local file path.
    await page.waitFor(utilFun.random(1000, 3000));
    const upload_file = await page.$("input[type=file]");
    await upload_file.uploadFile("你的文件路径");

文件下载

    // Download the iOS certificate: call downloadFile() with the target path
    // (presumably it configures the download location - confirm against its definition),
    // then click the blue button after a random 1-3 s pause.
    await this.downloadFile("你的文件路径");
     
    await page.waitFor(utilFun.random(1000, 3000));
    await page.click(".button.small.blue");

请求修改

// Intercept requests so that selected endpoints are answered from taskData.
// When no mocked hotel info exists, the live GetSecondaryData response body is
// captured instead.
await page.setRequestInterception(true);
new Promise((resolve, reject) => {
	// Registered once, outside the request handler, so repeated matching
	// requests do not stack up duplicate response listeners.
	page.on('response', response => {
		if (response.url().indexOf('/api/GetSecondaryData?') === -1) {
			return;
		}
		if (taskData.hotelInfo) {
			// The request was answered from taskData; nothing to capture.
			return;
		}
		response.text().then(function (result1) {
			results = result1;
			resolve(results);
			results = replacePriceFloor(taskData, results);
		}).catch((e) => {
			console.error(e);
			reject(e);
		});
	});

	page.on('request', request => {
		const url = request.url();
		if (url.indexOf('/pageparams/property?') > -1) {
			console.log("match price url : " + url);
			request.respond({
				status: 200,
				contentType: 'application/json; charset=utf-8',
				body: taskData.priceData
			});
		} else if (url.indexOf('/api/GetSecondaryData?') > -1) {
			console.log("match info url : " + url);
			if (taskData.hotelInfo) {
				request.respond({
					status: 200,
					contentType: 'application/json; charset=utf-8',
					body: taskData.hotelInfo
				});
			} else {
				request.continue();
			}
		} else {
			request.continue();
		}
	});
}).catch((e) => { console.error(e) });

Cookie

// Rewrite the language/currency segments of the "agoda.version.03" cookie and
// write the rewritten cookies back to the page.
const cookies = await page.cookies();

// Replace every `pattern` match in `value` with `segment`, or append `segment`
// when there is no match. A replacer function keeps `$` in the segment literal.
const upsertSegment = (value, pattern, segment) =>
	value.search(pattern) > -1 ? value.replace(pattern, () => segment) : value + segment;

let newCookies = cookies.map(cookie => {
	const name = cookie.name;
	const domain = cookie.domain;
	let value = cookie.value;
	console.log("-----------------------------");
	console.log(domain);
	console.log(name);
	console.log(value);
	console.log("-----------------------------");
	if (name === "agoda.version.03") {
		value = upsertSegment(value, /&DLang=[-A-Za-z]*/g, "&DLang=" + taskData.langName);
		value = upsertSegment(value, /&CuLang=[0-9]*/g, "&CuLang=" + taskData.langNum);
		value = upsertSegment(value, /&CurLabel=[A-Za-z]*/g, "&CurLabel=" + taskData.currencyCode);
		value = upsertSegment(value, /&CuCur=[0-9]*/g, "&CuCur=" + taskData.currencyNum);
		console.log(value);
	}
	return {name, value, domain};
});
// Set the rewritten cookies, not the unmodified `cookies` list.
await Promise.all(newCookies.map(cookie => { return page.setCookie(cookie); }));

常见问题

清磁盘缓存

@echo off        
rem Delete temporary, log and cache files to free disk space.
rem del switches: /f also deletes read-only files, /s recurses into subdirectories, /q suppresses prompts.
echo clearing tmpfile     
rem Per-user temp directories.
del /f /s /q "%userprofile%\Local Settings\Temp\"        
del /f /s /q "%userprofile%\AppData\Local\Temp\"        
rem Log files under c:\var\crawl\log.
del /f /s /q c:\var\crawl\log\*.log
rem Drive-wide sweep by extension on the system drive, including every *.log file.
del /f /s /q %systemdrive%\*.tmp        
del /f /s /q %systemdrive%\*._mp        
del /f /s /q %systemdrive%\*.log        
del /f /s /q %systemdrive%\*.gid        
del /f /s /q %systemdrive%\*.chk        
del /f /s /q %systemdrive%\*.old        
del /f /s /q %systemdrive%\recycled\*.*        
rem Windows directory: backup files, prefetch data, and a recreated temp folder.
del /f /s /q %windir%\*.bak        
del /f /s /q %windir%\prefetch\*.*        
rd /s /q %windir%\temp & md %windir%\temp        
rem Per-user cookies, recent-file entries and temporary internet files.
del /f /q %userprofile%\cookies\*.*        
del /f /q %userprofile%\recent\*.*        
del /f /s /q "%userprofile%\Local Settings\Temporary Internet Files\*.*"        
del /f /s /q "%userprofile%\Local Settings\Temp\*.*"        
del /f /s /q "%userprofile%\recent\*.*"

打开页面卡死

Stalls on browser.newPage() running on node.js: issues1543

禁用沙盒,显示浏览器并把IO Dump出来看错误在哪里:

// Three alternative launch calls - use one of them, not all three.
// 1) Sandbox disabled.
const browser = await puppeteer.launch({args: ['--no-sandbox', '--disable-setuid-sandbox']});
// 2) Sandbox disabled, with the dumpio option enabled.
const browser = await puppeteer.launch({dumpio: true, args: ['--no-sandbox', '--disable-setuid-sandbox']});
// 3) Same as 2, with headless mode turned off so the browser window is visible.
const browser = await puppeteer.launch({dumpio: true, headless: false, args: ['--no-sandbox', '--disable-setuid-sandbox']});