1. puppeteer是一个用于控制无头(headless)Chrome/Chromium浏览器的Node.js组件,可以模拟访问网站或者加载html源文件,并返回pdf流。
2. 使用puppeteer需要预先安装nodejs(安装最新版本nodejs即可),具体安装步骤可参考:《Node.js安装教程》(北忆的博客,CSDN博客)。
3. puppeteer安装具体步骤如下:进入cmd控制台,执行如下命令:npm install -g puppeteer
安装完成后,会在npm全局模块目录下(可执行 npm root -g 查看该目录,原文此处配有截图)生成puppeteer安装包。
4. 在puppeteer安装包下直接创建如下2个文件,此2个文件为2种实现方式:
方式1:puppeteer.js,表示从对应URL处获取页面内容,并返回pdf流。具体代码如下:
const puppeteer = require('puppeteer');

// Android (MI 8) WebView user-agent string sent with every request;
// presumably chosen so that sites serve their mobile layout.
const MOBILE_USER_AGENT = "Mozilla/5.0 (Linux; Android 8.1.0; MI 8 Build/OPM1.171019.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36";

/**
 * Command-line entry point: `node puppeteer.js <url> <type>`.
 *
 * Opens <url> in a headless browser using a 480x800 mobile viewport and waits
 * for the 'networkidle2' condition. When <type> is 'pdf', the page is rendered
 * as an A4 PDF, a copy is saved to the hard-coded path below, and the PDF bytes
 * are written to stdout for the calling process to read. Any other <type>
 * only loads the page and closes the browser.
 *
 * On failure the error is printed to stderr and the exit code is set to 1.
 */
(async () => {
  const [, , address, types] = process.argv;

  if (!address) {
    console.error('Usage: node puppeteer.js <url> <type>');
    process.exitCode = 1;
    return;
  }

  const browser = await puppeteer.launch();
  let pdf;
  try {
    const page = await browser.newPage();
    // setUserAgent returns a promise; await it so it is applied before navigating.
    await page.setUserAgent(MOBILE_USER_AGENT);
    await page.setViewport({ width: 480, height: 800, isMobile: true });
    await page.goto(address, { waitUntil: 'networkidle2' });

    if (types === 'pdf') {
      // NOTE: besides returning the bytes, this also saves a copy to a
      // hard-coded location on drive D:.
      pdf = await page.pdf({ path: 'd://page.pdf', format: 'A4' });
    }
  } finally {
    // Always release the browser process, even when navigation or rendering fails.
    await browser.close();
  }

  if (pdf !== undefined) {
    process.stdout.write(pdf);
  }
})().catch((err) => {
  console.error(err);
  // exitCode (rather than process.exit) lets pending stream writes finish.
  process.exitCode = 1;
});
方式2:setContent.js(Windows系统使用),表示读取html文件的源内容,并返回pdf流。具体代码如下:
const puppeteer = require('puppeteer');
const fs = require('fs');

// Android (MI 8) WebView user-agent string applied to the page.
const MOBILE_USER_AGENT = "Mozilla/5.0 (Linux; Android 8.1.0; MI 8 Build/OPM1.171019.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36";

/**
 * Command-line entry point: `node setContent.js <htmlFilePath>`.
 *
 * Reads the HTML file as UTF-8, loads it into a headless browser page with a
 * 480x800 mobile viewport, renders it as an A4 PDF, saves a copy as
 * 'page.pdf' (relative to the working directory) and writes the PDF bytes to
 * stdout for the calling process to read.
 *
 * On failure the error is printed to stderr and the exit code is set to 1.
 */
(async () => {
  const htmlFilePath = process.argv[2];

  if (!htmlFilePath) {
    console.error('Usage: node setContent.js <htmlFilePath>');
    process.exitCode = 1;
    return;
  }

  // Read the input first so a missing file fails before a browser is started.
  const contentHtml = fs.readFileSync(htmlFilePath, 'utf-8');

  const browser = await puppeteer.launch();
  let pdf;
  try {
    const page = await browser.newPage();
    // setUserAgent returns a promise; await it so it is applied before the content loads.
    await page.setUserAgent(MOBILE_USER_AGENT);
    await page.setViewport({ width: 480, height: 800, isMobile: true });
    await page.setContent(contentHtml);
    pdf = await page.pdf({ path: 'page.pdf', format: 'A4' });
  } finally {
    // Always release the browser process, even when rendering fails.
    await browser.close();
  }

  process.stdout.write(pdf);
})().catch((err) => {
  console.error(err);
  // exitCode (rather than process.exit) lets pending stream writes finish.
  process.exitCode = 1;
});
setContent.js(Linux系统使用),具体代码如下:
const puppeteer = require('puppeteer');
const fs = require('fs');

// Android (MI 8) WebView user-agent string applied to the page.
const MOBILE_USER_AGENT = "Mozilla/5.0 (Linux; Android 8.1.0; MI 8 Build/OPM1.171019.011; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/62.0.3202.84 Mobile Safari/537.36";

/**
 * Command-line entry point: `node setContent.js <htmlFilePath>`.
 *
 * Reads the HTML file as UTF-8, loads it into a headless browser page with an
 * 1800x1800 viewport, renders it as a PDF, saves a copy as 'page.pdf'
 * (relative to the working directory) and writes the PDF bytes to stdout for
 * the calling process to read. The browser is launched with its sandbox
 * disabled via the --no-sandbox / --disable-setuid-sandbox flags.
 *
 * On failure the error is printed to stderr and the exit code is set to 1.
 */
(async () => {
  const htmlFilePath = process.argv[2];

  if (!htmlFilePath) {
    console.error('Usage: node setContent.js <htmlFilePath>');
    process.exitCode = 1;
    return;
  }

  // Read the input first so a missing file fails before a browser is started.
  const contentHtml = fs.readFileSync(htmlFilePath, 'utf-8');

  const browser = await puppeteer.launch({
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  let pdf;
  try {
    const page = await browser.newPage();
    // setUserAgent returns a promise; await it so it is applied before the content loads.
    await page.setUserAgent(MOBILE_USER_AGENT);
    await page.setViewport({ width: 1800, height: 1800 });
    await page.setContent(contentHtml);
    // NOTE(review): `fullPage` is documented as a page.screenshot() option;
    // confirm whether it has any effect on page.pdf().
    pdf = await page.pdf({ path: 'page.pdf', fullPage: true });
  } finally {
    // Always release the browser process, even when rendering fails.
    await browser.close();
  }

  process.stdout.write(pdf);
})().catch((err) => {
  console.error(err);
  // exitCode (rather than process.exit) lets pending stream writes finish.
  process.exitCode = 1;
});
5. puppeteer.js对应的Java调用代码如下:
/**
 * Converts a web page to PDF by running a Puppeteer script with Node.js and
 * streams the script's standard output to the browser as a file download.
 *
 * @param response    servlet response the file is written to
 * @param fileName    download file name (URL-encoded as UTF-8 before use)
 * @param puppeteerjs path of the js file that node should execute
 * @param webSiteUrl  URL of the web page to render
 * @param types       output type passed to the script: "pdf" to generate a PDF file,
 *                    "jpg" to generate a JPG image
 */
public static void parseHtml2Pdf(HttpServletResponse response, String fileName, String puppeteerjs, String webSiteUrl, String types) {
    try {
        // Pass each argument separately instead of building one command string, so
        // values containing spaces are not split into extra arguments.
        // String.valueOf mirrors the previous string concatenation for null values.
        ProcessBuilder builder = new ProcessBuilder(
                "node", String.valueOf(puppeteerjs), String.valueOf(webSiteUrl), String.valueOf(types));
        // Forward the script's stderr so the child cannot block on a full, unread pipe.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process p = builder.start();

        byte[] data;
        try (InputStream is = new BufferedInputStream(p.getInputStream())) {
            data = IOUtils.toByteArray(is);
        }

        int exitCode = p.waitFor();
        if (exitCode != 0) {
            // Do not send partial or empty output as if it were a valid file.
            throw new IOException("node " + puppeteerjs + " exited with code " + exitCode);
        }

        fileName = URLEncoder.encode(fileName, "UTF-8");
        response.setHeader("Content-Disposition", "attachment; filename=\"" + fileName + "\"");
        response.addHeader("Content-Length", "" + data.length);
        response.setContentType("application/octet-stream;charset=UTF-8");
        try (OutputStream outputStream = new BufferedOutputStream(response.getOutputStream())) {
            outputStream.write(data);
            outputStream.flush();
        }
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        // Preserve the interrupt status for the caller's thread handling.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
}
6. setContent.js对应的Java调用代码如下:
/**
 * Converts a temporary HTML file to PDF by running setContent.js with Node.js
 * and saves the script's standard output as
 * {@code diseaseControlPdfPath + htmlFileName + ".pdf"}.
 *
 * Failures are logged; nothing is thrown to the caller.
 *
 * @param htmlFileName name used to look up the HTML temp file and to name the PDF
 */
public void parseHtml2Pdf(String htmlFileName) {
    try {
        // Pass the script and the HTML path as separate arguments so paths
        // containing spaces are not split into extra arguments.
        String scriptPath = createPdfJsPath + "setContent.js";
        String htmlPath = String.valueOf(this.getHtmlTempPath(htmlFileName));
        ProcessBuilder builder = new ProcessBuilder("node", scriptPath, htmlPath);
        // Forward the script's stderr so the child cannot block on a full, unread pipe.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process p = builder.start();

        byte[] data;
        try (InputStream is = new BufferedInputStream(p.getInputStream())) {
            data = IOUtils.toByteArray(is);
        }

        int exitCode = p.waitFor();
        if (exitCode != 0) {
            // Do not save partial or empty output as if it were a valid PDF.
            throw new IOException("node " + scriptPath + " exited with code " + exitCode);
        }

        File file = new File(diseaseControlPdfPath + htmlFileName + ".pdf");
        // try-with-resources closes the file handle, which IOUtils.write does not do.
        try (FileOutputStream out = new FileOutputStream(file)) {
            IOUtils.write(data, out);
        }
    } catch (IOException e) {
        log.error("in parseHtml2Pdf has an error,e is ",e);
    } catch (InterruptedException e) {
        // Preserve the interrupt status for the caller's thread handling.
        Thread.currentThread().interrupt();
        log.error("in parseHtml2Pdf was interrupted,e is ", e);
    }
}
|