java项目实现html转pdf的需求（支持中文和CSS样式）

java项目中用到了html转pdf的需求，现在写一个自己认为优秀方案的总结
第一种使用wkhtmltopdf更简单也更好用，第二种做参考，对图形和表格支持不太友好
下面是第一种也是个人比较推荐的啦：wkhtmltopdf
注意：外部样式.css文件中的样式容易丢失，最好copy到html的style标签内
第一步：下载并安装wkhtmltopdf。Windows下安装位置可自行选择，但要同步修改代码中的调用路径；Linux下请将其安装在opt目录下（/opt）。
第二步：使用Process process执行wkhtmltopdf.exe命令有时会因为流的缓存问题出现阻塞，需要先写一个线程用来清空流缓存
下面：直接上代码了，有些注释没有去掉，自行参考吧
清除缓存：


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
 
/**
 * Drains an {@link InputStream} so that whatever is writing to it (for example
 * a child process) does not block on a full output buffer.
 *
 * <p>Every line read is echoed to {@code System.out} in the form
 * {@code type===>line}. The stream is closed once it is exhausted or a read
 * fails.
 */
public class ClearBufferThread implements Runnable {
    private final InputStream inputStream;
    private final String type;

    /**
     * @param type        label printed in front of every echoed line (e.g. "Input" or "Error")
     * @param inputStream stream to drain; it is closed by {@link #run()}
     */
    public ClearBufferThread(String type, InputStream inputStream) {
        this.inputStream = inputStream;
        this.type = type;
    }

    /**
     * Reads the stream line by line until end of input, echoing each line.
     *
     * @throws RuntimeException wrapping the {@link IOException} if reading or closing fails
     */
    @Override
    public void run() {
        // Closing the BufferedReader also closes the wrapped inputStream, so a
        // single try-with-resources releases everything exactly once.
        // The text is decoded with the platform default charset.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(type + "===>" + line);
            }
        } catch (IOException e) {
            // Rethrow only: when this runs on its own thread the default
            // uncaught-exception handler already prints the stack trace.
            throw new RuntimeException(e);
        }
    }
}

正式开始转换啦：


import java.io.FileOutputStream;
import java.io.OutputStream;

 
/**
 * wkhtmltopdf工具类
 *
 * 约定：
 *      1. 插件安装位置，windows自己选，要在调用中可以自行修改路径， 在Linux系统中将插件安装在opt目录下（/opt）
 *
 * 注意：
 *      1. wkhtmltopdf的Linux版本中，解压后，默认的文件名为"wkhtmltox"，为了统一起见，一律将解压后的文件名，重命名为"wkhtmltopdf"（命令：mv wkhtmltox wkhtmltopdf）
 *
 */
public class WKHtmlToPdfUtil {
    /**
     * 将HTML文件内容输出为PDF文件
     *
     * @param htmlFilePath HTML文件路径
     * @param pdfFilePath  PDF文件路径
     */
    public static void htmlToPdf(String htmlFilePath, String pdfFilePath) {
    	Process process = null;
        try {//注意命令调用路径与安装路径保持一致
        	process = Runtime.getRuntime().exec(getCommand(htmlFilePath, pdfFilePath));
        	//为了防止waitFor因为流缓存而阻塞，启用两个线程进行流的读取
            new Thread(new ClearBufferThread("Input",process.getInputStream())).start();
            new Thread(new ClearBufferThread("Error",process.getErrorStream())).start();
            process.waitFor();
        } catch (Exception e) {
        	e.printStackTrace();
            throw new RuntimeException(e);
        }finally {
        	process.destroy();
        }
    }
 
 
    /**
     * 将HTML字符串转换为HTML文件
     *
     * @param htmlStr HTML字符串
     * @return HTML文件的绝对路径
     */
    public static String strToHtmlFile(String htmlStr,String path) {
        OutputStream outputStream = null;
        try {
            String htmlFilePath = path+ ".html";// UUID.randomUUID().toString() +
            outputStream = new FileOutputStream(htmlFilePath);
            outputStream.write(htmlStr.getBytes("UTF-8"));
            return htmlFilePath;
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            try {
                if (outputStream != null) {
                    outputStream.close();
                    outputStream = null;
                }
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }
 
    /**
     * 获得HTML转PDF的命令语句,注意命令调用路径与安装路径保持一致
     *（一些命令参数可以自行去做修改，或者使用）
     * @param htmlFilePath HTML文件路径
     * @param pdfFilePath  PDF文件路径
     * @return HTML转PDF的命令语句
     */
    private static String getCommand(String htmlFilePath, String pdfFilePath) {
        String osName = System.getProperty("os.name");
        StringBuilder cmd = new StringBuilder();
        cmd.append("C:/Program Files/wkhtmltopdf/bin/wkhtmltopdf.exe ");
        cmd.append(" ");
//        cmd.append(" --header-line");//页眉下面的线
        //cmd.append(" --header-center 这里是页眉这里是页眉这里是页眉这里是页眉 ");//页眉中间内容
        cmd.append(" --margin-top 1.8cm ");//设置页面上边距 (default 10mm) 
        cmd.append(" --margin-right 1.5cm ");//设置页面下边距 (default 10mm) 
        cmd.append(" --margin-bottom 1.8cm ");//设置页面下边距 (default 10mm) 
        cmd.append(" --margin-left 1.5cm ");//设置页面下边距 (default 10mm) 
        cmd.append(" --page-size Letter ");//纸张大小A4, Letter, etc.
//        cmd.append(" --header-html file:///"+WebUtil.getServletContext().getRealPath("")+FileUtil.convertSystemFilePath("\style\pdf\head.html"));// (添加一个HTML页眉,后面是网址)
//        cmd.append(" --header-spacing 6 ");// (设置页眉和内容的距离,默认0)
        //cmd.append(" --footer-center (设置在中心位置的页脚内容)");//设置在中心位置的页脚内容
//        cmd.append(" --footer-html file:///"+WebUtil.getServletContext().getRealPath("")+FileUtil.convertSystemFilePath("\style\pdf\foter.html"));// (添加一个HTML页脚,后面是网址)
//        cmd.append(" --footer-line");//* 显示一条线在页脚内容上)
//        cmd.append(" --footer-spacing 6 ");// (设置页脚和内容的距离)
        cmd.append(" %s %s");
        // Windows
        if (osName.startsWith("Windows")) {//C:/Program Files/
//            return String.format("C:/Program Files/wkhtmltopdf/bin/wkhtmltopdf.exe %s %s", htmlFilePath, pdfFilePath);
//            return String.format("wkhtmltopdf/bin/wkhtmltopdf.exe %s %s", htmlFilePath, pdfFilePath);
            return String.format(cmd.toString(), htmlFilePath, pdfFilePath);
        }
        // Linux
        else {
            return String.format("/opt/wkhtmltopdf/bin/wkhtmltopdf %s %s", htmlFilePath, pdfFilePath);
        }
    }
 
}

第二种只是参考，也是我最开始用的方法，遇到图片和表格转换会有问题，不太理想，而且也比较麻烦
为了保证对中文的支持，需要在被转换的html中为body添加中文字体样式。该字体只要和后面转换pdf时使用的字体保持一致即可，不必一定是SimSun字体

 body{
	font-family:SimSun; 
 }

下面放上jar包的引用pom
注意：org.xhtmlrenderer的core-renderer R8这个jar包直接使用时不支持中文换行，需要做一些修改；可以下载已修改好的、支持中文换行的jar包，改成相同的文件名后替换掉原来的R8 jar包

<dependency>
	<groupId>org.xhtmlrenderer</groupId>
	<artifactId>core-renderer</artifactId>
	<version>R8</version>
</dependency>
<dependency>
	<groupId>org.apache.commons</groupId>
	<artifactId>commons-lang3</artifactId>
	<version>3.0</version>
</dependency>
<dependency>
	<groupId>com.itextpdf</groupId>
	<artifactId>itextpdf</artifactId>
	<version>5.5.10</version>
</dependency>
		<dependency>
		    <groupId>com.itextpdf</groupId>
		    <artifactId>html2pdf</artifactId>
		    <version>2.0.0</version>
		</dependency>

这种方式对html代码的规范性要求比较严格，类似xhtml的标准：要求html中的标签都是闭合的，像meta和link这样未闭合的标签，以及空格&nbsp;这类转义符，都会导致转换pdf报错。因此最好事先对html代码进行处理，以下是处理代码：


import java.io.File;
import java.io.FileInputStream;

import com.itextpdf.html2pdf.jsoup.Jsoup;
import com.itextpdf.html2pdf.jsoup.nodes.Document;
import com.itextpdf.html2pdf.jsoup.nodes.Entities;

/**
 * Re-serialises HTML as XHTML using the jsoup copy bundled with iText html2pdf.
 */
public class HtmlToXHtmlJsoup {
    /**
     * Parses the given HTML and writes it back out with XML syntax and the
     * XHTML entity escape mode.
     *
     * @param html HTML source text
     * @return the document serialised with the XML/XHTML output settings
     */
    public static String html2xhtml(String html) {
        Document doc = Jsoup.parse(html);
        doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml).escapeMode(Entities.EscapeMode.xhtml);
        return doc.html();
    }

    /**
     * Demo entry point: reads {@code report01.html} as UTF-8, prints it, converts
     * it with {@link #html2xhtml(String)} and prints the result.
     *
     * @param args unused
     * @throws Exception if the file cannot be read
     */
    public static void main(String[] args) throws Exception {
        File file = new File("report01.html");

        // Read until the buffer is full or end-of-file: a single read() call is
        // allowed to return fewer bytes than requested, and available() is only
        // an estimate, so neither may be relied on to load the whole file.
        byte[] buff = new byte[(int) file.length()];
        int filled = 0;
        try (FileInputStream input = new FileInputStream(file)) {
            int count;
            while (filled < buff.length
                    && (count = input.read(buff, filled, buff.length - filled)) != -1) {
                filled += count;
            }
        }
        String html = new String(buff, 0, filled, "utf-8");

        System.out.println("============html===================");
        System.out.println(html);
        String xhtml = HtmlToXHtmlJsoup.html2xhtml(html);
        System.out.println("============xhtml===================");
        System.out.println(xhtml);
    }
}

图片Base64支持类，没有用到可以不需要此类


import java.io.IOException;

import org.w3c.dom.Element;
import org.xhtmlrenderer.extend.FSImage;
import org.xhtmlrenderer.extend.ReplacedElement;
import org.xhtmlrenderer.extend.ReplacedElementFactory;
import org.xhtmlrenderer.extend.UserAgentCallback;
import org.xhtmlrenderer.layout.LayoutContext;
import org.xhtmlrenderer.pdf.ITextFSImage;
import org.xhtmlrenderer.pdf.ITextImageElement;
import org.xhtmlrenderer.render.BlockBox;
import org.xhtmlrenderer.simple.extend.FormSubmissionListener;

import com.lowagie.text.BadElementException;
import com.lowagie.text.Image;
import com.lowagie.text.pdf.codec.Base64;

/**
 * {@link ReplacedElementFactory} that replaces {@code img} elements with iText
 * image elements, including images embedded as base64 {@code data:image/} URIs.
 *
 * @author Administrator
 */
public class Base64ImgReplacedElementFactory implements ReplacedElementFactory {

    /**
     * Creates the replaced element for an {@code img} box.
     *
     * @param c         layout context
     * @param box       box being laid out
     * @param uac       user agent callback used to load non-embedded images
     * @param cssWidth  CSS width passed to the image's {@code scale} call
     * @param cssHeight CSS height passed to the image's {@code scale} call
     * @return an image element, or {@code null} when the box has no element, the
     *         element is not an {@code img}, or the image could not be built
     */
    public ReplacedElement createReplacedElement(LayoutContext c, BlockBox box, UserAgentCallback uac, int cssWidth,
            int cssHeight) {
        Element element = box.getElement();
        if (element == null) {
            return null;
        }
        // Only img elements are handled here.
        if (!element.getNodeName().equals("img")) {
            return null;
        }

        FSImage image;
        try {
            image = buildImage(element.getAttribute("src"), uac);
        } catch (BadElementException | IOException ex) {
            // An image that cannot be built is treated as "no replacement".
            return null;
        }
        if (image == null) {
            return null;
        }

        // Scale only when at least one of the two dimensions is not -1.
        boolean bothUnset = cssWidth == -1 && cssHeight == -1;
        if (!bothUnset) {
            image.scale(cssWidth, cssHeight);
        }
        return new ITextImageElement(image);
    }

    /**
     * Builds the image for an {@code src} attribute value.
     *
     * <p>A value starting with {@code data:image/} (case-insensitive) is treated
     * as an embedded image: everything after the {@code base64,} marker is
     * decoded and handed to iText. Any other value is loaded through the user
     * agent callback.
     *
     * @param srcAttr value of the {@code src} attribute
     * @param uac     user agent callback used for non-embedded images
     * @return the image
     * @throws IOException         declared for the image construction calls
     * @throws BadElementException declared for the image construction calls
     */
    protected FSImage buildImage(String srcAttr, UserAgentCallback uac) throws IOException, BadElementException {
        boolean embedded = srcAttr.toLowerCase().startsWith("data:image/");
        if (!embedded) {
            return uac.getImageResource(srcAttr).getImage();
        }

        final String marker = "base64,";
        int payloadStart = srcAttr.indexOf(marker) + marker.length();
        String payload = srcAttr.substring(payloadStart);
        byte[] imageBytes = Base64.decode(payload);
        return new ITextFSImage(Image.getInstance(imageBytes));
    }

    /** No state to reset. */
    @Override
    public void reset() {
    }

    /** Nothing is tracked per element, so there is nothing to remove. */
    @Override
    public void remove(Element e) {
    }

    /** Form submission is not supported; the listener is ignored. */
    @Override
    public void setFormSubmissionListener(FormSubmissionListener listener) {
    }

}

下面是java中html转pdf的具体实现。不需要图片base64支持的，可以注释掉其中设置Base64ImgReplacedElementFactory的那段代码；中文字体使用的Fonts/SimSun.ttc是我自己的字体存放位置，具体的路径需要改为自己的

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.MalformedURLException;

import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;

import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.BaseFont;

/**
 * Converts XHTML to PDF with Flying Saucer's {@link ITextRenderer}, registering
 * a Chinese font so that Chinese text is rendered.
 */
public class HtmltoPDF {
    /**
     * Demo entry point converting a sample file on the desktop.
     *
     * @param args unused
     * @throws Exception if the conversion fails
     */
    public static void main(String[] args) throws Exception {
        html2pdf("C:/Users/I/Desktop/新建文本文档.html", "C:/Users/I/Desktop/新建文本文档.pdf");
    }

    /**
     * Converts an XHTML file to a PDF file.
     *
     * @param inputFile path of the XHTML file to convert
     * @param outFile   path of the PDF file to write
     * @throws Exception with the original failure as its cause if the input
     *                   cannot be read, the output cannot be written, or PDF
     *                   generation fails
     */
    public static void html2pdf(String inputFile, String outFile) throws Exception {
        try {
            String url = new File(inputFile).toURI().toURL().toString();
            try (OutputStream os = new FileOutputStream(outFile)) {
                ITextRenderer renderer = new ITextRenderer();
                renderer.setDocument(url);
                ITextFontResolver fontResolver = renderer.getFontResolver();

                // Base64 image support: converts img elements into iText image
                // objects. These two lines can be removed if it is not needed.
                renderer.getSharedContext().setReplacedElementFactory(new Base64ImgReplacedElementFactory());
                renderer.getSharedContext().getTextRenderer().setSmoothingThreshold(0);

                // To resolve relative image paths, a base URL can be set, e.g.:
                // renderer.getSharedContext().setBaseURL("file:D:/");

                // Font needed for Chinese text. The font must match the one named
                // in the HTML's CSS; more fonts can be added the same way.
                fontResolver.addFont("Fonts/SimSun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
                renderer.layout();
                renderer.createPDF(os);
                renderer.finishPDF();
            }
        } catch (DocumentException | IOException e) {
            // Keep the cause so callers can see why generation failed.
            throw new Exception("生成pdf失败", e);
        }
    }

    /**
     * Converts an XHTML string to a PDF file.
     *
     * <p>Best effort: a failure is printed to standard error and not propagated.
     *
     * @param html       XHTML source text
     * @param outputFile path of the PDF file to write
     */
    public static void html2pdf2(String html, String outputFile) {
        ITextRenderer renderer = new ITextRenderer();
        ITextFontResolver fontResolver = renderer.getFontResolver();
        try (OutputStream os = new FileOutputStream(outputFile)) {
            fontResolver.addFont("Fonts/SimSun.ttc", BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
            renderer.setDocumentFromString(html);
            // To resolve relative image paths, a base URL can be set, e.g.:
            // renderer.getSharedContext().setBaseURL("file:D:/");
            renderer.layout();
            renderer.createPDF(os);
        } catch (DocumentException | IOException e) {
            e.printStackTrace();
        }
    }

}

至此基本上就可以进行测试了。
还有一些bug，其中一个就是图片出现在换页的位置就会被截断，另一个表格的样式可能会丢失，图片问题如下：

还是建议使用第一种方案啦，更为简单，不需要考虑特别多的东西

版权声明：本文来源CSDN，感谢博主原创文章，遵循 CC 4.0 by-sa 版权协议，转载请附上原文出处链接和本声明。
原文链接：https://blog.csdn.net/shuchongqu/article/details/84104801
站方申明：本站部分内容来自社区用户分享，若涉及侵权，请联系站方删除。

发表于 2020-03-01 22:33:24
阅读 ( 702 )
分类：前端

java项目实现html转pdf的需求（支持中文和CSS样式）

你可能感兴趣的文章

精选的优质文章

0 条评论

官方社群

GO教程

推荐文章

猜你喜欢

随便看看