社区微信群开通啦,扫一扫抢先加入社区官方微信群
社区微信群
/**
* doc转换为html
*
* @param fileName
* @param outPutFile
* @throws TransformerException
* @throws IOException
* @throws ParserConfigurationException
*/
public static void doc2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException {
long startTime = System.currentTimeMillis();
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
return suggestedName;
}
});
wordToHtmlConverter.processDocument(wordDocument);
// 保存图片
List<Picture> pics = wordDocument.getPicturesTable().getAllPictures();
if (pics != null) {
for (int i = 0; i < pics.size(); i++) {
Picture pic = (Picture) pics.get(i);
try {
pic.writeImageContent(new FileOutputStream("E://temp//" + pic.suggestFullFileName()));
} catch (FileNotFoundException e) {
e.printStackTrace();
}
}
}
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
out.close();
writeFile(new String(out.toByteArray()), outPutFile);
System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms.");
}
/**
* 写文件
*
* @param content
* @param path
*/
public static void writeFile(String content, String path) {
FileOutputStream fos = null;
BufferedWriter bw = null;
try {
File file = new File(path);
fos = new FileOutputStream(file);
bw = new BufferedWriter(new OutputStreamWriter(fos, "utf-8"));
bw.write(content);
} catch (FileNotFoundException fnfe) {
fnfe.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
} finally {
try {
if (bw != null)
bw.close();
if (fos != null)
fos.close();
} catch (IOException ie) {
}
}
}
/**
* docx格式word转换为html
*
* @param fileName docx文件路径
* @param outPutFile html输出文件路径
* @throws TransformerException
* @throws IOException
* @throws ParserConfigurationException
*/
public static void docx2Html(String fileName, String outPutFile) throws IOException {
String fileOutName = outPutFile;
long startTime = System.currentTimeMillis();
XWPFDocument document = new XWPFDocument(new FileInputStream(fileName));
XHTMLOptions options = XHTMLOptions.create().indent(4);
// 导出图片
File imageFolder = new File("E:/temp/images");
options.setExtractor(new FileImageExtractor(imageFolder));
// URI resolver word的html中图片的目录路径
options.URIResolver(new BasicURIResolver("images"));
File outFile = new File(fileOutName);
outFile.getParentFile().mkdirs();
OutputStream out = new FileOutputStream(outFile);
XHTMLConverter.getInstance().convert(document, out, options);
System.out.println("Generate " + fileOutName + " with " + (System.currentTimeMillis() - startTime) + " ms.");
}
jar包这里比较多坑,大家看自己的报错信息就好啦,一般是jar包的版本问题
如果觉得我的文章对您有用,请随意打赏。你的支持将鼓励我继续创作!