使用POI将doc文件转换为html
生活随笔
收集整理的這篇文章主要介紹了
使用POI将doc文件转换为html
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
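需要的jar包有:Apache POI(poi、poi-scratchpad,提供HWPF)、jsoup,以及html轉pdf所需的Flying Saucer(xhtmlrenderer)和iText(com.lowagie)。其中有一些是依賴包,可以使用maven下載。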
doc文件轉換為html文件
package com.gsww.sxzz.controller.service;import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.PicturesManager; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.usermodel.PictureType; import org.jsoup.Jsoup; import org.w3c.dom.Document;import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.*; import java.util.List;/*** Created by Carey on 15-2-2.*/ public class docTohtml {public static void main(String argv[]) {try {convert2Html("D:\\b.doc","D:\\1.html");} catch (Exception e) {e.printStackTrace();}}//輸出html文件 public static void writeFile(String content, String path) {FileOutputStream fos = null; BufferedWriter bw = null;org.jsoup.nodes.Document doc = Jsoup.parse(content);String styleOld=doc.getElementsByTag("style").html();//統一字體格式為宋體styleOld=styleOld.replaceAll("font-family:.+(?=;\\b)", "font-family:SimSun");doc.getElementsByTag("head").empty();doc.getElementsByTag("head").append("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");doc.getElementsByTag("head").append(" <style type=\"text/css\"></style>");doc.getElementsByTag("style").append(styleOld);/*正則表達式查詢字體內容:font-family:.+(?=;\b)*/System.out.println(content);content=doc.html();content=content.replace("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\">", "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"></meta>");try {File file = new File(path);fos = new FileOutputStream(file);bw = new BufferedWriter(new OutputStreamWriter(fos,"UTF-8"));bw.write(content);} catch (FileNotFoundException fnfe) 
{fnfe.printStackTrace();} catch (IOException ioe) {ioe.printStackTrace();} finally {try {if (bw != null)bw.close();if (fos != null)fos.close();} catch (IOException ie) {}}}//word 轉 html public static void convert2Html(String fileName, String outPutFile)throws TransformerException, IOException,ParserConfigurationException {HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));//WordToHtmlUtils.loadDoc(new FileInputStream(inputFile));//兼容2007 以上版本 // XSSFWorkbook xssfwork=new XSSFWorkbook(new FileInputStream(fileName));WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());wordToHtmlConverter.setPicturesManager( new PicturesManager(){public String savePicture( byte[] content,PictureType pictureType, String suggestedName,float widthInches, float heightInches ){return "test/"+suggestedName;}} );wordToHtmlConverter.processDocument(wordDocument);//save picturesList pics=wordDocument.getPicturesTable().getAllPictures();if(pics!=null){for(int i=0;i<pics.size();i++){Picture pic = (Picture)pics.get(i);System.out.println();try {pic.writeImageContent(new FileOutputStream("D:/test/"+ pic.suggestFullFileName()));} catch (FileNotFoundException e) {e.printStackTrace();}}}Document htmlDocument = wordToHtmlConverter.getDocument();ByteArrayOutputStream out = new ByteArrayOutputStream();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(out);TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "HTML");serializer.transform(domSource, streamResult);out.close();writeFile(new String(out.toByteArray()), outPutFile);} }遇到的問題,當doc轉換為html時不會將圖像的線條給轉換過來。只有在table表格中才可以轉換為span標簽。如果要作下滑線,可以放一個table的單元格只設定下邊框就可以完美轉換為html了。
將html轉換為pdf
package com.gsww.sxzz.controller.service;

import com.lowagie.text.pdf.BaseFont;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStream;

/**
 * Renders an HTML file to PDF with Flying Saucer's {@code ITextRenderer}.
 *
 * Created by Carey on 15-2-2.
 */
public class htmlToPdf {

    /** Path of the SimSun font file registered with the renderer for Chinese text. */
    private static final String SIMSUN_FONT = "C:\\Windows\\Fonts\\simsun.ttc";

    /**
     * Converts the HTML file {@code inputFile} to the PDF file {@code outputFile}.
     *
     * <p>The base URL is set to the directory of the input file, so that relative
     * references in the HTML are resolved against that directory.
     *
     * @param inputFile  path of the HTML file to render
     * @param outputFile path of the PDF file to create
     * @return {@code true} if the PDF was written, {@code false} if any step failed
     *         (the failure is reported on stderr)
     */
    public boolean convertHtmlToPdf(String inputFile, String outputFile) {
        try (OutputStream os = new FileOutputStream(outputFile)) {
            File source = new File(inputFile).getAbsoluteFile();

            ITextRenderer renderer = new ITextRenderer();
            renderer.setDocument(source.toURI().toURL().toString());

            // Register a font for Chinese text.
            ITextFontResolver fontResolver = renderer.getFontResolver();
            fontResolver.addFont(SIMSUN_FONT, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);

            renderer.getSharedContext().setBaseURL(source.getParentFile().toURI().toURL().toString());
            renderer.layout();
            renderer.createPDF(os);
            os.flush();
            return true;
        } catch (Exception e) {
            // Boundary catch: every failure is reported here and turned into a false result.
            e.printStackTrace();
            return false;
        }
    }

    public static void main(String[] args) {
        htmlToPdf html2Pdf = new htmlToPdf();
        if (!html2Pdf.convertHtmlToPdf("D:\\1.html", "D:\\index.pdf")) {
            System.err.println("HTML to PDF conversion failed");
        }
    }
}
轉載于:https://www.cnblogs.com/gynbk/p/7230849.html
總結
以上是生活随笔為你收集整理的使用POI将doc文件转换为html的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 满纸荒唐言,一把辛酸泪--红楼一梦
- 下一篇: 蒙特卡罗方法验证凯利公式