1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175
| import net.arnx.wmf2svg.gdi.svg.SvgGdi; import net.arnx.wmf2svg.gdi.wmf.WmfParser; import org.apache.commons.io.FileUtils; import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.converter.PicturesManager; import org.apache.poi.hwpf.converter.WordToHtmlConverter; import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.usermodel.PictureType; import org.w3c.dom.Document;
import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.OutputKeys; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import java.io.*; import java.util.List;
/**
 * Converts legacy Word ({@code .doc}) files to an HTML page plus a directory of
 * extracted pictures. WMF pictures are converted to SVG; all other pictures are
 * written out unchanged.
 */
public class DocToHtmlUtils {

    /** Extension of WMF pictures, compared case-insensitively. */
    private static final String WMF_SUFFIX = ".wmf";

    /** Extension appended to the name of a WMF picture once converted to SVG. */
    private static final String SVG_SUFFIX = ".svg";

    /**
     * Converts {@code docFile} using the defaults: page {@code index.html},
     * pictures in the {@code img} sub-directory, {@code UTF-8} encoding.
     *
     * @param docFile the Word document to convert
     * @param htmlDir directory that receives the HTML page and the pictures
     * @throws Exception if reading, converting or writing fails
     */
    public static void convertDocToHtml(File docFile, File htmlDir) throws Exception {
        convertDocToHtml(docFile, htmlDir, "index.html", "img", "UTF-8");
    }

    /**
     * Converts {@code docFile} to {@code htmlDir/htmlFileName} and extracts its pictures.
     *
     * @param docFile      the Word document to convert
     * @param htmlDir      directory that receives the HTML page
     * @param htmlFileName name of the generated HTML file inside {@code htmlDir}
     * @param imageDirName sub-directory of {@code htmlDir} for pictures; when
     *                     {@code null} or blank, pictures are stored in {@code htmlDir} itself
     * @param encoding     output encoding passed to the HTML serializer
     * @throws Exception if reading, converting or writing fails
     */
    public static void convertDocToHtml(File docFile, File htmlDir, String htmlFileName,
                                        String imageDirName, String encoding) throws Exception {
        // A blank or missing image directory name means "next to the HTML file".
        final String imgDirName =
                (imageDirName == null || imageDirName.trim().equals("")) ? null : imageDirName.trim();
        File imgDir = (imgDirName == null) ? htmlDir : new File(htmlDir, imgDirName);
        FileUtils.forceMkdir(imgDir);

        // The stream is only needed while the document is constructed, so release
        // it right away instead of leaking the file handle.
        HWPFDocument docDocument;
        InputStream docIn = new FileInputStream(docFile);
        try {
            docDocument = new HWPFDocument(docIn);
        } finally {
            docIn.close();
        }

        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        WordToHtmlConverter docToHtmlConverter = new WordToHtmlConverter(doc);
        docToHtmlConverter.setPicturesManager(new PicturesManager() {
            public String savePicture(byte[] content, PictureType pictureType, String suggestedName,
                                      float widthInches, float heightInches) {
                String fileName = outputFileName(suggestedName);
                // The returned value ends up in the HTML as a URL, so the separator
                // must be '/' on every platform (File.separator is '\' on Windows).
                return imgDirName == null ? fileName : imgDirName + "/" + fileName;
            }
        });
        docToHtmlConverter.processDocument(docDocument);

        savePictures(docDocument.getPicturesTable().getAllPictures(), imgDir);
        writeHtml(docToHtmlConverter.getDocument(), new File(htmlDir, htmlFileName), encoding);
    }

    /**
     * Renders a WMF picture to SVG.
     *
     * @param wmfPicture picture whose image content is WMF data
     * @return the serialized SVG document
     * @throws Exception if the WMF data cannot be parsed or serialized
     */
    public static byte[] convertWmfToSvg(Picture wmfPicture) throws Exception {
        ByteArrayOutputStream wmfContent = new ByteArrayOutputStream();
        wmfPicture.writeImageContent(wmfContent);

        ByteArrayOutputStream svgContent = new ByteArrayOutputStream();
        convertWmfToSvg(new ByteArrayInputStream(wmfContent.toByteArray()), svgContent);
        return svgContent.toByteArray();
    }

    /**
     * Parses WMF data from {@code in} and writes it to {@code out} as an indented,
     * UTF-8 encoded SVG 1.0 document. Neither stream is closed; {@code out} is flushed.
     *
     * @param in  source of the WMF data
     * @param out destination of the SVG document
     * @throws Exception if the WMF data cannot be parsed or serialized
     */
    public static void convertWmfToSvg(InputStream in, OutputStream out) throws Exception {
        WmfParser parser = new WmfParser();
        SvgGdi gdi = new SvgGdi(false);
        parser.parse(in, gdi);
        Document doc = gdi.getDocument();

        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//W3C//DTD SVG 1.0//EN");
        transformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
                "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd");

        // Serialize exactly once, straight into the caller's stream.
        transformer.transform(new DOMSource(doc), new StreamResult(out));
        out.flush();
    }

    /**
     * Command-line entry point.
     *
     * @param argv optional {@code <docFile> <htmlDir>}; when fewer than two
     *             arguments are given the built-in sample paths are used
     * @throws Exception if the conversion fails
     */
    public static void main(String argv[]) throws Exception {
        if (argv != null && argv.length >= 2) {
            convertDocToHtml(new File(argv[0]), new File(argv[1]));
        } else {
            convertDocToHtml(new File("/Users/Biao/Desktop/2011届高考数学强化复习训练题11.doc"),
                    new File("/Users/Biao/Desktop/doc/x"));
        }
    }

    /** Tells whether a picture name denotes a WMF file (case-insensitive). */
    private static boolean isWmf(String suggestedName) {
        return suggestedName.toLowerCase().endsWith(WMF_SUFFIX);
    }

    /**
     * Maps a suggested picture name to the name it is stored under, so the HTML
     * reference and the file on disk always agree: WMF names gain ".svg".
     */
    private static String outputFileName(String suggestedName) {
        return isWmf(suggestedName) ? suggestedName + SVG_SUFFIX : suggestedName;
    }

    /** Writes every picture into {@code imgDir}, converting WMF pictures to SVG. */
    private static void savePictures(List<Picture> pictures, File imgDir) throws Exception {
        if (pictures == null) {
            return;
        }
        for (Picture picture : pictures) {
            String suggestedName = picture.suggestFullFileName();
            File target = new File(imgDir, outputFileName(suggestedName));
            try {
                if (isWmf(suggestedName)) {
                    FileUtils.writeByteArrayToFile(target, convertWmfToSvg(picture));
                } else {
                    OutputStream imgOut = new FileOutputStream(target);
                    try {
                        picture.writeImageContent(imgOut);
                    } finally {
                        imgOut.close();
                    }
                }
            } catch (FileNotFoundException e) {
                // Best effort: a picture that cannot be created must not abort the
                // whole conversion, but report which one was skipped.
                System.err.println("Could not write picture " + target);
                e.printStackTrace();
            }
        }
    }

    /** Serializes the converted DOM as indented HTML and stores it in {@code htmlFile}. */
    private static void writeHtml(Document htmlDocument, File htmlFile, String encoding)
            throws Exception {
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, encoding);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");

        // Buffer in memory first so a failed transform never leaves a partial file.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(out));
        FileUtils.writeByteArrayToFile(htmlFile, out.toByteArray());
    }
}
|