docx poi 原理_POI实现DOC/DOCX转HTML

docx poi 原理_POI实现DOC/DOCX转HTML:本文介绍如何使用 POI 将 DOC 和 DOCX 文件转换为 HTML 格式。

1.使用HWPF处理DOC

/**
 * Converts binary Word documents (handled through POI's {@code HWPFDocument}) into an HTML string.
 *
 * <p>Pictures referenced by the document are written to {@code UeConstants.IMAGE_PATH} under the
 * name {@code <timestamp>_<suggestedName>}, and the generated HTML points at them through a URL
 * assembled from the supplied context and the {@code UeConstants} path segments.
 */
public class DocToHtml {

    /** Character encoding requested from the HTML serializer. */
    private static final String ENCODING = "UTF-8";

    /**
     * Converts the Word file at {@code wordPath} to HTML, using an empty context for image URLs.
     *
     * @param wordPath path of the file to convert
     * @return the generated HTML, or {@code ""} when the path is null/empty or is not an existing
     *         regular file
     */
    public static String convert2Html(String wordPath)
            throws FileNotFoundException, TransformerException, IOException,
            ParserConfigurationException {
        return convert2Html(wordPath, "");
    }

    /**
     * Converts the Word file at {@code wordPath} to HTML.
     *
     * @param wordPath path of the file to convert
     * @param context  context prefix for image URLs; slashes and backslashes are stripped from it
     * @return the generated HTML, or {@code ""} when the path is null/empty or is not an existing
     *         regular file
     */
    public static String convert2Html(String wordPath, String context)
            throws FileNotFoundException, TransformerException, IOException,
            ParserConfigurationException {
        if (wordPath == null || "".equals(wordPath)) {
            return "";
        }
        File file = new File(wordPath);
        if (!file.exists() || !file.isFile()) {
            return "";
        }
        // This method opens the stream, so it is also responsible for closing it.
        FileInputStream in = new FileInputStream(file);
        try {
            return convert2Html(in, context);
        } finally {
            in.close();
        }
    }

    /**
     * Converts a Word document read from {@code is}, using an empty context for image URLs.
     * The stream is not closed by this method.
     */
    public static String convert2Html(InputStream is)
            throws TransformerException, IOException,
            ParserConfigurationException {
        return convert2Html(is, "");
    }

    /**
     * Converts a Word document read from {@code is}, taking the image URL context from
     * {@code req.getContextPath()}. The stream is not closed by this method.
     */
    public static String convert2Html(InputStream is, HttpServletRequest req)
            throws TransformerException, IOException, ParserConfigurationException {
        return convert2Html(is, req.getContextPath());
    }

    /**
     * Converts a Word document read from {@code is} to HTML and stores its referenced pictures.
     * The stream is not closed by this method.
     *
     * @param is      stream positioned at the start of the Word document
     * @param context context prefix for image URLs; {@code null} is treated as empty
     * @return the serialized HTML document
     */
    public static String convert2Html(InputStream is, final String context)
            throws IOException, ParserConfigurationException, TransformerException {
        HWPFDocument wordDocument = new HWPFDocument(is);
        WordToHtmlConverter converter = new WordToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());

        // One timestamp per conversion keeps picture names from different runs apart.
        final String prefix = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
        final String imageUrlBase = buildImageUrlBase(context);

        // Bytes handed to the callback, keyed by the name used in the generated URL. Writing
        // exactly these guarantees that every URL in the HTML has a matching file and that no
        // file is written for a picture the converter never referenced.
        final Map<String, byte[]> referencedPictures = new HashMap<String, byte[]>();

        converter.setPicturesManager(new PicturesManager() {
            public String savePicture(byte[] content, PictureType pictureType,
                    String suggestedName, float widthInches, float heightInches) {
                referencedPictures.put(suggestedName, content);
                return imageUrlBase + prefix + "_" + suggestedName;
            }
        });
        converter.processDocument(wordDocument);

        writePictures(referencedPictures, prefix);

        StringWriter writer = new StringWriter();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, ENCODING);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(converter.getDocument()), new StreamResult(writer));
        return writer.toString();
    }

    /** Builds the URL prefix (ending in "/") under which converted pictures are referenced. */
    private static String buildImageUrlBase(String context) {
        String prefixContext = context == null
                ? ""
                : context.replace("\\", "").replace("/", "");
        if (StringUtils.isNotBlank(prefixContext)) {
            prefixContext = "/" + prefixContext + "/";
        }
        return prefixContext
                + UeConstants.VIEW_IMAGE_PATH + "/" + UeConstants.UEDITOR_PATH
                + "/" + UeConstants.UEDITOR_IMAGE_PATH + "/";
    }

    /** Writes each collected picture to {@code UeConstants.IMAGE_PATH/<prefix>_<name>}. */
    private static void writePictures(Map<String, byte[]> pictures, String prefix)
            throws IOException {
        for (Map.Entry<String, byte[]> entry : pictures.entrySet()) {
            byte[] content = entry.getValue();
            if (content == null) {
                continue;
            }
            FileOutputStream out;
            try {
                out = new FileOutputStream(
                        UeConstants.IMAGE_PATH + "/" + prefix + "_" + entry.getKey());
            } catch (FileNotFoundException e) {
                // Best effort: a picture that cannot be stored must not abort the conversion.
                e.printStackTrace();
                continue;
            }
            try {
                out.write(content);
            } finally {
                out.close();
            }
        }
    }

}

2.使用XWPFDocument处理DOCX

/**
 * Converts a DOCX classpath resource to XHTML with {@code XHTMLConverter}: once to standard
 * output and once to a file under {@code target/}.
 */
public class XHTMLConverterTestCase
    extends AbstractXWPFPOIConverterTest
{

    /**
     * Runs both conversions for the given resource.
     *
     * @param fileInName resource name, resolved relative to {@code AbstractXWPFPOIConverterTest}
     */
    protected void doGenerate( String fileInName )
        throws IOException
    {
        doGenerateSysOut( fileInName );
        doGenerateHTMLFile( fileInName );
    }

    /**
     * Converts the resource with an indent of 4 and prints the result to {@code System.out};
     * the elapsed time goes to {@code System.err}.
     */
    protected void doGenerateSysOut( String fileInName )
        throws IOException
    {
        long startTime = System.currentTimeMillis();

        XWPFDocument document = readClasspathDocument( fileInName );
        XHTMLOptions options = XHTMLOptions.create().indent( 4 );

        // System.out is shared with the rest of the JVM and must stay open.
        OutputStream out = System.out;
        XHTMLConverter.getInstance().convert( document, out, options );

        System.err.println( "Elapsed time=" + ( System.currentTimeMillis() - startTime ) + "(ms)" );
    }

    /**
     * Converts the resource to {@code target/<fileInName>.html}, extracting its images to
     * {@code target/images/<fileInName>}.
     */
    protected void doGenerateHTMLFile( String fileInName )
        throws IOException
    {
        String root = "target";
        String fileOutName = root + "/" + fileInName + ".html";

        long startTime = System.currentTimeMillis();

        XWPFDocument document = readClasspathDocument( fileInName );
        XHTMLOptions options = XHTMLOptions.create();

        // Extract images next to the generated page and resolve their URIs against that folder.
        File imageFolder = new File( root + "/images/" + fileInName );
        options.setExtractor( new FileImageExtractor( imageFolder ) );
        options.URIResolver( new FileURIResolver( imageFolder ) );

        // fileInName may contain sub-folders, so make sure the output directory exists.
        File outFile = new File( fileOutName );
        File outDir = outFile.getParentFile();
        if ( outDir != null && !outDir.isDirectory() && !outDir.mkdirs() )
        {
            throw new IOException( "Cannot create output directory " + outDir );
        }

        OutputStream out = new FileOutputStream( outFile );
        try
        {
            XHTMLConverter.getInstance().convert( document, out, options );
        }
        finally
        {
            out.close();
        }

        System.out.println( "Generate " + fileOutName + " with " + ( System.currentTimeMillis() - startTime ) + " ms." );
    }

    /**
     * Loads a DOCX document from the classpath and closes the resource stream afterwards.
     * The stream type is fully qualified so that no additional import is required.
     */
    private static XWPFDocument readClasspathDocument( String fileInName )
        throws IOException
    {
        java.io.InputStream in = AbstractXWPFPOIConverterTest.class.getResourceAsStream( fileInName );
        if ( in == null )
        {
            throw new IOException( "Resource not found on classpath: " + fileInName );
        }
        try
        {
            return new XWPFDocument( in );
        }
        finally
        {
            in.close();
        }
    }

}

项目下载地址:http://download.csdn.net/detail/luka2008/

今天的文章《docx poi 原理_POI实现DOC/DOCX转HTML》就分享到这里,感谢您的阅读。
编程小号
上一篇 2024-12-15 17:33
下一篇 2024-12-15 17:30

相关推荐

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 举报,一经查实,本站将立刻删除。
如需转载请保留出处:https://bianchenghao.cn/bian-cheng-ji-chu/86997.html