1500字范文,内容丰富有趣,写作好帮手!
1500字范文 > java实现pdf转word 解决个别排版错乱问题

java实现pdf转word 解决个别排版错乱问题

时间:2021-02-05 18:23:12

相关推荐

java实现pdf转word 解决个别排版错乱问题

项目中需要实现这样的功能:客户上传 PDF 成功后,直接将其转换为 Word 格式。之前试过网上一些免费的转换工具,转出来的 Word 大致还行,但部分排版会错乱,如下图:

下面这个是用java改造后的,效果如下,排版整齐,和pdf中的格式基本保持一样

下面直接上干货,首先引入maven坐标

<!-- https://mvnrepository.com/artifact/com.aspose/aspose-pdf -->

<dependency>

<groupId>com.aspose</groupId>

<artifactId>aspose-pdf</artifactId>

<version>21.8</version>

</dependency>

然后引入仓库地址

<repositories>

<!--pdf转word仓库地址 -->

<repository>

<id>AsposeJavaAPI</id>

<name>Aspose Java API</name>

<url>https://releases.aspose.com/java/repo/</url>

</repository>

</repositories>

接下来将依赖包下载到本地仓库

第一步生成新的jar包,完整代码如下

import javassist.*;import java.io.*;import java.util.ArrayList;import java.util.Enumeration;import java.util.List;import java.util.jar.JarEntry;import java.util.jar.JarFile;import java.util.jar.JarOutputStream;/*** pdf 转 word 第一步生成新的jar包*/public class PDFJarCrack {public static void main(String[] args) throws Exception{String jarPath = "D:\\repo\\com\\aspose\\aspose-pdf\\21.8\\aspose-pdf-21.8.jar";crack(jarPath);}private static void crack(String jarName) throws Exception {try {ClassPool.getDefault().insertClassPath(jarName);CtClass ctClass = ClassPool.getDefault().getCtClass("com.aspose.pdf.ADocument");CtMethod[] declaredMethods = ctClass.getDeclaredMethods();int num = 0;for (int i = 0; i < declaredMethods.length; i++) {if (num == 2) {break;}CtMethod method = declaredMethods[i];CtClass[] ps = method.getParameterTypes();if (ps.length == 2&& method.getName().equals("lI")&& ps[0].getName().equals("com.aspose.pdf.ADocument")&& ps[1].getName().equals("int")) {method.setBody("{return false;}");num = 1;}if (ps.length == 0 && method.getName().equals("lt")) {// 水印处理method.setBody("{return true;}");num = 2;}}File file = new File(jarName);ctClass.writeFile(file.getParent());disposeJar(jarName, file.getParent() + "/com/aspose/pdf/ADocument.class");} catch (Exception e) {e.printStackTrace();}}private static void disposeJar(String jarName, String replaceFile) {List<String> deletes = new ArrayList<>();deletes.add("META-INF/37E3C32D.SF");deletes.add("META-INF/37E3C32D.RSA");File oriFile = new File(jarName);if (!oriFile.exists()) {System.out.println("######Not Find File:" + jarName);return;}//将文件名命名成备份文件String bakJarName = jarName.substring(0, jarName.length() - 3) + "cracked.jar";try {//创建文件(根据备份文件并删除部分)JarFile jarFile = new JarFile(jarName);JarOutputStream jos = new JarOutputStream(new FileOutputStream(bakJarName));Enumeration entries = jarFile.entries();while (entries.hasMoreElements()) {JarEntry entry = (JarEntry) entries.nextElement();if (!deletes.contains(entry.getName())) 
{if (entry.getName().equals("com/aspose/pdf/ADocument.class")) {System.out.println("Replace:-------" + entry.getName());JarEntry jarEntry = new JarEntry(entry.getName());jos.putNextEntry(jarEntry);FileInputStream fin = new FileInputStream(replaceFile);byte[] bytes = readStream(fin);jos.write(bytes, 0, bytes.length);} else {jos.putNextEntry(entry);byte[] bytes = readStream(jarFile.getInputStream(entry));jos.write(bytes, 0, bytes.length);}} else {System.out.println("Delete:-------" + entry.getName());}}jos.flush();jos.close();jarFile.close();}catch (Exception e) {e.printStackTrace();}}private static byte[] readStream(InputStream inStream) throws Exception {ByteArrayOutputStream outSteam = new ByteArrayOutputStream();byte[] buffer = new byte[1024];int len = -1;while ((len = inStream.read(buffer)) != -1) {outSteam.write(buffer, 0, len);}outSteam.close();inStream.close();return outSteam.toByteArray();}}

第二步:pdf转word

import com.aspose.pdf.Document;import com.aspose.pdf.SaveFormat;import java.io.FileOutputStream;import java.io.IOException;/*** 第2步 pdf转word*/public class Pdf2Word {public static void main(String[] args) throws IOException {pdf2doc("D:\\test.pdf");}//pdf转docpublic static void pdf2doc(String pdfPath) {long old = System.currentTimeMillis();try {//新建一个word文档String wordPath=pdfPath.substring(0,pdfPath.lastIndexOf("."))+".docx";FileOutputStream os = new FileOutputStream(wordPath);//doc是将要被转化的word文档Document doc = new Document(pdfPath);//全面支持DOC, DOCX, OOXML, RTF HTML, OpenDocument, PDF, EPUB, XPS, SWF 相互转换doc.save(os, SaveFormat.DocX);os.close();//转化用时long now = System.currentTimeMillis();System.out.println("Pdf 转 Word 共耗时:" + ((now - old) / 1000.0) + "秒");} catch (Exception e) {System.out.println("Pdf 转 Word 失败...");e.printStackTrace();}}}

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。