前言
前段时间一直使用到word文档转pdf或者pdf转word,寻思着用Java应该是可以实现的,于是花了点时间写了个文件转换工具
源码:weloe/FileConversion (github.com)
主要功能就是word和pdf的文件转换,如下
- pdf 转 word
- pdf 转 图片
- word 转 图片
- word 转 html
- word 转 pdf
实现方法
主要使用了 pdfbox(Apache PDFBox | A Java PDF Library)以及 spire.doc(Free Spire.Doc for Java | 100% 免费 Java Word 组件,e-iceblue.cn)两个工具包
pom.xml
<!-- pom.xml fragment: these elements belong inside the <project> element. -->
<repositories>
    <!-- e-iceblue repository that hosts the spire.doc.free artifact declared below.
         NOTE(review): the host was truncated in the original text ("repo.e-");
         restored to repo.e-iceblue.cn over https - confirm against the vendor's install guide. -->
    <repository>
        <id>com.e-iceblue</id>
        <url>https://repo.e-iceblue.cn/repository/maven-public/</url>
    </repository>
</repositories>

<properties>
    <!-- Compile for Java 8 (standard maven-compiler-plugin user properties). -->
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
</properties>

<dependencies>
    <!-- PDF parsing, text extraction and page rendering -->
    <dependency>
        <groupId>org.apache.pdfbox</groupId>
        <artifactId>pdfbox</artifactId>
        <version>2.0.4</version>
    </dependency>
    <!-- Unit tests only -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13.2</version>
        <scope>test</scope>
    </dependency>
    <!-- Word document loading and export (free edition of Spire.Doc) -->
    <dependency>
        <groupId>e-iceblue</groupId>
        <artifactId>spire.doc.free</artifactId>
        <version>3.9.0</version>
    </dependency>
</dependencies>
策略接口
/**
 * Strategy contract for a single file-conversion method.
 *
 * <p>Each implementation recognises one conversion key through
 * {@link #isSupport(String)} and performs that conversion in
 * {@link #convert(String, String)}.
 */
public interface FileConversion {

    /**
     * Tells whether this strategy handles the given conversion key.
     *
     * @param s the conversion key to test
     * @return {@code true} if this strategy handles the key, {@code false} otherwise
     */
    boolean isSupport(String s);

    /**
     * Converts the input file and reports where the result was written.
     *
     * @param pathName       path of the file to convert
     * @param dirAndFileName output path without extension; the implementations in this
     *                       project append their own suffix to it
     * @return the path of the produced file
     * @throws Exception if the conversion fails
     */
    String convert(String pathName, String dirAndFileName) throws Exception;
}
PDF转图片实现
public class PDF2Image implements FileConversion{private String suffix = ".jpg";public static final int DEFAULT_DPI = 150;@Overridepublic boolean isSupport(String s) {return "pdf2image".equals(s);}@Overridepublic String convert(String pathName,String dirAndFileName) throws Exception {String outPath = dirAndFileName + suffix;if(Files.exists(Paths.get(outPath))){throw new RuntimeException(outPath+" 文件已存在");}pdf2multiImage(pathName,outPath,DEFAULT_DPI);return outPath;}/*** pdf转图片* 多页PDF会每页转换为一张图片,下面会有多页组合成一页的方法** @param pdfFile pdf文件路径* @param outPath 图片输出路径* @param dpi 相当于图片的分辨率,值越大越清晰,但是转换时间变长*/public void pdf2multiImage(String pdfFile, String outPath, int dpi) {if (dpi <= 0) {// 如果没有设置DPI,默认设置为150dpi = DEFAULT_DPI;}try (PDDocument pdf = PDDocument.load(new FileInputStream(pdfFile))) {int actSize = pdf.getNumberOfPages();List<BufferedImage> picList = new ArrayList<>();for (int i = 0; i < actSize; i++) {BufferedImage image = new PDFRenderer(pdf).renderImageWithDPI(i, dpi, ImageType.RGB);picList.add(image);}// 组合图片ImageUtil.yPic(picList, outPath);} catch (IOException e) {e.printStackTrace();}}}
PDF转word实现
public class PDF2Word implements FileConversion {private String suffix = ".doc";@Overridepublic boolean isSupport(String s) {return "pdf2word".equals(s);}/**** @param pathName* @throws IOException*/@Overridepublic String convert(String pathName,String dirAndFileName) throws Exception {String outPath = dirAndFileName + suffix;if(Files.exists(Paths.get(outPath))){throw new RuntimeException(outPath+" 文件已存在");}pdf2word(pathName, outPath);return outPath;}private void pdf2word(String pathName, String outPath) throws IOException {PDDocument doc = PDDocument.load(new File(pathName));int pagenumber = doc.getNumberOfPages();// 创建文件createFile(Paths.get(outPath));FileOutputStream fos = new FileOutputStream(outPath);Writer writer = new OutputStreamWriter(fos, "UTF-8");PDFTextStripper stripper = new PDFTextStripper();stripper.setSortByPosition(true);//排序stripper.setStartPage(1);//设置转换的开始页stripper.setEndPage(pagenumber);//设置转换的结束页stripper.writeText(doc, writer);writer.close();doc.close();}}
word转html
public class Word2HTML implements FileConversion{private String suffix = ".html";@Overridepublic boolean isSupport(String s) {return "word2html".equals(s);}@Overridepublic String convert(String pathName, String dirAndFileName) {String outPath = dirAndFileName + suffix;if(Files.exists(Paths.get(outPath))){throw new RuntimeException(outPath+" 文件已存在");}Document doc = new Document();doc.loadFromFile(pathName);doc.saveToFile(outPath, FileFormat.Html);doc.dispose();return outPath;}}
word转图片
public class Word2Image implements FileConversion{private String suffix = ".jpg";@Overridepublic boolean isSupport(String s) {return "word2image".equals(s);}@Overridepublic String convert(String pathName, String dirAndFileName) throws Exception {String outPath = dirAndFileName + suffix;if(Files.exists(Paths.get(outPath))){throw new RuntimeException(outPath+" 文件已存在");}Document doc = new Document();//加载文件doc.loadFromFile(pathName);//上传文档页数,也是最后要生成的图片数Integer pageCount = doc.getPageCount();// 参数第一个和第三个都写死 第二个参数就是生成图片数BufferedImage[] image = doc.saveToImages(0, pageCount, ImageType.Bitmap);// 组合图片List<BufferedImage> imageList = Arrays.asList(image);ImageUtil.yPic(imageList, outPath);return outPath;}}
word转pdf
public class Word2PDF implements FileConversion{private String suffix = ".pdf";@Overridepublic boolean isSupport(String s) {return "word2pdf".equals(s);}@Overridepublic String convert(String pathName, String dirAndFileName) throws Exception {String outPath = dirAndFileName + suffix;if(Files.exists(Paths.get(outPath))){throw new RuntimeException(outPath+" 文件已存在");}//加载wordDocument document = new Document();document.loadFromFile(pathName, FileFormat.Docx);//保存结果文件document.saveToFile(outPath, FileFormat.PDF);document.close();return outPath;}}
使用
依次输入转换方法、文件路径、输出路径(输出路径如果输入 'null',则输出到源文件同目录下的同名、不同后缀的文件)
转换方法可选项:
- pdf2word
- pdf2image
- word2html
- word2image
- word2pdf
例如输入:
pdf2word D:\test\testFile.pdf null
控制台输出:
转换方法: pdf2word 文件: D:\test\testFile.pdf
转换成功!文件路径: D:\test\testFile.doc