当前位置：首页 > news >正文

JAVA实现将富文本内容插入已有word文档并下载（docx4j+jsoup）

news 2025/7/15 4:43:53

JAVA实现将富文本内容插入已有word文档并下载（docx4j+jsoup）

需求描述：

最近公司项目需要开发一个功能，需要将前端保存的富文本内容和目录插入到已有的word文档模版里，并提供下载功能。参考了很多方法，也踩了一些坑，最后使用docx4j+jsoup实现了；因为图片在富文本里保存的是相对路径，需要使用jsoup将富文本的标签解析出来并处理，而docx4j无法直接将HTML中以路径引用的图片转换到word里，所以需要先将图片下载下来，并转换成base64编码格式。

引用依赖：

此处依赖是针对JDK8的，其实也写了一个JDK11的，提交代码的时候发现编译不通过，才想起公司运行的JDK版本是JDK1.8的。（一定要注意依赖版本）

 <dependency><groupId>org.docx4j</groupId><artifactId>docx4j-ImportXHTML</artifactId><version>8.3.10</version><exclusions><exclusion><groupId>com.sun.xml.bind</groupId><artifactId>jaxb-impl</artifactId></exclusion><exclusion><groupId>javax.xml.bind</groupId><artifactId>jaxb-api</artifactId></exclusion></exclusions></dependency><dependency><groupId>org.docx4j</groupId><artifactId>docx4j-JAXB-Internal</artifactId><version>8.3.10</version><exclusions><exclusion><groupId>com.sun.xml.bind</groupId><artifactId>jaxb-impl</artifactId></exclusion></exclusions></dependency><!-- 手动指定新版JAXB依赖 --><dependency><groupId>javax.xml.bind</groupId><artifactId>jaxb-api</artifactId><version>2.3.1</version></dependency><dependency><groupId>com.sun.xml.bind</groupId><artifactId>jaxb-impl</artifactId><version>2.3.8</version></dependency><dependency><groupId>javax.activation</groupId><artifactId>activation</artifactId><version>1.1.1</version></dependency><dependency><groupId>org.docx4j</groupId><artifactId>docx4j-JAXB-ReferenceImpl</artifactId><version>8.3.10</version></dependency><!-- 其他工具 --><dependency><groupId>org.jsoup</groupId><artifactId>jsoup</artifactId><version>1.14.3</version></dependency>

代码实现

 private static final Map<String, String> IMAGE_CACHE = new ConcurrentHashMap<>();private static final ExecutorService IMAGE_EXECUTOR = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 2);public String exportSowToWord(String fileName, HashMap<String, String> param)throws Exception {// 1. 批量获取数据String versionId = param.getOrDefault("versionId", "test");List<CatalogTressDTO> catalogList = zentaoProSowCatalogMapper.queryTreeMode(versionId);// 批量获取所有内容List<String> catalogIds = catalogList.stream().map(CatalogTressDTO::getId).collect(Collectors.toList());Map<String, ZentaoProSowContent> contentMap = zentaoProSowContentMapper.selectList(new LambdaQueryWrapper<ZentaoProSowContent>().in(ZentaoProSowContent::getCatalogId, catalogIds)).stream().collect(Collectors.toMap(ZentaoProSowContent::getCatalogId, Function.identity()));// 2. 构建完整HTML内容StringBuilder contentHtml = new StringBuilder();for (CatalogTressDTO catalog : catalogList) {// 处理标题if (StringUtils.isNotBlank(catalog.getIndentedTitle())) {contentHtml.append(buildHeadingTag(catalog));}// 处理内容ZentaoProSowContent content = contentMap.get(catalog.getId());if (content != null && StringUtils.isNotBlank(content.getContent())) {contentHtml.append(content.getContent());}}// 3. 统一处理图片和HTMLString fullHtml = "<!DOCTYPE html><html><head><meta charset='UTF-8'></head><body>"+ contentHtml.toString() + "</body></html>";String processedHtml = processHtmlWithImages(fullHtml);// 4. 
生成Word文档ClassPathResource templateResource = new ClassPathResource("templates/sow_V2.0.docx");WordprocessingMLPackage wordPackage = WordprocessingMLPackage.load(templateResource.getInputStream());MainDocumentPart mainDoc = wordPackage.getMainDocumentPart();// 查找插入位置int insertIndex = findInsertPosition(mainDoc);// 添加HTML内容mainDoc.addAltChunk(AltChunkType.Html, processedHtml.getBytes(), mainDoc, insertIndex);mainDoc.convertAltChunks();ByteArrayOutputStream outputStream = new ByteArrayOutputStream();// 生成目录generateTableOfContents(wordPackage, insertIndex);// 保存文档wordPackage.save(outputStream);return buildResponse(fileName, outputStream.toByteArray());}private String buildHeadingTag(CatalogTressDTO catalog) {int level = catalog.getLevel() != null ? Math.min(Integer.parseInt(catalog.getLevel()), 6) : 1;return String.format("<h%d style='mso-style-name:标题%d'>%s</h%d>",level, level, catalog.getIndentedTitle(), level);}private int findInsertPosition(MainDocumentPart mainDoc) {List<Object> content = mainDoc.getContent();for (int i = 0; i < content.size(); i++) {if (content.get(i) instanceof P) {P p = (P) content.get(i);String text= TextUtils.getText(p);if (text != null && text.contains("插入的内容")) {content.remove(i);  // 移除占位符段落return i+1;          // 返回插入位置}}}return content.size();  // 默认插入到文档末尾}private void generateTableOfContents(WordprocessingMLPackage wordPackage, int insertIndex) throws Exception {TocGenerator tocGenerator = new TocGenerator(wordPackage);Toc.setTocHeadingText("目录");tocGenerator.generateToc(insertIndex - 1, "TOC \\o \"1-3\" \\h \\z \\u ", true);}private String processHtmlWithImages(String html) {Document doc = Jsoup.parse(html);Elements imgs = doc.select("img");// 并行处理图片List<CompletableFuture<Void>> futures = imgs.stream().map(img -> CompletableFuture.runAsync(() -> processImageTag(img), IMAGE_EXECUTOR)).collect(Collectors.toList());// 等待所有任务完成CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();return doc.html();}private void 
processImageTag(Element img) {try {String src = img.attr("src");if (StringUtils.isBlank(src)) return;String networkUrl = convertToNetworkUrl(src);String base64 = IMAGE_CACHE.computeIfAbsent(networkUrl, this::fetchImageBase64);// 异步获取图片尺寸CompletableFuture<BufferedImage> imageFuture = CompletableFuture.supplyAsync(() -> {try {return ImageIO.read(new URL(networkUrl));} catch (Exception e) {return null;}}, IMAGE_EXECUTOR);BufferedImage image = imageFuture.get(3, TimeUnit.SECONDS);if (image != null) {int scaledWidth = (int) (image.getWidth() * 0.9);int scaledHeight = (int) (image.getHeight() * 0.9);img.attr("width", String.valueOf(scaledWidth)).attr("height", String.valueOf(scaledHeight));}img.attr("src", base64);} catch (Exception e) {img.attr("src", "#error");}}private String fetchImageBase64(String imageUrl) {try (InputStream in = new URL(imageUrl).openStream()) {byte[] bytes = IOUtils.toByteArray(in);String mimeType = getMimeType(imageUrl);return "data:" + mimeType + ";base64," + Base64.getEncoder().encodeToString(bytes);} catch (Exception e) {return "#error";}}// 以下为原有工具方法保持不变private String convertToNetworkUrl(String relativePath) {//富文本保存的是相对路径return "http://10.80.88.93:8090/" + relativePath.replaceFirst("^(?:\\.\\./)+", "");}private String getMimeType(String url) {if (url.endsWith(".png")) return "image/png";if (url.endsWith(".jpg") || url.endsWith(".jpeg")) return "image/jpeg";if (url.endsWith(".gif")) return "image/gif";return "application/octet-stream";}private String buildResponse(String fileName, byte[] content) throws UnsupportedEncodingException {//直接返回文件
//        String encodeFileName = URLEncoder.encode(fileName, "UTF-8").replace("\\+", "%20");
//        HttpHeaders header = new HttpHeaders();
//        header.add("Content-Type", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
//        header.add("Content-Disposition", "attachment; filename=" + encodeFileName);
//        return new ResponseEntity<>(content, header, HttpStatus.OK);//上传到MINISOMultipartFile multipartFile = convertByteArrayToMultipartFile(content, fileName);Result result = fileFeign.addFileByInfo(multipartFile);String id = ((Map<String, Object>) result.getData()).get("id").toString();return id;}public MultipartFile convertByteArrayToMultipartFile(byte[] fileBytes, String filename) {return new MultipartFile() {@Overridepublic String getName() {return "file"; // 表单字段名}@Overridepublic String getOriginalFilename() {return filename;}@Overridepublic String getContentType() {return "application/octet-stream"; // 默认二进制流，可自定义（如 "image/png"）}@Overridepublic boolean isEmpty() {return fileBytes == null || fileBytes.length == 0;}@Overridepublic long getSize() {return fileBytes.length;}@Overridepublic byte[] getBytes() throws IOException {return fileBytes;}@Overridepublic InputStream getInputStream() throws IOException {return new ByteArrayInputStream(fileBytes);}@Overridepublic void transferTo(File dest) throws IOException, IllegalStateException {try (FileOutputStream fos = new FileOutputStream(dest)) {fos.write(fileBytes);}}};}
}