| | |
| | | import java.nio.charset.StandardCharsets; |
| | | import java.nio.file.Files; |
| | | import java.util.ArrayList; |
| | | import java.util.Arrays; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | import java.util.concurrent.TimeUnit; |
| | |
| | | |
// NOTE(review): this region appears to interleave two revisions of wordToPdf — inputFile, outputDir,
// command and exitCode are each declared twice and the braces do not balance. Confirm against
// version control before relying on it.
// Visible flow: build a LibreOffice headless "convert-to pdf" command for filePath/fileName, run it,
// capture its output, verify the PDF exists in the "pdf" sub-directory, then upload it through
// tencentCosUtil and return the upload result.
| | | public String wordToPdf(String filePath, String fileName) { |
| | | try { |
| | | // 1. First make sure the input file is UTF-8 encoded |
| | | String inputFile = filePath + fileName; |
| | | String outputDir = filePath + "/pdf"; |
| | | String outputFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".pdf"; |
| | | // Make sure the paths are well-formed |
| | | String inputFile = new File(filePath, fileName).getAbsolutePath(); |
| | | String outputDir = new File(filePath, "pdf").getAbsolutePath(); |
| | | |
| | | // 2. Create a temporary file for the conversion |
| | | String tempDocx = createTempFileWithEncoding(inputFile); |
| | | // Create the output directory |
| | | new File(outputDir).mkdirs(); |
| | | |
| | | // 3. Use more detailed LibreOffice conversion arguments |
| | | List<String> command = new ArrayList<>(); |
| | | command.add("/usr/bin/soffice"); |
| | | command.add("--headless"); |
| | | command.add("--convert-to"); |
| | | command.add("pdf:writer_pdf_Export:PDFExport{'EmbedStandardFonts':true}"); |
| | | command.add("--outdir"); |
| | | command.add(outputDir); |
| | | command.add(tempDocx); |
| | | // Use the full set of conversion arguments |
| | | List<String> command = Arrays.asList( |
| | | "/usr/bin/libreoffice", // use the absolute path |
| | | "--headless", |
| | | "--norestore", |
| | | "--convert-to", |
| | | "pdf:writer_pdf_Export:PDFExport{" + |
| | | "EmbedStandardFonts=1;" + |
| | | "EmbedFonts=1;" + |
| | | "EmbedOnlyUsedFonts=0;" + |
| | | "UseTaggedPDF=1" + |
| | | "}", |
| | | "--outdir", |
| | | outputDir, |
| | | inputFile |
| | | ); |
| | | |
| | | // Create the process builder |
| | | ProcessBuilder pb = new ProcessBuilder(command); |
| | | |
| | | // 4. Set a more complete set of environment variables |
| | | // Set environment variables |
| | | Map<String, String> env = pb.environment(); |
| | | env.put("LC_ALL", "zh_CN.UTF-8"); |
| | | env.put("LANG", "zh_CN.UTF-8"); |
| | | env.put("LANGUAGE", "zh_CN.UTF-8"); |
| | | env.put("PYTHONIOENCODING", "utf8"); |
| | | env.put("JAVA_TOOL_OPTIONS", "-Dfile.encoding=UTF-8"); |
| | | |
| | | // 5. Run the conversion |
| | | // Redirect the error stream into standard output |
| | | pb.redirectErrorStream(true); |
| | | |
| | | // Start the process |
| | | Process process = pb.start(); |
| | | |
| | | // 6. Read the output |
| | | // Read the output |
| | | StringBuilder output = new StringBuilder(); |
| | | try (BufferedReader reader = new BufferedReader( |
| | | new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8))) { |
| | | String line; |
| | | while ((line = reader.readLine()) != null) { |
| | | output.append(line).append("\n"); |
| | | System.out.println(line); |
| | | } |
| | | } |
| | | |
| | | // 7. Wait for the process to finish |
| | | int exitCode = process.waitFor(); |
| | | |
| | | // 8. Clean up the temporary file |
| | | new File(tempDocx).delete(); |
| | | |
| | | if (exitCode == 0) { |
| | | return outputDir + "/" + outputFileName; |
| | | } else { |
| | | throw new RuntimeException("转换失败: " + output.toString()); |
| | | // Wait for the process to finish, with a timeout |
| | | if (!process.waitFor(120, TimeUnit.SECONDS)) { |
| | | process.destroyForcibly(); |
| | | throw new RuntimeException("转换超时"); |
| | | } |
| | | |
| | | int exitCode = process.exitValue(); |
| | | if (exitCode != 0) { |
| | | throw new RuntimeException("转换失败,退出码:" + exitCode + "\n输出:" + output); |
| | | } |
| | | |
| | | // Check the generated PDF file |
| | | String pdfFileName = fileName.substring(0, fileName.lastIndexOf(".")) + ".pdf"; |
| | | File pdfFile = new File(outputDir, pdfFileName); |
| | | |
| | | if (!pdfFile.exists() || pdfFile.length() == 0) { |
| | | throw new RuntimeException("PDF文件未生成或为空"); |
| | | } |
| | | String absolutePath = pdfFile.getAbsolutePath(); |
| | | |
| | | MultipartFile multipartFile = convertFileToMultipartFile(pdfFile); |
| | | String s = tencentCosUtil.upLoadFile(multipartFile,"/wordToPdf"); |
| | | return s; |
| | | |
| | | } catch (Exception e) { |
| | | throw new RuntimeException("PDF转换失败: " + e.getMessage(), e); |
| | | } |
| | | } |
| | | private static String createTempFileWithEncoding(String inputFile) { |
| | | |
| | | public MultipartFile convertFileToMultipartFile(File file) throws IOException { |
| | | // 读取文件内容到字节数组 |
| | | byte[] fileContent = Files.readAllBytes(file.toPath()); |
| | | |
| | | // 创建 MultipartFile 对象 |
| | | MultipartFile multipartFile = new MockMultipartFile( |
| | | file.getName(), // 文件名 |
| | | file.getName(), // 原始文件名 |
| | | "application/pdf", // 内容类型,根据实际情况调整 |
| | | fileContent // 文件内容 |
| | | ); |
| | | |
| | | return multipartFile; |
| | | } |
| | | |
// NOTE(review): this region appears to interleave setupEnvironment with the body of a
// document-rewriting helper — it reads inputFile (not a parameter here), returns tempFile from a
// void method, and contains two consecutive throw statements. Confirm against version control.
| | | // Check and configure the environment before use |
| | | public static void setupEnvironment() { |
| | | try { |
| | | // 1. Read the original document |
| | | XWPFDocument doc = new XWPFDocument(new FileInputStream(inputFile)); |
| | | // 1. Check the LibreOffice installation |
| | | checkLibreOffice(); |
| | | |
| | | // 2. Adjust the document fonts and encoding |
| | | for (XWPFParagraph paragraph : doc.getParagraphs()) { |
| | | for (XWPFRun run : paragraph.getRuns()) { |
| | | // Set the Chinese font |
| | | run.setFontFamily("WenQuanYi Zen Hei"); |
| | | run.setFontFamily("WenQuanYi Zen Hei", XWPFRun.FontCharRange.eastAsia); |
| | | // 2. Check and install fonts |
| | | installFonts(); |
| | | |
| | | // Make sure the text is UTF-8 encoded |
| | | String text = run.getText(0); |
| | | if (text != null) { |
| | | text = new String(text.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8); |
| | | run.setText(text, 0); |
| | | } |
| | | } |
| | | } |
| | | // 3. Configure fonts |
| | | configureFonts(); |
| | | |
| | | // 3. Process the text inside tables |
| | | for (XWPFTable table : doc.getTables()) { |
| | | for (XWPFTableRow row : table.getRows()) { |
| | | for (XWPFTableCell cell : row.getTableCells()) { |
| | | for (XWPFParagraph paragraph : cell.getParagraphs()) { |
| | | for (XWPFRun run : paragraph.getRuns()) { |
| | | run.setFontFamily("WenQuanYi Zen Hei"); |
| | | run.setFontFamily("WenQuanYi Zen Hei", XWPFRun.FontCharRange.eastAsia); |
| | | |
| | | String text = run.getText(0); |
| | | if (text != null) { |
| | | text = new String(text.getBytes(StandardCharsets.UTF_8), StandardCharsets.UTF_8); |
| | | run.setText(text, 0); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | |
| | | // 4. Save as a temporary file |
| | | String tempFile = inputFile + ".temp.docx"; |
| | | try (FileOutputStream out = new FileOutputStream(tempFile)) { |
| | | doc.write(out); |
| | | } |
| | | |
| | | return tempFile; |
| | | // 4. Verify environment variables |
| | | checkEnvironment(); |
| | | |
| | | } catch (Exception e) { |
| | | throw new RuntimeException("处理文档编码失败: " + e.getMessage(), e); |
| | | throw new RuntimeException("环境设置失败: " + e.getMessage(), e); |
| | | } |
| | | } |
| | | |
| | | // Environment checks performed before conversion; any failure is rethrown as a RuntimeException |
| | | private static void preConversionCheck() { |
| | | try { |
| | | // 1. Check for and install the required fonts |
| | | installRequiredFonts(); |
| | | |
| | | // 2. Verify the LibreOffice installation |
| | | verifyLibreOfficeInstallation(); |
| | | |
| | | // 3. Set up the font configuration |
| | | setupFontConfig(); |
| | | |
| | | } catch (Exception e) { |
| | | throw new RuntimeException("环境检查失败: " + e.getMessage(), e); |
| | | private static void checkLibreOffice() throws IOException, InterruptedException { |
| | | Process process = Runtime.getRuntime().exec("which libreoffice"); |
| | | if (process.waitFor() != 0) { |
| | | throw new RuntimeException("LibreOffice未安装"); |
| | | } |
| | | } |
| | | |
// Best-effort font installation: runs "sudo apt-get install -y" for the four listed font packages
// with inherited stdio and waits for it; any exception is reported on stderr and not rethrown.
// NOTE(review): the method's closing brace is not present in this span — this looks like a
// superseded revision left interleaved with installFonts; confirm against version control.
| | | private static void installRequiredFonts() { |
| | | try { |
| | | ProcessBuilder pb = new ProcessBuilder( |
| | | "sudo", "apt-get", "install", "-y", |
| | | "fonts-wqy-zenhei", |
| | | "fonts-wqy-microhei", |
| | | "fonts-arphic-ukai", |
| | | "fonts-arphic-uming" |
| | | ); |
| | | pb.inheritIO(); |
| | | Process p = pb.start(); |
| | | p.waitFor(); |
| | | } catch (Exception e) { |
| | | System.err.println("警告: 安装字体失败 - " + e.getMessage()); |
| | | } |
| | | private static void installFonts() throws IOException, InterruptedException { |
| | | // 创建字体安装脚本 |
| | | String scriptContent = |
| | | "#!/bin/bash\n" + |
| | | "apt-get update\n" + |
| | | "apt-get install -y fonts-wqy-zenhei fonts-wqy-microhei fonts-arphic-ukai fonts-arphic-uming\n" + |
| | | "fc-cache -fv\n"; |
| | | |
| | | File script = new File("/tmp/install_fonts.sh"); |
| | | Files.write(script.toPath(), scriptContent.getBytes()); |
| | | script.setExecutable(true); |
| | | |
| | | // 执行脚本 |
| | | Process process = Runtime.getRuntime().exec("sudo /tmp/install_fonts.sh"); |
| | | process.waitFor(); |
| | | |
| | | // 清理脚本 |
| | | script.delete(); |
| | | } |
| | | |
// Runs "soffice --version" and throws if the first line of its output is missing or does not
// contain "LibreOffice".
// NOTE(review): both try blocks opened here are left unclosed in this span; a catch clause that
// reports "LibreOffice检查失败" appears further down, after the loop in checkEnvironment. This looks
// like a superseded revision left interleaved with newer methods — confirm against version control.
| | | private static void verifyLibreOfficeInstallation() { |
| | | try { |
| | | Process p = Runtime.getRuntime().exec("soffice --version"); |
| | | try (BufferedReader reader = new BufferedReader( |
| | | new InputStreamReader(p.getInputStream()))) { |
| | | String version = reader.readLine(); |
| | | if (version == null || !version.contains("LibreOffice")) { |
| | | throw new RuntimeException("LibreOffice未正确安装"); |
| | | } |
| | | private static void configureFonts() throws IOException { |
| | | // 创建字体配置文件 |
| | | String fontConfig = |
| | | "<?xml version=\"1.0\"?>\n" + |
| | | "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n" + |
| | | "<fontconfig>\n" + |
| | | " <match target=\"pattern\">\n" + |
| | | " <test name=\"family\"><string>serif</string></test>\n" + |
| | | " <edit name=\"family\" mode=\"prepend\">\n" + |
| | | " <string>WenQuanYi Zen Hei</string>\n" + |
| | | " </edit>\n" + |
| | | " </match>\n" + |
| | | " <match target=\"pattern\">\n" + |
| | | " <test name=\"family\"><string>sans-serif</string></test>\n" + |
| | | " <edit name=\"family\" mode=\"prepend\">\n" + |
| | | " <string>WenQuanYi Zen Hei</string>\n" + |
| | | " </edit>\n" + |
| | | " </match>\n" + |
| | | "</fontconfig>"; |
| | | |
| | | // 写入配置文件 |
| | | File configFile = new File(System.getProperty("user.home") + "/.fonts.conf"); |
| | | Files.write(configFile.toPath(), fontConfig.getBytes()); |
| | | } |
| | | |
// Warns on stderr for each of LANG, LC_ALL and LANGUAGE that is unset or does not contain "zh_CN".
// NOTE(review): the catch clause after the loop has no matching try in this method and its message
// refers to a LibreOffice check — it looks like a leftover from another method that was
// interleaved here; confirm against version control.
| | | private static void checkEnvironment() { |
| | | // Check the environment variables |
| | | String[] requiredVars = {"LANG", "LC_ALL", "LANGUAGE"}; |
| | | for (String var : requiredVars) { |
| | | String value = System.getenv(var); |
| | | if (value == null || !value.contains("zh_CN")) { |
| | | System.err.println("警告: " + var + " 环境变量未正确设置"); |
| | | } |
| | | } catch (Exception e) { |
| | | throw new RuntimeException("LibreOffice检查失败: " + e.getMessage()); |
| | | } |
| | | } |
| | | |
| | | private static void setupFontConfig() { |
| | | try { |
| | | String fontConfig = |
| | | "<?xml version='1.0'?>\n" + |
| | | "<!DOCTYPE fontconfig SYSTEM 'fonts.dtd'>\n" + |
| | | "<fontconfig>\n" + |
| | | " <match target=\"pattern\">\n" + |
| | | " <test name=\"family\"><string>SimSun</string></test>\n" + |
| | | " <edit name=\"family\" mode=\"assign\" binding=\"same\">\n" + |
| | | " <string>WenQuanYi Zen Hei</string>\n" + |
| | | " </edit>\n" + |
| | | " </match>\n" + |
| | | "</fontconfig>"; |
| | | |
| | | String userHome = System.getProperty("user.home"); |
| | | File fontConfigFile = new File(userHome + "/.fonts.conf"); |
| | | |
| | | try (FileWriter writer = new FileWriter(fontConfigFile)) { |
| | | writer.write(fontConfig); |
| | | } |
| | | } catch (Exception e) { |
| | | System.err.println("警告: 设置字体配置失败 - " + e.getMessage()); |
| | | } |
| | | } |
| | | |