在多CPU(多核)环境下,为了充分发挥多核性能,可以采用如下方式并行合并文档,合并效率会显著提升。
int numCpus = Runtime.getRuntime().availableProcessors();
// Log the detected core count; it is handed to the chunked merge as the degree of parallelism.
log.info("CPU内核数:" + numCpus);

// Stage 1: merge the input files chunk by chunk; each byte array is one serialized partial document.
List<byte[]> tempFiles = mergeDocumentsInChunks(files, numCpus);

// Stage 2: append every partial document, in list order, to a fresh result document.
log.info("合并所有部分");
com.spire.doc.Document finalDocument = new Document();
for (int idx = 0; idx < tempFiles.size(); idx++) {
    // Progress is reported with a 1-based chunk number.
    log.info("合并所有部分,合并保存第{}个分块,共{}个分块.............", idx + 1, tempFiles.size());
    InputStream chunkStream = new ByteArrayInputStream(tempFiles.get(idx));
    finalDocument.insertTextFromStream(chunkStream, FileFormat.Docx_2013);
}

// Stage 3: write the combined document to the target path and hand it back to the caller.
log.info("合并文件开始保存到磁盘:" + mergDoc);
finalDocument.saveToFile(mergDoc, FileFormat.Docx_2013);
log.info("保存成功");
return finalDocument;
/**
 * Splits {@code files} into contiguous chunks, merges every chunk on a worker thread via
 * {@code mergeDocuments}, and returns the serialized result of each chunk.
 *
 * <p>Chunks keep the input order and results are collected in submission order, so
 * concatenating the returned arrays in list order preserves the order of {@code files}.
 *
 * @param files   paths of the documents to merge, in the desired output order
 * @param numCpus desired degree of parallelism; values below 1 are treated as 1
 * @return one byte array per chunk, in chunk order; an empty list when {@code files} is empty
 * @throws Exception if merging a chunk fails (surfaced through {@code Future.get()}) or the
 *                   calling thread is interrupted while waiting for a result
 */
public static List<byte[]> mergeDocumentsInChunks(List<Path> files, int numCpus) throws Exception {
    int numFiles = files.size();
    // Guard against a non-positive CPU count, which would otherwise cause a division by zero
    // below and an IllegalArgumentException from newFixedThreadPool.
    int threads = Math.max(1, numCpus);
    // Ceiling division: at most `threads` chunks are produced, so every chunk can start at once.
    // Floor division could yield more chunks than threads (e.g. 10 files / 4 CPUs -> 5 chunks).
    int chunkSize = Math.max(1, numFiles / threads + (numFiles % threads == 0 ? 0 : 1));

    List<List<Path>> fileChunks = new ArrayList<>();
    for (int start = 0; start < numFiles; start += chunkSize) {
        fileChunks.add(files.subList(start, Math.min(start + chunkSize, numFiles)));
    }
    // The original literal contained escaped quotes and printed the text "+numFiles+" verbatim
    // instead of the actual file count.
    log.info("文件分块:将所有文件路径按CPU核数分块,每个分块包含一部分文件,共" + fileChunks.size()
            + "个块,每块文件数:" + chunkSize + "共" + numFiles + "个文件");
    log.info("使用并行合并文档");

    ExecutorService executor = Executors.newFixedThreadPool(threads);
    try {
        List<Future<byte[]>> futures = new ArrayList<>(fileChunks.size());
        for (List<Path> chunk : fileChunks) {
            futures.add(executor.submit(() -> mergeDocuments(chunk)));
        }
        List<byte[]> tempFiles = new ArrayList<>(futures.size());
        for (Future<byte[]> future : futures) {
            tempFiles.add(future.get());
        }
        return tempFiles;
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can observe it.
        Thread.currentThread().interrupt();
        throw e;
    } finally {
        // Always release the pool. On success every task has already finished, so this only
        // stops idle workers; on failure it also cancels the chunks that are still pending.
        executor.shutdownNow();
    }
}
/**
 * Merges the given files, in list order, into one new document and returns it serialized
 * in the {@code Docx_2013} format.
 *
 * @param filePaths paths of the documents to append, in the desired order
 * @return the bytes of the combined document
 * @throws Exception if inserting a file or saving the combined document fails
 */
public static byte[] mergeDocuments(List<Path> filePaths) throws Exception {
    Document merged = new Document();

    for (Path source : filePaths) {
        String location = source.toString();
        log.info("并行文档合并中......{}", location);
        merged.insertTextFromFile(location, FileFormat.Docx_2013);
    }

    // Serialize into memory rather than to disk; the caller receives the raw bytes.
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    merged.saveToStream(buffer, FileFormat.Docx_2013);
    byte[] serialized = buffer.toByteArray();
    return serialized;
}