Browse Source

!296 新增PDF线程管理,超时管理,内存缓存管理,更新PDF解析组件版本

Merge pull request !296 from 高雄/pdfddd
pull/289/MERGE
陈精华 6 months ago committed by Gitee
parent
commit
0a4ae41b0c
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
  1. 2
      pom.xml
  2. 8
      server/src/main/config/application.properties
  3. 67
      server/src/main/java/cn/keking/config/ConfigConstants.java
  4. 12
      server/src/main/java/cn/keking/config/ConfigRefreshComponent.java
  5. 86
      server/src/main/java/cn/keking/service/FileHandlerService.java
  6. 25
      server/src/main/java/cn/keking/service/cache/NotResourceCache.java

2
pom.xml

@ -22,7 +22,7 @@
<antlr.version>2.7.7</antlr.version>
<concurrentlinkedhashmap.version>1.4.2</concurrentlinkedhashmap.version>
<rocksdb.version>5.17.2</rocksdb.version>
<pdfbox.version>2.0.29</pdfbox.version>
<pdfbox.version>3.0.2</pdfbox.version>
<jai-imageio.version>1.4.0</jai-imageio.version>
<jbig2-imageio.version>3.0.4</jbig2-imageio.version>
<galimatias.version>0.2.1</galimatias.version>

8
server/src/main/config/application.properties

@ -110,6 +110,14 @@ convertMedias = ${KK_CONVERTMEDIAS:avi,mov,wmv,mkv,3gp,rm}
#PDF预览模块设置
#配置PDF文件生成图片的像素大小dpi 越高图片质量越清晰同时也会消耗更多的计算资源
pdf2jpg.dpi = ${KK_PDF2JPG_DPI:144}
#PDF转换超时设置 (低于50页) 温馨提示这里数字仅供参考
pdf.timeout =${KK_pdf_TIMEOUT:90}
#PDF转换超时设置 (高于50小于200页)
pdf.timeout80 =${KK_PDF_TIMEOUT80:180}
#PDF转换超时设置 (大于200页)
pdf.timeout200 =${KK_PDF_TIMEOUT200:300}
#PDF转换线程设置
pdf.thread =${KK_PDF_THREAD:5}
#是否禁止演示模式
pdf.presentationMode.disable = ${KK_PDF_PRESENTATION_MODE_DISABLE:true}
#是否禁止打开文件

67
server/src/main/java/cn/keking/config/ConfigConstants.java

@ -67,6 +67,10 @@ public class ConfigConstants {
private static String homePagination;
private static String homePageSize;
private static String homeSearch;
private static int pdfTimeout;
private static int pdfTimeout80;
private static int pdfTimeout200;
private static int pdfThread;
public static final String DEFAULT_CACHE_ENABLED = "true";
public static final String DEFAULT_TXT_TYPE = "txt,html,htm,asp,jsp,xml,json,properties,md,gitignore,log,java,py,c,cpp,sql,sh,bat,m,bas,prg,cmd,xbrl";
@ -107,6 +111,10 @@ public class ConfigConstants {
public static final String DEFAULT_HOME_PAGINATION = "true";
public static final String DEFAULT_HOME_PAGSIZE = "15";
public static final String DEFAULT_HOME_SEARCH = "true";
public static final String DEFAULT_PDF_TIMEOUT = "90";
public static final String DEFAULT_PDF_TIMEOUT80 = "180";
public static final String DEFAULT_PDF_TIMEOUT200 = "300";
public static final String DEFAULT_PDF_THREAD = "5";
public static Boolean isCacheEnabled() {
return cacheEnabled;
@ -580,6 +588,65 @@ public class ConfigConstants {
ConfigConstants.cadThread = cadThread;
}
/**
* 以下为pdf转换模块设置
*/
public static int getPdfTimeout() {
return pdfTimeout;
}
@Value("${pdf.timeout:90}")
public void setPdfTimeout(int pdfTimeout) {
setPdfTimeoutValue(pdfTimeout);
}
public static void setPdfTimeoutValue(int pdfTimeout) {
ConfigConstants.pdfTimeout = pdfTimeout;
}
public static int getPdfTimeout80() {
return pdfTimeout80;
}
@Value("${pdf.timeout80:180}")
public void setPdfTimeout80(int pdfTimeout80) {
setPdfTimeout80Value(pdfTimeout80);
}
public static void setPdfTimeout80Value(int pdfTimeout80) {
ConfigConstants.pdfTimeout80 = pdfTimeout80;
}
public static int getPdfTimeout200() {
return pdfTimeout200;
}
@Value("${pdf.timeout200:300}")
public void setPdfTimeout200(int pdfTimeout200) {
setPdfTimeout200Value(pdfTimeout200);
}
public static void setPdfTimeout200Value(int pdfTimeout200) {
ConfigConstants.pdfTimeout200 = pdfTimeout200;
}
public static int getPdfThread() {
return pdfThread;
}
@Value("${pdf.thread:5}")
public void setPdfThread(int pdfThread) {
setPdfThreadValue(pdfThread);
}
public static void setPdfThreadValue(int pdfThread) {
ConfigConstants.pdfThread = pdfThread;
}
/**
* 以下为OFFICE转换模块设置
*/

12
server/src/main/java/cn/keking/config/ConfigRefreshComponent.java

@ -78,6 +78,10 @@ public class ConfigRefreshComponent {
String homePagination;
String homePageSize;
String homeSearch;
int pdfTimeout;
int pdfTimeout80;
int pdfTimeout200;
int pdfThread;
while (true) {
FileReader fileReader = new FileReader(configFilePath);
BufferedReader bufferedReader = new BufferedReader(fileReader);
@ -126,6 +130,10 @@ public class ConfigRefreshComponent {
homePageSize = properties.getProperty("home.pagesize", ConfigConstants.DEFAULT_HOME_PAGSIZE);
homeSearch = properties.getProperty("home.search", ConfigConstants.DEFAULT_HOME_SEARCH);
cadThread = Integer.parseInt(properties.getProperty("cad.thread", ConfigConstants.DEFAULT_CAD_THREAD));
pdfTimeout = Integer.parseInt(properties.getProperty("pdf.timeout", ConfigConstants.DEFAULT_PDF_TIMEOUT));
pdfTimeout80 = Integer.parseInt(properties.getProperty("pdf.timeout80", ConfigConstants.DEFAULT_PDF_TIMEOUT80));
pdfTimeout200 = Integer.parseInt(properties.getProperty("pdf.timeout200", ConfigConstants.DEFAULT_PDF_TIMEOUT200));
pdfThread = Integer.parseInt(properties.getProperty("pdf.thread", ConfigConstants.DEFAULT_PDF_THREAD));
prohibitArray = prohibit.split(",");
ConfigConstants.setCacheEnabledValueValue(cacheEnabled);
@ -169,6 +177,10 @@ public class ConfigRefreshComponent {
ConfigConstants.setHomePaginationValue(homePagination);
ConfigConstants.setHomePageSizeValue(homePageSize);
ConfigConstants.setHomeSearchValue(homeSearch);
ConfigConstants.setPdfTimeoutValue(pdfTimeout);
ConfigConstants.setPdfTimeout80Value(pdfTimeout80);
ConfigConstants.setPdfTimeout200Value(pdfTimeout200);
ConfigConstants.setPdfThreadValue(pdfThread);
setWatermarkConfig(properties);
bufferedReader.close();
fileReader.close();

86
server/src/main/java/cn/keking/service/FileHandlerService.java

@ -14,8 +14,8 @@ import com.aspose.cad.*;
import com.aspose.cad.fileformats.cad.CadDrawTypeMode;
import com.aspose.cad.fileformats.tiff.enums.TiffExpectedFormat;
import com.aspose.cad.imageoptions.*;
import com.itextpdf.text.pdf.PdfReader;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
@ -37,7 +37,10 @@ import java.io.*;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.*;
import java.util.stream.IntStream;
@ -236,9 +239,9 @@ public class FileHandlerService implements InitializingBean {
boolean forceUpdatedCache = fileAttribute.forceUpdatedCache();
boolean usePasswordCache = fileAttribute.getUsePasswordCache();
String filePassword = fileAttribute.getFilePassword();
String pdfPassword = null;
PDDocument doc = null;
PdfReader pdfReader = null;
PDDocument doc;
final String[] pdfPassword = {null};
final int[] pageCount = new int[1];
if (!forceUpdatedCache) {
List<String> cacheResult = this.loadPdf2jpgCache(pdfFilePath);
if (!CollectionUtils.isEmpty(cacheResult)) {
@ -246,64 +249,77 @@ public class FileHandlerService implements InitializingBean {
}
}
List<String> imageUrls = new ArrayList<>();
try {
File pdfFile = new File(fileNameFilePath);
if (!pdfFile.exists()) {
return null;
}
doc = PDDocument.load(pdfFile, filePassword);
doc.setResourceCache(new NotResourceCache());
int pageCount = doc.getNumberOfPages();
PDFRenderer pdfRenderer = new PDFRenderer(doc);
int index = pdfFilePath.lastIndexOf(".");
String folder = pdfFilePath.substring(0, index);
File path = new File(folder);
if (!path.exists() && !path.mkdirs()) {
logger.error("创建转换文件【{}】目录失败,请检查目录权限!", folder);
}
String imageFilePath;
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) {
imageFilePath = folder + File.separator + pageIndex + PDF2JPG_IMAGE_FORMAT;
BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, ConfigConstants.getPdf2JpgDpi(), ImageType.RGB);
ImageIOUtil.writeImage(image, imageFilePath, ConfigConstants.getPdf2JpgDpi());
String imageUrl = this.getPdf2jpgUrl(pdfFilePath, pageIndex);
imageUrls.add(imageUrl);
}
try {
if (!ObjectUtils.isEmpty(filePassword)) { //获取到密码 判断是否是加密文件
pdfReader = new PdfReader(fileNameFilePath); //读取PDF文件 通过异常获取该文件是否有密码字符
}
} catch (Exception e) { //获取异常方法 判断是否有加密字符串
doc = Loader.loadPDF(pdfFile, filePassword);
doc.setResourceCache(new NotResourceCache());
pageCount[0] = doc.getNumberOfPages();
} catch (IOException e) {
Throwable[] throwableArray = ExceptionUtils.getThrowables(e);
for (Throwable throwable : throwableArray) {
if (throwable instanceof IOException || throwable instanceof EncryptedDocumentException) {
if (e.getMessage().toLowerCase().contains(PDF_PASSWORD_MSG)) {
pdfPassword = PDF_PASSWORD_MSG; //查询到该文件是密码文件 输出带密码的值
pdfPassword[0] = PDF_PASSWORD_MSG; //查询到该文件是密码文件 输出带密码的值
}
}
}
if (!PDF_PASSWORD_MSG.equals(pdfPassword)) { //该文件异常 错误原因非密码原因输出错误
if (!PDF_PASSWORD_MSG.equals(pdfPassword[0])) { //该文件异常 错误原因非密码原因输出错误
logger.error("Convert pdf exception, pdfFilePath:{}", pdfFilePath, e);
}
} finally {
if (pdfReader != null) { //关闭
pdfReader.close();
}
throw new Exception(e);
}
if (usePasswordCache || !PDF_PASSWORD_MSG.equals(pdfPassword)) { //加密文件 判断是否启用缓存命令
this.addPdf2jpgCache(pdfFilePath, pageCount);
Callable <List<String>> call = () -> {
try {
String imageFilePath;
BufferedImage image = null;
PDFRenderer pdfRenderer = new PDFRenderer(doc);
pdfRenderer.setSubsamplingAllowed(true);
for (int pageIndex = 0; pageIndex < pageCount[0]; pageIndex++) {
imageFilePath = folder + File.separator + pageIndex + PDF2JPG_IMAGE_FORMAT;
image = pdfRenderer.renderImageWithDPI(pageIndex, ConfigConstants.getPdf2JpgDpi(), ImageType.RGB);
ImageIOUtil.writeImage(image, imageFilePath, ConfigConstants.getPdf2JpgDpi());
String imageUrl = this.getPdf2jpgUrl(pdfFilePath, pageIndex);
imageUrls.add(imageUrl);
}
image.flush();
} catch (IOException e) {
if (!e.getMessage().contains(PDF_PASSWORD_MSG)) {
logger.error("Convert pdf to jpg exception, pdfFilePath:{}", pdfFilePath, e);
throw new Exception(e);
} finally {
doc.close();
}
return imageUrls;
};
Future<List<String>> result = pool.submit(call);
int pdftimeout;
if(pageCount[0] <=50){
pdftimeout = ConfigConstants.getPdfTimeout();
}else if(pageCount[0] <=200){
pdftimeout = ConfigConstants.getPdfTimeout80();
}else {
pdftimeout = ConfigConstants.getPdfTimeout200();
}
try {
result.get(pdftimeout, TimeUnit.SECONDS);
// 如果在超时时间内,没有数据返回:则抛出TimeoutException异常
} catch (InterruptedException | ExecutionException e) {
throw new Exception(e);
} catch (TimeoutException e) {
throw new Exception("overtime");
} finally {
if (doc != null) { //关闭
//关闭
doc.close();
}
if (usePasswordCache || ObjectUtils.isEmpty(filePassword)) { //加密文件 判断是否启用缓存命令
this.addPdf2jpgCache(pdfFilePath, pageCount[0]);
}
return imageUrls;
}

25
server/src/main/java/cn/keking/service/cache/NotResourceCache.java vendored

@ -2,9 +2,12 @@ package cn.keking.service.cache;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.pdmodel.DefaultResourceCache;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
import org.apache.pdfbox.pdmodel.graphics.shading.PDShading;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
/**
* @author: Sawyer.Yong
@ -14,7 +17,21 @@ import java.io.IOException;
public class NotResourceCache extends DefaultResourceCache {
@Override
public void put(COSObject indirect, PDXObject xobject) throws IOException {
// do nothing
public void put(COSObject indirect, PDColorSpace colorSpace) {
}
@Override
public void put(COSObject indirect, PDExtendedGraphicsState extGState) {
}
@Override
public void put(COSObject indirect, PDShading shading) {
}
@Override
public void put(COSObject indirect, PDAbstractPattern pattern) {
}
@Override
public void put(COSObject indirect, PDPropertyList propertyList) {
}
@Override
public void put(COSObject indirect, PDXObject xobject) {
}
}

Loading…
Cancel
Save