!296 新增PDF线程管理,超时管理,内存缓存管理,更新PDF解析组件版本

Merge pull request !296 from 高雄/pdfddd
pull/289/MERGE
陈精华 6 months ago committed by Gitee
commit 0a4ae41b0c
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F

@ -22,7 +22,7 @@
<antlr.version>2.7.7</antlr.version> <antlr.version>2.7.7</antlr.version>
<concurrentlinkedhashmap.version>1.4.2</concurrentlinkedhashmap.version> <concurrentlinkedhashmap.version>1.4.2</concurrentlinkedhashmap.version>
<rocksdb.version>5.17.2</rocksdb.version> <rocksdb.version>5.17.2</rocksdb.version>
<pdfbox.version>2.0.29</pdfbox.version> <pdfbox.version>3.0.2</pdfbox.version>
<jai-imageio.version>1.4.0</jai-imageio.version> <jai-imageio.version>1.4.0</jai-imageio.version>
<jbig2-imageio.version>3.0.4</jbig2-imageio.version> <jbig2-imageio.version>3.0.4</jbig2-imageio.version>
<galimatias.version>0.2.1</galimatias.version> <galimatias.version>0.2.1</galimatias.version>

@ -110,6 +110,14 @@ convertMedias = ${KK_CONVERTMEDIAS:avi,mov,wmv,mkv,3gp,rm}
#PDF #PDF
#PDFdpi #PDFdpi
pdf2jpg.dpi = ${KK_PDF2JPG_DPI:144} pdf2jpg.dpi = ${KK_PDF2JPG_DPI:144}
# PDF conversion timeout in seconds for documents of up to 50 pages
pdf.timeout =${KK_pdf_TIMEOUT:90}
# PDF conversion timeout in seconds for documents of 51 to 200 pages
pdf.timeout80 =${KK_PDF_TIMEOUT80:180}
# PDF conversion timeout in seconds for documents of more than 200 pages
pdf.timeout200 =${KK_PDF_TIMEOUT200:300}
# Number of threads used for PDF conversion
pdf.thread =${KK_PDF_THREAD:5}
# #
pdf.presentationMode.disable = ${KK_PDF_PRESENTATION_MODE_DISABLE:true} pdf.presentationMode.disable = ${KK_PDF_PRESENTATION_MODE_DISABLE:true}
# #

@ -67,6 +67,10 @@ public class ConfigConstants {
private static String homePagination; private static String homePagination;
private static String homePageSize; private static String homePageSize;
private static String homeSearch; private static String homeSearch;
// Wait limit, in seconds, for pdf-to-image conversion of documents with at most 50 pages.
private static int pdfTimeout;
// Wait limit, in seconds, for pdf-to-image conversion of documents with 51 to 200 pages.
private static int pdfTimeout80;
// Wait limit, in seconds, for pdf-to-image conversion of documents with more than 200 pages.
private static int pdfTimeout200;
// Value of the pdf.thread setting; presumably the size of the PDF conversion thread pool (TODO confirm where the pool is built).
private static int pdfThread;
public static final String DEFAULT_CACHE_ENABLED = "true"; public static final String DEFAULT_CACHE_ENABLED = "true";
public static final String DEFAULT_TXT_TYPE = "txt,html,htm,asp,jsp,xml,json,properties,md,gitignore,log,java,py,c,cpp,sql,sh,bat,m,bas,prg,cmd,xbrl"; public static final String DEFAULT_TXT_TYPE = "txt,html,htm,asp,jsp,xml,json,properties,md,gitignore,log,java,py,c,cpp,sql,sh,bat,m,bas,prg,cmd,xbrl";
@ -107,6 +111,10 @@ public class ConfigConstants {
public static final String DEFAULT_HOME_PAGINATION = "true"; public static final String DEFAULT_HOME_PAGINATION = "true";
public static final String DEFAULT_HOME_PAGSIZE = "15"; public static final String DEFAULT_HOME_PAGSIZE = "15";
public static final String DEFAULT_HOME_SEARCH = "true"; public static final String DEFAULT_HOME_SEARCH = "true";
// Fallback for the pdf.timeout property when the refreshed config file does not define it.
public static final String DEFAULT_PDF_TIMEOUT = "90";
// Fallback for the pdf.timeout80 property when the refreshed config file does not define it.
public static final String DEFAULT_PDF_TIMEOUT80 = "180";
// Fallback for the pdf.timeout200 property when the refreshed config file does not define it.
public static final String DEFAULT_PDF_TIMEOUT200 = "300";
// Fallback for the pdf.thread property when the refreshed config file does not define it.
public static final String DEFAULT_PDF_THREAD = "5";
public static Boolean isCacheEnabled() { public static Boolean isCacheEnabled() {
return cacheEnabled; return cacheEnabled;
@ -580,6 +588,65 @@ public class ConfigConstants {
ConfigConstants.cadThread = cadThread; ConfigConstants.cadThread = cadThread;
} }
/**
 * Returns the current {@code pdf.timeout} setting.
 *
 * <p>The pdf-to-image conversion uses this value, in seconds, as the wait limit for
 * documents of at most 50 pages.
 *
 * @return the configured timeout value
 */
public static int getPdfTimeout() {
    return ConfigConstants.pdfTimeout;
}
/**
 * Receives the {@code pdf.timeout} property (default 90 when unset) and stores it in the
 * static holder through {@code setPdfTimeoutValue}.
 *
 * @param pdfTimeout the resolved property value
 */
@Value("${pdf.timeout:90}")
public void setPdfTimeout(int pdfTimeout) {
    ConfigConstants.setPdfTimeoutValue(pdfTimeout);
}
/**
 * Overwrites the static {@code pdfTimeout} setting.
 *
 * @param pdfTimeout the new timeout value; stored as given, without validation
 */
public static void setPdfTimeoutValue(int pdfTimeout) {
ConfigConstants.pdfTimeout = pdfTimeout;
}
/**
 * Returns the current {@code pdf.timeout80} setting.
 *
 * <p>The pdf-to-image conversion uses this value, in seconds, as the wait limit for
 * documents of 51 to 200 pages.
 *
 * @return the configured timeout value
 */
public static int getPdfTimeout80() {
    return ConfigConstants.pdfTimeout80;
}
/**
 * Receives the {@code pdf.timeout80} property (default 180 when unset) and stores it in the
 * static holder through {@code setPdfTimeout80Value}.
 *
 * @param pdfTimeout80 the resolved property value
 */
@Value("${pdf.timeout80:180}")
public void setPdfTimeout80(int pdfTimeout80) {
    ConfigConstants.setPdfTimeout80Value(pdfTimeout80);
}
/**
 * Overwrites the static {@code pdfTimeout80} setting.
 *
 * @param pdfTimeout80 the new timeout value; stored as given, without validation
 */
public static void setPdfTimeout80Value(int pdfTimeout80) {
ConfigConstants.pdfTimeout80 = pdfTimeout80;
}
/**
 * Returns the current {@code pdf.timeout200} setting.
 *
 * <p>The pdf-to-image conversion uses this value, in seconds, as the wait limit for
 * documents of more than 200 pages.
 *
 * @return the configured timeout value
 */
public static int getPdfTimeout200() {
    return ConfigConstants.pdfTimeout200;
}
/**
 * Receives the {@code pdf.timeout200} property (default 300 when unset) and stores it in the
 * static holder through {@code setPdfTimeout200Value}.
 *
 * @param pdfTimeout200 the resolved property value
 */
@Value("${pdf.timeout200:300}")
public void setPdfTimeout200(int pdfTimeout200) {
    ConfigConstants.setPdfTimeout200Value(pdfTimeout200);
}
/**
 * Overwrites the static {@code pdfTimeout200} setting.
 *
 * @param pdfTimeout200 the new timeout value; stored as given, without validation
 */
public static void setPdfTimeout200Value(int pdfTimeout200) {
ConfigConstants.pdfTimeout200 = pdfTimeout200;
}
/**
 * Returns the current {@code pdf.thread} setting.
 *
 * <p>NOTE(review): presumably the number of worker threads for PDF conversion; the consumer
 * of this value is not visible here, so confirm against the pool construction.
 *
 * @return the configured thread setting
 */
public static int getPdfThread() {
    return ConfigConstants.pdfThread;
}
/**
 * Receives the {@code pdf.thread} property (default 5 when unset) and stores it in the
 * static holder through {@code setPdfThreadValue}.
 *
 * @param pdfThread the resolved property value
 */
@Value("${pdf.thread:5}")
public void setPdfThread(int pdfThread) {
    ConfigConstants.setPdfThreadValue(pdfThread);
}
/**
 * Overwrites the static {@code pdfThread} setting.
 *
 * @param pdfThread the new thread setting; stored as given, without validation
 */
public static void setPdfThreadValue(int pdfThread) {
ConfigConstants.pdfThread = pdfThread;
}
/** /**
* OFFICE * OFFICE
*/ */

@ -78,6 +78,10 @@ public class ConfigRefreshComponent {
String homePagination; String homePagination;
String homePageSize; String homePageSize;
String homeSearch; String homeSearch;
int pdfTimeout;
int pdfTimeout80;
int pdfTimeout200;
int pdfThread;
while (true) { while (true) {
FileReader fileReader = new FileReader(configFilePath); FileReader fileReader = new FileReader(configFilePath);
BufferedReader bufferedReader = new BufferedReader(fileReader); BufferedReader bufferedReader = new BufferedReader(fileReader);
@ -126,6 +130,10 @@ public class ConfigRefreshComponent {
homePageSize = properties.getProperty("home.pagesize", ConfigConstants.DEFAULT_HOME_PAGSIZE); homePageSize = properties.getProperty("home.pagesize", ConfigConstants.DEFAULT_HOME_PAGSIZE);
homeSearch = properties.getProperty("home.search", ConfigConstants.DEFAULT_HOME_SEARCH); homeSearch = properties.getProperty("home.search", ConfigConstants.DEFAULT_HOME_SEARCH);
cadThread = Integer.parseInt(properties.getProperty("cad.thread", ConfigConstants.DEFAULT_CAD_THREAD)); cadThread = Integer.parseInt(properties.getProperty("cad.thread", ConfigConstants.DEFAULT_CAD_THREAD));
pdfTimeout = Integer.parseInt(properties.getProperty("pdf.timeout", ConfigConstants.DEFAULT_PDF_TIMEOUT));
pdfTimeout80 = Integer.parseInt(properties.getProperty("pdf.timeout80", ConfigConstants.DEFAULT_PDF_TIMEOUT80));
pdfTimeout200 = Integer.parseInt(properties.getProperty("pdf.timeout200", ConfigConstants.DEFAULT_PDF_TIMEOUT200));
pdfThread = Integer.parseInt(properties.getProperty("pdf.thread", ConfigConstants.DEFAULT_PDF_THREAD));
prohibitArray = prohibit.split(","); prohibitArray = prohibit.split(",");
ConfigConstants.setCacheEnabledValueValue(cacheEnabled); ConfigConstants.setCacheEnabledValueValue(cacheEnabled);
@ -169,6 +177,10 @@ public class ConfigRefreshComponent {
ConfigConstants.setHomePaginationValue(homePagination); ConfigConstants.setHomePaginationValue(homePagination);
ConfigConstants.setHomePageSizeValue(homePageSize); ConfigConstants.setHomePageSizeValue(homePageSize);
ConfigConstants.setHomeSearchValue(homeSearch); ConfigConstants.setHomeSearchValue(homeSearch);
ConfigConstants.setPdfTimeoutValue(pdfTimeout);
ConfigConstants.setPdfTimeout80Value(pdfTimeout80);
ConfigConstants.setPdfTimeout200Value(pdfTimeout200);
ConfigConstants.setPdfThreadValue(pdfThread);
setWatermarkConfig(properties); setWatermarkConfig(properties);
bufferedReader.close(); bufferedReader.close();
fileReader.close(); fileReader.close();

@ -14,8 +14,8 @@ import com.aspose.cad.*;
import com.aspose.cad.fileformats.cad.CadDrawTypeMode; import com.aspose.cad.fileformats.cad.CadDrawTypeMode;
import com.aspose.cad.fileformats.tiff.enums.TiffExpectedFormat; import com.aspose.cad.fileformats.tiff.enums.TiffExpectedFormat;
import com.aspose.cad.imageoptions.*; import com.aspose.cad.imageoptions.*;
import com.itextpdf.text.pdf.PdfReader;
import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType; import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer; import org.apache.pdfbox.rendering.PDFRenderer;
@ -37,7 +37,10 @@ import java.io.*;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
import java.util.*; import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.*; import java.util.concurrent.*;
import java.util.stream.IntStream; import java.util.stream.IntStream;
@ -236,9 +239,9 @@ public class FileHandlerService implements InitializingBean {
boolean forceUpdatedCache = fileAttribute.forceUpdatedCache(); boolean forceUpdatedCache = fileAttribute.forceUpdatedCache();
boolean usePasswordCache = fileAttribute.getUsePasswordCache(); boolean usePasswordCache = fileAttribute.getUsePasswordCache();
String filePassword = fileAttribute.getFilePassword(); String filePassword = fileAttribute.getFilePassword();
String pdfPassword = null; PDDocument doc;
PDDocument doc = null; final String[] pdfPassword = {null};
PdfReader pdfReader = null; final int[] pageCount = new int[1];
if (!forceUpdatedCache) { if (!forceUpdatedCache) {
List<String> cacheResult = this.loadPdf2jpgCache(pdfFilePath); List<String> cacheResult = this.loadPdf2jpgCache(pdfFilePath);
if (!CollectionUtils.isEmpty(cacheResult)) { if (!CollectionUtils.isEmpty(cacheResult)) {
@ -246,64 +249,77 @@ public class FileHandlerService implements InitializingBean {
} }
} }
List<String> imageUrls = new ArrayList<>(); List<String> imageUrls = new ArrayList<>();
File pdfFile = new File(fileNameFilePath);
if (!pdfFile.exists()) {
return null;
}
int index = pdfFilePath.lastIndexOf(".");
String folder = pdfFilePath.substring(0, index);
File path = new File(folder);
if (!path.exists() && !path.mkdirs()) {
logger.error("创建转换文件【{}】目录失败,请检查目录权限!", folder);
}
try { try {
File pdfFile = new File(fileNameFilePath); doc = Loader.loadPDF(pdfFile, filePassword);
if (!pdfFile.exists()) {
return null;
}
doc = PDDocument.load(pdfFile, filePassword);
doc.setResourceCache(new NotResourceCache()); doc.setResourceCache(new NotResourceCache());
int pageCount = doc.getNumberOfPages(); pageCount[0] = doc.getNumberOfPages();
PDFRenderer pdfRenderer = new PDFRenderer(doc); } catch (IOException e) {
int index = pdfFilePath.lastIndexOf("."); Throwable[] throwableArray = ExceptionUtils.getThrowables(e);
String folder = pdfFilePath.substring(0, index); for (Throwable throwable : throwableArray) {
File path = new File(folder); if (throwable instanceof IOException || throwable instanceof EncryptedDocumentException) {
if (!path.exists() && !path.mkdirs()) { if (e.getMessage().toLowerCase().contains(PDF_PASSWORD_MSG)) {
logger.error("创建转换文件【{}】目录失败,请检查目录权限!", folder); pdfPassword[0] = PDF_PASSWORD_MSG; //查询到该文件是密码文件 输出带密码的值
}
}
} }
String imageFilePath; if (!PDF_PASSWORD_MSG.equals(pdfPassword[0])) { //该文件异常 错误原因非密码原因输出错误
for (int pageIndex = 0; pageIndex < pageCount; pageIndex++) { logger.error("Convert pdf exception, pdfFilePath{}", pdfFilePath, e);
imageFilePath = folder + File.separator + pageIndex + PDF2JPG_IMAGE_FORMAT;
BufferedImage image = pdfRenderer.renderImageWithDPI(pageIndex, ConfigConstants.getPdf2JpgDpi(), ImageType.RGB);
ImageIOUtil.writeImage(image, imageFilePath, ConfigConstants.getPdf2JpgDpi());
String imageUrl = this.getPdf2jpgUrl(pdfFilePath, pageIndex);
imageUrls.add(imageUrl);
} }
throw new Exception(e);
}
Callable <List<String>> call = () -> {
try { try {
if (!ObjectUtils.isEmpty(filePassword)) { //获取到密码 判断是否是加密文件 String imageFilePath;
pdfReader = new PdfReader(fileNameFilePath); //读取PDF文件 通过异常获取该文件是否有密码字符 BufferedImage image = null;
PDFRenderer pdfRenderer = new PDFRenderer(doc);
pdfRenderer.setSubsamplingAllowed(true);
for (int pageIndex = 0; pageIndex < pageCount[0]; pageIndex++) {
imageFilePath = folder + File.separator + pageIndex + PDF2JPG_IMAGE_FORMAT;
image = pdfRenderer.renderImageWithDPI(pageIndex, ConfigConstants.getPdf2JpgDpi(), ImageType.RGB);
ImageIOUtil.writeImage(image, imageFilePath, ConfigConstants.getPdf2JpgDpi());
String imageUrl = this.getPdf2jpgUrl(pdfFilePath, pageIndex);
imageUrls.add(imageUrl);
} }
} catch (Exception e) { //获取异常方法 判断是否有加密字符串 image.flush();
Throwable[] throwableArray = ExceptionUtils.getThrowables(e); } catch (IOException e) {
for (Throwable throwable : throwableArray) { throw new Exception(e);
if (throwable instanceof IOException || throwable instanceof EncryptedDocumentException) {
if (e.getMessage().toLowerCase().contains(PDF_PASSWORD_MSG)) {
pdfPassword = PDF_PASSWORD_MSG; //查询到该文件是密码文件 输出带密码的值
}
}
}
if (!PDF_PASSWORD_MSG.equals(pdfPassword)) { //该文件异常 错误原因非密码原因输出错误
logger.error("Convert pdf exception, pdfFilePath{}", pdfFilePath, e);
}
} finally { } finally {
if (pdfReader != null) { //关闭 doc.close();
pdfReader.close();
}
}
if (usePasswordCache || !PDF_PASSWORD_MSG.equals(pdfPassword)) { //加密文件 判断是否启用缓存命令
this.addPdf2jpgCache(pdfFilePath, pageCount);
}
} catch (IOException e) {
if (!e.getMessage().contains(PDF_PASSWORD_MSG)) {
logger.error("Convert pdf to jpg exception, pdfFilePath{}", pdfFilePath, e);
} }
return imageUrls;
};
Future<List<String>> result = pool.submit(call);
int pdftimeout;
if(pageCount[0] <=50){
pdftimeout = ConfigConstants.getPdfTimeout();
}else if(pageCount[0] <=200){
pdftimeout = ConfigConstants.getPdfTimeout80();
}else {
pdftimeout = ConfigConstants.getPdfTimeout200();
}
try {
result.get(pdftimeout, TimeUnit.SECONDS);
// 如果在超时时间内没有数据返回则抛出TimeoutException异常
} catch (InterruptedException | ExecutionException e) {
throw new Exception(e); throw new Exception(e);
} catch (TimeoutException e) {
throw new Exception("overtime");
} finally { } finally {
if (doc != null) { //关闭 //关闭
doc.close(); doc.close();
} }
if (usePasswordCache || ObjectUtils.isEmpty(filePassword)) { //加密文件 判断是否启用缓存命令
this.addPdf2jpgCache(pdfFilePath, pageCount[0]);
} }
return imageUrls; return imageUrls;
} }

@ -2,9 +2,12 @@ package cn.keking.service.cache;
import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.pdmodel.DefaultResourceCache; import org.apache.pdfbox.pdmodel.DefaultResourceCache;
import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList;
import org.apache.pdfbox.pdmodel.graphics.PDXObject; import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
import java.io.IOException; import org.apache.pdfbox.pdmodel.graphics.pattern.PDAbstractPattern;
import org.apache.pdfbox.pdmodel.graphics.shading.PDShading;
import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState;
/** /**
* @author: Sawyer.Yong * @author: Sawyer.Yong
@ -14,7 +17,21 @@ import java.io.IOException;
public class NotResourceCache extends DefaultResourceCache { public class NotResourceCache extends DefaultResourceCache {
@Override @Override
public void put(COSObject indirect, PDXObject xobject) throws IOException { public void put(COSObject indirect, PDColorSpace colorSpace) {
// do nothing }
@Override
public void put(COSObject indirect, PDExtendedGraphicsState extGState) {
}
@Override
public void put(COSObject indirect, PDShading shading) {
}
@Override
public void put(COSObject indirect, PDAbstractPattern pattern) {
}
@Override
public void put(COSObject indirect, PDPropertyList propertyList) {
}
@Override
public void put(COSObject indirect, PDXObject xobject) {
} }
} }

Loading…
Cancel
Save