1. 程式人生 > >利用jpedal進行pdf轉換成jpeg,jpg,png,tiff,tif等格式的圖片

利用jpedal進行pdf轉換成jpeg,jpg,png,tiff,tif等格式的圖片

         專案中運用到pdf檔案轉換成image圖片,開始時使用pdfbox開源庫進行圖片轉換,但是轉換出來的檔案中含有部分亂碼的情況.下面是pdfBox 的pdf轉換圖片的程式碼示例.

// PDFBox example: render every page of a PDF to image files.
try {
    String password = null;   // no password for this document
    int startPage = 1;
    String imageType = "jpg";
    File imageFile = new File("E:\\upload\\pdf\\20140424\\Servlet." + imageType);
    File pdfFile = new File("E:\\upload\\pdf\\20140424\\Servlet.pdf");

    PDDocument document = PDDocument.load(pdfFile);
    try {
        // endPage was assigned without a declaration in the original snippet.
        int endPage = document.getPageCount();
        PDFImageWriter imageWriter = new PDFImageWriter();
        imageWriter.writeImage(document, imageType, password, startPage, endPage, imageFile.getAbsolutePath());
    } finally {
        // Close the document even when rendering fails, so the file is released.
        document.close();
    }
} catch (IOException e) {
    e.printStackTrace();
}
        比較了其他的開源庫之後,準備採用jpedal。但是jpedal的資料非常少,除了官方網站外,即使是英文資料也很少。而且官方提供的程式碼示例中的一些方法在lgpl授權的
jpedal的程式碼庫中不存在。下面是收集到的一些資料

1、jpedal文件:http://javadoc.idrsolutions.com/org/jpedal/PdfDecoder.html

2、簡單呼叫示例:http://www.idrsolutions.com/java-pdf-code-faq/#pdf2img
3、lgpl授權的jpedal庫的下載地址:http://sourceforge.net/projects/jpedal/
4、轉換示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToImages.java.html

5、高清圖片轉換示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToHiResImages.java.html

        於是稍微修改了官方的轉換示例,下面是經過測試可以使用的轉換程式碼

import cn.com.pujiConvert.util.Common;

import com.sun.imageio.plugins.jpeg.JPEGImageWriter;
import org.jpedal.*;
import org.jpedal.color.ColorSpaces;
import org.jpedal.constants.PageInfo;
import org.jpedal.exception.PdfException;
import org.jpedal.external.Options;
import org.jpedal.fonts.FontMappings;
import org.jpedal.objects.PdfFileInformation;
import org.jpedal.utils.LogWriter;
import org.w3c.dom.Element;

import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageTypeSpecifier;
import javax.imageio.ImageWriteParam;
import javax.imageio.ImageWriter;
import javax.imageio.metadata.IIOMetadata;
import javax.imageio.plugins.jpeg.JPEGImageWriteParam;
import javax.imageio.stream.ImageOutputStream;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.Iterator;
import java.util.Locale;

public class ConvertPagesToImages{
    
    /**
     * show if image transparent 
     */
    boolean isTransparent=false;
    
    /**output where we put files */
    private String user_dir = System.getProperty("user.dir");
    
    /**use 96 dpi as default so pages correct size (72 will be smaller) */
    private float pageScaling =1.33f;
    
    /**flag to show if we print messages */
    public static boolean outputMessages = false;
    
    String output_dir=null;
    
    /**correct separator for OS */
    String separator = System.getProperty("file.separator");
    
    /**the decoder object which decodes the pdf and returns a data object */
    PdfDecoder decode_pdf = null;
    
    //type of image to save thumbnails
    private String format = "png";
    
    /** holding all creators that produce OCR pdf's ocr*/
    private String[] ocr = {"TeleForm"};
    
    /**scaling to use - default is 100 percent */
    private int scaling=100;
    
    /**file password or null */
    private String password=null;
    
    //only used if between 0 and 1 
    private float JPEGcompression=-1f;
    
    private int pageCount = 0;
    
    public ConvertPagesToImages() { 

    }
    
    public void init(String file_name, int scaling, String format, String output_dir, String password, int pageCount){
        /*縮小比率*/
    	this.scaling = scaling;
        /*圖片格式*/
        this.format = format;
        /*輸出目錄*/
        this.output_dir = output_dir;
        /*pdf密碼*/
        this.password = password;
        /*輸出圖片數*/
        this.pageCount = pageCount;
        
        /*判斷檔案是否存在*/
        File pdf_file = new File(file_name);
        if (!pdf_file.exists()) {
            System.out.println("File " + pdf_file + " not found");
            System.out.println("May need full path");
            
            return;
        }
       
        extraction(file_name, output_dir);   
    }
    
    private void extraction(String file_name, String output_dir) {        
        this.output_dir=output_dir;

        if (!user_dir.endsWith(separator)){
            user_dir = user_dir + separator;
        }
        
        if (file_name.toLowerCase().endsWith(".pdf")) {
            
            if(output_dir==null){
                output_dir=user_dir + "thumbnails" + separator;
            }
            
            decodeFile(file_name,output_dir);
        } else {
            String[] files = null;
            File inputFiles;
            
            if (!file_name.endsWith(separator)){
                file_name = file_name + separator;
            }
            
            try {
                inputFiles = new File(file_name);
                
                if (!inputFiles.isDirectory()) {
                    System.err.println(file_name + " is not a directory. Exiting program");
                }else{
                    files = inputFiles.list();
                }
            } catch (Exception ee) {
                LogWriter.writeLog("Exception trying to access file " + ee.getMessage());
                
            }
            
            if(files!=null){
                for (String file : files) {
                    
                    if (file.toLowerCase().endsWith(".pdf")) {
                        if (outputMessages){
                            System.out.println(file_name + file);
                        }
                        
                        decodeFile(file_name + file, output_dir);
                    }
                }
            }
        }
        
        if(outputMessages){
            System.out.println("Thumbnails created");
        }
    }
    
    /**
     * routine to decode a file 
     */
    private void decodeFile(String file_name,String output_dir) {
        String name = "demo"; //set a default just in case
        
        int pointer = file_name.lastIndexOf(separator);
        
        if(pointer==-1){
            pointer = file_name.lastIndexOf('/');
        }
        
        if (pointer != -1){
            name = file_name.substring(pointer + 1, file_name.length() - 4);
        }else if((file_name.toLowerCase().endsWith(".pdf"))){
            name=file_name.substring(0,file_name.length()-4);
        }
        
        //fix for odd files on Linux created when you view pages
        if(name.startsWith(".")){
            return;
        }
        
        //create output dir for images
        if(output_dir==null){
            output_dir = user_dir + "thumbnails" + separator ;
        }
        
        //PdfDecoder returns a PdfException if there is a problem
        try {
            if(decode_pdf==null){
                decode_pdf = new PdfDecoder(true);
            }
            
            /**optional JAI code for faster rendering*/
            org.jpedal.external.ImageHandler myExampleImageHandler=new org.jpedal.examples.handlers.ExampleImageDrawOnScreenHandler();
            decode_pdf.addExternalHandler(myExampleImageHandler, Options.ImageHandler);
            
            //mappings for non-embedded fonts to use
            FontMappings.setFontReplacements();

            //true as we are rendering page
            decode_pdf.setExtractionMode(0, pageScaling);
            //don't bother to extract text and images
            
            /**
             * open the file (and read metadata including pages in  file)
             */
            if (outputMessages){
                System.out.println("Opening file :" + file_name);
            }
            
            if(password != null && password != ""){
                decode_pdf.openPdfFile(file_name,password);
            }else{
                decode_pdf.openPdfFile(file_name);
            }
            
        } catch (Exception e) {
            System.err.println("8.Exception " + e + " in pdf code in "+file_name);
        }
        
        /**
         * extract data from pdf (if allowed).
         */
        if(decode_pdf.isEncrypted() && !decode_pdf.isFileViewable()){
        	throw new RuntimeException("Wrong password password used=>"+password+ '<');
        }else if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied())) && (!decode_pdf.isExtractionAllowed())) {
            throw new RuntimeException("Extraction not allowed");
        } else {
        	extractPageAsImage(file_name, output_dir, name, isTransparent);
        }
        
        /**close the pdf file */
        decode_pdf.closePdfFile();   
    }
    
    private void extractPageAsImage(String file_name, String output_dir, String name, boolean isTransparent) {   
        //create a directory if it doesn't exist
        File output_path = new File(output_dir);
        if (!output_path.exists()){
            output_path.mkdirs();
        }
        
        boolean isSingleOutputFile=false;
        boolean compressTiffs = false;
        String rawJPEGComp = null;   
        String jpgFlag = "96";        
        
        //page range
        int start = 1,  end = decode_pdf.getPageCount();
        
        end = (pageCount == 0) ? end : pageCount;
        
        if (outputMessages){
            System.out.println("Thumbnails will be in  " + output_dir);
        }
        
        try {
            BufferedImage[] multiPages = new BufferedImage[1 + (end - start)];
            
            for (int page = start; page < end + 1; page++){
            	getPage(output_dir, name, isTransparent, isSingleOutputFile,rawJPEGComp, jpgFlag, compressTiffs, start, end,multiPages, page);
            }
        } catch (Exception e) {
            decode_pdf.closePdfFile();
            throw new RuntimeException("Exception " + e.getMessage()+" with thumbnails on File="+file_name);
        }
    }
    
    private void getPage(
    		String output_dir, 
    		String name, 
    		boolean isTransparent,
            boolean isSingleOutputFile, 
            String rawJPEGComp, 
            String jpgFlag,
            boolean compressTiffs, 
            int start, 
            int end,
            BufferedImage[] multiPages, 
            int page
	) throws PdfException, IOException, FileNotFoundException {
        if (outputMessages ){
            System.out.println("Page " + page);
        }
        
        /**
         * 補0操作
         */
        String pageAsString	= String.valueOf(page);
        String maxPageSize	= String.valueOf(end);
        int padding			= maxPageSize.length()-pageAsString.length();
            
        for(int ii = 0; ii < padding; ii++){
            pageAsString = '0' + pageAsString;
        }
        
        String image_name;
        if(isSingleOutputFile){
            image_name =name;
        }else{
            image_name =name+"_page_" + pageAsString;
        }
        
        /**
         * get PRODUCER and if OCR disable text printing
         */
        PdfFileInformation currentFileInformation = decode_pdf.getFileInformationData();
        
        String[] values=currentFileInformation.getFieldValues();
        String[] fields=PdfFileInformation.getFieldNames();
            
        for(int i=0;i<fields.length;i++){
            if(fields[i].equals("Creator")){      
                for (String anOcr : ocr) {  
                    if (values[i].equals(anOcr)) {                           
                        decode_pdf.setRenderMode(PdfDecoder.RENDERIMAGES);                            
                    }
                }
            }
        }
            
        BufferedImage image_to_save;
        if(!isTransparent){
            image_to_save=decode_pdf.getPageAsImage(page);
        }else{ 
        	//use this if you want a transparent image 
            image_to_save =decode_pdf.getPageAsTransparentImage(page);
            
            //java adds odd tint if you save this as JPEG which does not have transparency
            // so put as RGB on white background
            // (or save as PNG or TIFF which has transparency)
            // or just call decode_pdf.getPageAsImage(page)
            if(image_to_save!=null && format.toLowerCase().startsWith("jp")){
                
                BufferedImage rawVersion=image_to_save;
                
                int w=rawVersion.getWidth(), h=rawVersion.getHeight();
                //blank canvas
                image_to_save = new BufferedImage(w,h , BufferedImage.TYPE_INT_RGB);
                
                //
                Graphics2D g2 = image_to_save.createGraphics();
                //white background
                g2.setPaint(Color.WHITE);
                g2.fillRect(0,0,w,h);
                //paint on image
                g2.drawImage(rawVersion, 0, 0,null);
            }
        }
        
        /*if just gray we can reduce memory usage by converting image to Grayscale

        
        @SuppressWarnings("rawtypes")
		Iterator colorspacesUsed = decode_pdf.getPageInfo(PageInfo.COLORSPACES);
        
        int nextID;
        boolean isGrayOnly=colorspacesUsed!=null; //assume true and disprove
        while(colorspacesUsed!=null && colorspacesUsed.hasNext()){
            nextID= (Integer) (colorspacesUsed.next());
            
            if(nextID!= ColorSpaces.DeviceGray && nextID!=ColorSpaces.CalGray){
                isGrayOnly=false;
            }
        }
        
        //draw onto GRAY image to reduce colour depth
        if(isGrayOnly){
            BufferedImage image_to_save2=new BufferedImage(image_to_save.getWidth(),image_to_save.getHeight(), BufferedImage.TYPE_BYTE_GRAY);
            image_to_save2.getGraphics().drawImage(image_to_save,0,0,null);
            image_to_save = image_to_save2;
        }
        
        //put image in array if multi-images
        if(isSingleOutputFile){
            multiPages[page-start] = image_to_save;
        }
        
        if (image_to_save != null) {
            
            /**BufferedImage does not support any dpi concept. A higher dpi can be created
             * using JAI to convert to a higher dpi image*/
            
            //shrink the page to 50% with graphics2D transformation
            //- add your own parameters as needed
            //you may want to replace null with a hints object if you
            //want to fine tune quality.
            
            /** example 1 biliniear scaling
             AffineTransform scale = new AffineTransform();
             scale.scale(.5, .5); //50% as a decimal
             AffineTransformOp scalingOp =new AffineTransformOp(scale, null);
             image_to_save =scalingOp.filter(image_to_save, null);

             */
            
            /** example 2 bicubic scaling - better quality but slower
             to preserve aspect ratio set newWidth or newHeight to -1*/
            
            /**allow user to specify maximum dimension for thumbnail*/
            int maxDimension = -1;
            
            if(scaling!=100 || maxDimension != -1){
                int newWidth=image_to_save.getWidth()*scaling/100;
                int newHeight=image_to_save.getHeight()*scaling/100;
                
                Image scaledImage;
                if(maxDimension != -1 && (newWidth > maxDimension || newHeight > maxDimension)){
                    if(newWidth > newHeight){
                        newWidth = maxDimension;
                        scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
                    } else {
                        newHeight = maxDimension;
                        scaledImage= image_to_save.getScaledInstance(-1,newHeight,BufferedImage.SCALE_SMOOTH);
                    }
                } else {
                    scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH);
                }
                
                if(format.toLowerCase().startsWith("jp")){
                    image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_RGB);
                }else{
                    image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_ARGB);
                }
                
                Graphics2D g2 = image_to_save.createGraphics();
                
                g2.drawImage(scaledImage, 0, 0,null);
            }

            if (format.startsWith("jp")) {
            	saveAsJPEG(jpgFlag, image_to_save, JPEGcompression, new BufferedOutputStream(new FileOutputStream(output_dir + pageAsString + image_name + '.' + format)));                
            } else {
                //save image
                decode_pdf.getObjectStore().saveStoredImage(
                        output_dir + pageAsString + image_name,
                        image_to_save,
                        true,
                        false,
                        format);
            }   
        }
        
        //flush images in case we do more than 1 page so only contains
        //images from current page
        decode_pdf.flushObjectValues(true);            
    }
    
    private static void saveAsJPEG(String jpgFlag,BufferedImage image_to_save, float JPEGcompression, BufferedOutputStream fos) throws IOException {
        JPEGImageWriter imageWriter = (JPEGImageWriter) ImageIO.getImageWritersBySuffix("jpeg").next();
        ImageOutputStream ios = ImageIO.createImageOutputStream(fos);
        imageWriter.setOutput(ios);
        
        IIOMetadata imageMetaData = imageWriter.getDefaultImageMetadata(new ImageTypeSpecifier(image_to_save), null);
        
        if (Common.isInteger(jpgFlag)){
            
            int dpi = 96;
            
            try {
                dpi = Integer.parseInt(jpgFlag);
            } catch (Exception e) {
                e.printStackTrace();
            }
            
            Element tree = (Element) imageMetaData.getAsTree("javax_imageio_jpeg_image_1.0");
            Element jfif = (Element)tree.getElementsByTagName("app0JFIF").item(0);
            jfif.setAttribute("Xdensity", Integer.toString(dpi));
            jfif.setAttribute("Ydensity", Integer.toString(dpi));
        }
        
        JPEGImageWriteParam jpegParams = (JPEGImageWriteParam) imageWriter.getDefaultWriteParam();
        if(JPEGcompression>=0 && JPEGcompression<=1f){     
            jpegParams.setCompressionMode(JPEGImageWriteParam.MODE_EXPLICIT);
            jpegParams.setCompressionQuality(JPEGcompression);
            
        }
        
        imageWriter.write(imageMetaData, new IIOImage(image_to_save, null, null), jpegParams);
        ios.close();
        imageWriter.dispose();
    }
    
     public static void main(String[] args) {   
		 long start=System.currentTimeMillis();
			 
		 String pdfPath = "E:\\upload\\pdf\\20140424\\Servlet.pdf";
		 int scaling = -1;
		 String format = "jpg";
		 String output_dir = "E:\\upload\\pdf\\20140424\\jpg\\";
		 String password = null;
		 int pageCount = 10;

		 ConvertPagesToImages convertPagesToImages = new ConvertPagesToImages();
		 convertPagesToImages.init(pdfPath, scaling, format, output_dir, password, pageCount);
	 
		 System.out.println("花費時間為="+(System.currentTimeMillis()-start)/1000 + "秒");
 	}         
}
功能說明:

       1、支援對資料夾下的所有pdf轉換成圖片,同時也支援對單個pdf進行轉換操作。

       2、支援轉換成jpg,jpeg,tiff,tif,png格式的圖片

       3、支援指定轉換的圖片數。

       4、支援指定圖片的儲存位置

傳入引數說明

 1、pdfPath pdf檔案絕對路徑,可以是pdf所在的目錄也可以是pdf檔案路徑 
 2、format  圖片格式 (支援jpg,jpeg,tiff,tif,png) ,傳參時不能帶有點號
 3、scaling 圖片比率從1到100(100 = 全尺寸) 支援設定為-1 將保持高質量
 4、output_dir 輸出路徑,輸出路徑為絕對路徑
 5、password 檔案密碼,若pdf沒有密碼則傳入null值
 6、pageCount 要轉換的頁數(從第1頁開始),傳入0表示轉換全部頁面