Extracting images from a PDF
I have a question about extracting only the images from a supplied PDF document in an iPhone application.
I have gone through Apple's documentation, but I failed to find out how to do it.
I have made the following attempt to get the images out of the PDF document.
/**
 * Opens the PDF at `filePath` and, for every page, looks up the image XObject
 * named "Im<zero-based page index>" in the page's own /Resources dictionary,
 * validates its image dictionary and logs the length of its decoded data.
 *
 * A page is skipped (after logging) as soon as a value that later steps depend
 * on cannot be read; purely informational checks only log and carry on.
 *
 * NOTE(review): the "Im<index>" naming is an assumption carried over from the
 * original code; XObject names are chosen by the PDF producer.
 *
 * @param sender The control that triggered the action (unused).
 */
-(IBAction)btnTappedImages:(id)sender{
    // MyGetPDFDocumentRef is a custom C function; filePath is the path to the PDF document.
    const char *path = [filePath UTF8String];
    if (path == NULL) {
        return;
    }
    CGPDFDocumentRef document = MyGetPDFDocumentRef(path);
    if (document == NULL) {
        return;
    }

    size_t pageCount = CGPDFDocumentGetNumberOfPages(document);
    for (size_t pageIndex = 0; pageIndex < pageCount; ++pageIndex) {
        // 1. Open the document page (page numbers are 1-based).
        CGPDFPageRef pg = CGPDFDocumentGetPage(document, pageIndex + 1);
        if (!pg) {
            NSLog(@"Couldn't open page.");
            continue;
        }

        // 2. Get the page dictionary.
        CGPDFDictionaryRef dict = CGPDFPageGetDictionary(pg);
        if (!dict) {
            NSLog(@"Couldn't open page dictionary.");
            continue;
        }

        // 3. Get the page contents stream. The stream is not used below and
        //    /Contents may legitimately be an array of streams, so only log.
        CGPDFStreamRef cont = NULL;
        if (!CGPDFDictionaryGetStream(dict, "Contents", &cont)) {
            NSLog(@"Couldn't open page stream.");
        }

        // 4./5. Get the media box array from the page dictionary.
        CGPDFArrayRef media = NULL;
        if (!CGPDFDictionaryGetArray(dict, "MediaBox", &media)) {
            NSLog(@"Couldn't open page Media.");
            continue;
        }

        // 6. Read the media box. The array is [left bottom right top]; every
        //    entry may be an integer or a real, which CGPDFArrayGetNumber accepts.
        CGPDFReal boxLeft = 0, boxBottom = 0, boxRight = 0, boxTop = 0;
        if (!CGPDFArrayGetNumber(media, 0, &boxLeft) ||
            !CGPDFArrayGetNumber(media, 1, &boxBottom) ||
            !CGPDFArrayGetNumber(media, 2, &boxRight) ||
            !CGPDFArrayGetNumber(media, 3, &boxTop)) {
            NSLog(@"Couldn't open page Media Box.");
            continue;
        }

        // 7. Media size in PDF points.
        double mediawidth = boxRight - boxLeft;
        double mediaheight = boxTop - boxBottom;

        // 8. Get the page resources.
        CGPDFDictionaryRef res = NULL;
        if (!CGPDFDictionaryGetDictionary(dict, "Resources", &res)) {
            NSLog(@"Couldn't Open Page Media Reopsources.");
            continue;
        }

        // 9. Get the XObject dictionary from the resources.
        CGPDFDictionaryRef xobj = NULL;
        if (!CGPDFDictionaryGetDictionary(res, "XObject", &xobj)) {
            NSLog(@"Couldn't load page Xobjects.");
            continue;
        }

        char imagestr[16];
        snprintf(imagestr, sizeof(imagestr), "Im%zu", pageIndex);

        // 10. Get the XObject stream.
        CGPDFStreamRef strm = NULL;
        if (!CGPDFDictionaryGetStream(xobj, imagestr, &strm)) {
            NSLog(@"Couldn't load stream for xObject");
            continue;
        }

        // 11. Get the dictionary of the XObject stream.
        CGPDFDictionaryRef strmdict = CGPDFStreamGetDictionary(strm);
        if (!strmdict) {
            NSLog(@"Failed to load dictionary for xObject");
            continue;
        }

        // 12. Check the XObject type. Nothing below depends on it, so only log.
        const char *type = NULL;
        if (!CGPDFDictionaryGetName(strmdict, "Type", &type) || strcmp(type, "XObject")) {
            NSLog(@"Couldn't load xObject Type");
        }

        // 13. Check whether the subtype is Image; the remaining keys only make
        //     sense for images.
        const char *subtype = NULL;
        if (!CGPDFDictionaryGetName(strmdict, "Subtype", &subtype) || strcmp(subtype, "Image")) {
            NSLog(@"xObject is not image");
            continue;
        }

        // 14. Bits per component (this code only expects 1-bit images).
        CGPDFInteger bitsper = 0;
        if (!CGPDFDictionaryGetInteger(strmdict, "BitsPerComponent", &bitsper) || bitsper != 1) {
            NSLog(@"Bits per component not loaded");
        }

        // 15. Filter of the image stream.
        const char *filter = NULL;
        if (!CGPDFDictionaryGetName(strmdict, "Filter", &filter) || strcmp(filter, "FlateDecode")) {
            NSLog(@"Filter not loaded");
        }

        // 16. Image width and height in pixels.
        CGPDFInteger width = 0, height = 0;
        if (!CGPDFDictionaryGetInteger(strmdict, "Width", &width) ||
            !CGPDFDictionaryGetInteger(strmdict, "Height", &height)) {
            NSLog(@"Image Height - width not loaded.");
            continue;
        }

        // 17. Load the image bytes and verify their length.
        CGPDFDataFormat fmt = CGPDFDataFormatRaw;
        CFDataRef data = CGPDFStreamCopyData(strm, &fmt);
        if (data == NULL) {
            NSLog(@"Couldn't load image data");
            continue;
        }
        CFIndex len = CFDataGetLength(data);
        // `bytes` points into `data` and is only valid until `data` is released.
        const void *bytes = CFDataGetBytePtr(data);
        (void)bytes;
        NSLog(@"Image bytes length - %i", (int)len);

        // Rows of a 1-bit image are padded to whole bytes.
        CGPDFInteger rowbytes = (width + 7) / 8;
        if (rowbytes * height != len) {
            NSLog(@"Invalid Image");
        }

        // Image resolution in pixels per inch, rounded to three decimals.
        // The values are computed but not consumed yet.
        double xres = width / mediawidth * 72.0, yres = height / mediaheight * 72.0;
        xres = round(xres * 1000) / 1000;
        yres = round(yres * 1000) / 1000;

        // CGPDFStreamCopyData follows the Copy rule: we own `data`.
        CFRelease(data);
    }

    // MyGetPDFDocumentRef hands back a document we own.
    CGPDFDocumentRelease(document);
}
Yes! I found a solution, but it looks rather scary: it is a huge amount of code.
/// File-level pointer to the array registered through setRefImgs().
/// It is stored as-is: nothing here retains or releases it.
NSMutableArray *aRefImgs;

/// Registers `ref` as the current image array (plain pointer assignment).
void setRefImgs(NSMutableArray *ref)
{
    aRefImgs = ref;
}

/// Returns whatever array was last passed to setRefImgs(), or nil if none was.
NSMutableArray *ImgArrRef()
{
    return aRefImgs;
}
/**
 * Opens the PDF document stored at a POSIX file-system path.
 *
 * @param filename UTF-8 encoded path of the PDF file. May be NULL.
 * @return A document with at least one page that the caller owns and must
 *         release with CGPDFDocumentRelease(), or NULL when the path is NULL
 *         or cannot be converted, or when the document cannot be opened or
 *         has no pages.
 */
CGPDFDocumentRef MyGetPDFDocumentRef (const char *filename) {
    if (filename == NULL) {
        return NULL;
    }

    // Creation fails (NULL) when `filename` is not valid UTF-8.
    CFStringRef path = CFStringCreateWithCString(NULL, filename, kCFStringEncodingUTF8);
    if (path == NULL) {
        return NULL;
    }

    CFURLRef url = CFURLCreateWithFileSystemPath(NULL, path, kCFURLPOSIXPathStyle, false);
    CFRelease(path);
    if (url == NULL) {
        return NULL;
    }

    CGPDFDocumentRef document = CGPDFDocumentCreateWithURL(url);
    CFRelease(url);

    size_t count = (document != NULL) ? CGPDFDocumentGetNumberOfPages(document) : 0;
    if (count == 0) {
        printf("`%s' needs at least one page!", filename);
        // Do not leak a document that opened but is empty; releasing NULL is a no-op.
        CGPDFDocumentRelease(document);
        return NULL;
    }
    return document;
}
/// Allocates `componentCount` pairs of {0.0, 1.0}; returns NULL when the count
/// is zero or the allocation fails. The caller owns the buffer.
static CGFloat *MyCreateUnitDecodeRanges(size_t componentCount) {
    if (componentCount == 0) {
        return NULL;
    }
    CGFloat *ranges = malloc(sizeof(CGFloat) * componentCount * 2);
    if (ranges == NULL) {
        return NULL;
    }
    for (size_t i = 0; i < componentCount * 2; i++) {
        ranges[i] = (i % 2 == 0) ? 0.0 : 1.0;
    }
    return ranges;
}

/**
 * Builds the decode array for a PDF image.
 *
 * The image dictionary's /Decode array is used when it is present, contains
 * only numbers and (when a color space is given) has at least two entries per
 * color component. Otherwise a default range is derived from the color space
 * model: {0, 1} per component for monochrome, RGB, CMYK and DeviceN, and
 * {0, 2^bitsPerComponent - 1} for indexed. No default is produced for Lab or
 * any other model.
 *
 * @param dict             Image dictionary; NULL is treated as "no /Decode entry".
 * @param cgColorSpace     Color space of the image.
 * @param bitsPerComponent Bits per component, used only for indexed color spaces.
 * @return A malloc'ed array the caller must free(), or NULL when no decode
 *         array applies or memory could not be allocated.
 */
CGFloat *decodeValuesFromImageDictionary(CGPDFDictionaryRef dict, CGColorSpaceRef cgColorSpace, NSInteger bitsPerComponent) {
    CGPDFArrayRef decodeArray = NULL;
    if (dict != NULL && CGPDFDictionaryGetArray(dict, "Decode", &decodeArray)) {
        size_t count = CGPDFArrayGetCount(decodeArray);
        // An array shorter than two entries per component would be read past
        // its end by the image functions, so it is ignored.
        size_t required = (cgColorSpace != NULL) ? CGColorSpaceGetNumberOfComponents(cgColorSpace) * 2 : 1;
        if (count > 0 && count >= required) {
            CGFloat *explicitValues = malloc(sizeof(CGFloat) * count);
            if (explicitValues == NULL) {
                return NULL;
            }
            size_t i = 0;
            for (; i < count; i++) {
                CGPDFReal realValue = 0.0;
                if (!CGPDFArrayGetNumber(decodeArray, i, &realValue)) {
                    break;
                }
                explicitValues[i] = realValue;
            }
            if (i == count) {
                return explicitValues;
            }
            // A non-numeric entry makes the array unusable; fall back to defaults.
            free(explicitValues);
        }
    }

    CGFloat *decodeValues = NULL;
    switch (CGColorSpaceGetModel(cgColorSpace)) {
        case kCGColorSpaceModelMonochrome:
            decodeValues = MyCreateUnitDecodeRanges(1);
            break;
        case kCGColorSpaceModelRGB:
            decodeValues = MyCreateUnitDecodeRanges(3);
            break;
        case kCGColorSpaceModelCMYK:
            decodeValues = MyCreateUnitDecodeRanges(4);
            break;
        case kCGColorSpaceModelDeviceN:
            decodeValues = MyCreateUnitDecodeRanges(CGColorSpaceGetNumberOfComponents(cgColorSpace));
            break;
        case kCGColorSpaceModelIndexed:
            // Indexed samples address a palette: the range is 0 .. 2^bits - 1.
            decodeValues = malloc(sizeof(CGFloat) * 2);
            if (decodeValues != NULL) {
                decodeValues[0] = 0.0;
                decodeValues[1] = pow(2.0, (double)bitsPerComponent) - 1;
            }
            break;
        case kCGColorSpaceModelLab:
        default:
            // No default range is supplied for these models.
            break;
    }
    // Plain malloc'ed memory is not a Core Foundation object, so it is
    // returned directly instead of going through CFMakeCollectable().
    return decodeValues;
}
/// Picks a device color space for a PDF color space given in array form.
/// ICCBased spaces are mapped by the /N entry of their profile stream
/// (1 = gray, 4 = CMYK) and CalGray maps to gray; everything else falls back
/// to device RGB. The caller owns the returned color space.
static CGColorSpaceRef MyCreateColorSpaceForPDFArray(CGPDFArrayRef colorSpaceArray) {
    const char *family = NULL;
    if (CGPDFArrayGetName(colorSpaceArray, 0, &family)) {
        if (strcmp(family, "ICCBased") == 0) {
            CGPDFStreamRef profile = NULL;
            CGPDFInteger componentCount = 0;
            if (CGPDFArrayGetStream(colorSpaceArray, 1, &profile) &&
                CGPDFDictionaryGetInteger(CGPDFStreamGetDictionary(profile), "N", &componentCount)) {
                if (componentCount == 1) {
                    return CGColorSpaceCreateDeviceGray();
                }
                if (componentCount == 4) {
                    return CGColorSpaceCreateDeviceCMYK();
                }
            }
        } else if (strcmp(family, "CalGray") == 0) {
            return CGColorSpaceCreateDeviceGray();
        }
    }
    return CGColorSpaceCreateDeviceRGB();
}

/**
 * Converts a PDF image XObject stream into a UIImage.
 *
 * Reads /Subtype, /Width, /Height, /BitsPerComponent, /Interpolate and
 * /ColorSpace from the stream dictionary, copies the stream data and builds a
 * CGImage from it. Raw data goes through CGImageCreate; JPEG and JPEG 2000
 * data are handed to CGImageCreateWithJPEGDataProvider. The rendering intent
 * is always kCGRenderingIntentDefault.
 *
 * NOTE(review): JPEG 2000 data is still passed to the JPEG decoder as in the
 * original code; the original comments suggest ImageIO is the right decoder.
 * When decoding fails this function now returns nil.
 *
 * @param myStream The image XObject stream; may be NULL.
 * @return An autoreleased UIImage, or nil when the stream is not an image,
 *         a required entry is missing or invalid, the color space cannot be
 *         determined, the data is too short, or decoding fails.
 */
UIImage *getImageRef(CGPDFStreamRef myStream) {
    if (myStream == NULL) {
        return nil;
    }
    CGPDFDictionaryRef dict = CGPDFStreamGetDictionary(myStream);
    if (dict == NULL) {
        return nil;
    }

    // Obtain the basic image information.
    const char *subtype = NULL;
    if (!CGPDFDictionaryGetName(dict, "Subtype", &subtype) || strcmp(subtype, "Image") != 0) {
        return nil;
    }
    CGPDFInteger width = 0, height = 0, bps = 0;
    if (!CGPDFDictionaryGetInteger(dict, "Width", &width) ||
        !CGPDFDictionaryGetInteger(dict, "Height", &height) ||
        !CGPDFDictionaryGetInteger(dict, "BitsPerComponent", &bps)) {
        return nil;
    }
    if (width <= 0 || height <= 0 || bps <= 0) {
        return nil;
    }
    CGPDFBoolean interpolation = 0;
    if (!CGPDFDictionaryGetBoolean(dict, "Interpolate", &interpolation)) {
        interpolation = 0;
    }
    CGColorRenderingIntent renderingIntent = kCGRenderingIntentDefault;

    // Determine the color space. Every path either yields an owned color
    // space or leaves it NULL, so nothing is used uninitialized.
    CGColorSpaceRef cgColorSpace = NULL;
    CGPDFArrayRef colorSpaceArray = NULL;
    const char *colorSpaceName = NULL;
    if (CGPDFDictionaryGetArray(dict, "ColorSpace", &colorSpaceArray)) {
        cgColorSpace = MyCreateColorSpaceForPDFArray(colorSpaceArray);
    } else if (CGPDFDictionaryGetName(dict, "ColorSpace", &colorSpaceName)) {
        if (strcmp(colorSpaceName, "DeviceRGB") == 0) {
            cgColorSpace = CGColorSpaceCreateDeviceRGB();
        } else if (strcmp(colorSpaceName, "DeviceCMYK") == 0) {
            cgColorSpace = CGColorSpaceCreateDeviceCMYK();
        } else if (strcmp(colorSpaceName, "DeviceGray") == 0) {
            cgColorSpace = CGColorSpaceCreateDeviceGray();
        }
    }
    if (cgColorSpace == NULL && bps == 1) {
        // With no usable color space entry, a 1-bit image is treated as gray.
        cgColorSpace = CGColorSpaceCreateDeviceGray();
    }
    if (cgColorSpace == NULL) {
        return nil;
    }
    size_t spp = CGColorSpaceGetNumberOfComponents(cgColorSpace);

    // Copy the stream data; `format` reports how the copied bytes are encoded.
    CGPDFDataFormat format = CGPDFDataFormatRaw;
    CFDataRef imageDataPtr = CGPDFStreamCopyData(myStream, &format);
    if (imageDataPtr == NULL) {
        CGColorSpaceRelease(cgColorSpace);
        return nil;
    }
    size_t dataLength = (size_t)CFDataGetLength(imageDataPtr);
    CGDataProviderRef dataProvider = CGDataProviderCreateWithCFData(imageDataPtr);
    CFRelease(imageDataPtr);
    if (dataProvider == NULL) {
        CGColorSpaceRelease(cgColorSpace);
        return nil;
    }

    CGFloat *decodeValues = decodeValuesFromImageDictionary(dict, cgColorSpace, bps);

    // PDF image rows are padded to a whole number of bytes.
    size_t rowBits = (size_t)bps * spp * (size_t)width;
    size_t rowBytes = (rowBits + 7) / 8;

    CGImageRef cgImage = NULL;
    if (format == CGPDFDataFormatRaw) {
        // Refuse data shorter than the geometry implies (for example when the
        // color space guess is wrong) instead of reading past the buffer.
        if (dataLength >= rowBytes * (size_t)height) {
            cgImage = CGImageCreate(width, height, bps, bps * spp, rowBytes, cgColorSpace, 0,
                                    dataProvider, decodeValues, interpolation, renderingIntent);
        }
    } else if (format == CGPDFDataFormatJPEGEncoded || format == CGPDFDataFormatJPEG2000) {
        cgImage = CGImageCreateWithJPEGDataProvider(dataProvider, decodeValues, interpolation,
                                                    renderingIntent);
    }
    // Any other format is unknown or an error in the PDF: cgImage stays NULL.

    // Release everything created above on every path.
    // NOTE(review): freeing the decode array here assumes the image functions
    // copy it rather than keep the pointer - confirm against the CGImage docs.
    CGDataProviderRelease(dataProvider);
    CGColorSpaceRelease(cgColorSpace);
    free(decodeValues);

    if (cgImage == NULL) {
        return nil;
    }
    UIImage *image = [UIImage imageWithCGImage:cgImage];
    CGImageRelease(cgImage);
    return image;
}
@implementation DashBoard
// Implement viewDidLoad to do additional setup after loading the view, typically from a nib.
/**
 * Resolves the path of the bundled "per.pdf" and stores an owned copy in
 * `filePath`.
 *
 * The lookup runs only while `filePath` is still nil, so a repeated view load
 * does not overwrite (and leak) the previous string. A missing resource leaves
 * `filePath` nil instead of raising from -initWithString: with a nil argument.
 */
- (void)viewDidLoad {
    [super viewDidLoad];
    if (filePath == nil) {
        NSString *bundledPath = [[NSBundle mainBundle] pathForResource:@"per" ofType:@"pdf"];
        if (bundledPath == nil) {
            NSLog(@"Couldn't find per.pdf in the main bundle.");
        }
        // -copy on nil is a no-op that yields nil.
        filePath = [bundledPath copy];
    }
}
/**
 * Collects the XObjects of every page of the PDF at `filePath` and shows them
 * in an ImgVCtr.
 *
 * A fresh `arrImgs` array is created and registered through setRefImgs().
 * For each page, `pdfDictionaryFunction` is applied to every entry of the
 * page's /Resources /XObject dictionary. Finally a new ImgVCtr is given
 * `arrImgs` and pushed onto the navigation stack.
 *
 * Pages are no longer skipped when /Contents is not a single stream or when
 * /MediaBox is not stored on the page itself: neither value was used, and both
 * situations occur in valid PDFs.
 *
 * NOTE(review): pdfDictionaryFunction is defined elsewhere; presumably it adds
 * the extracted images to the array registered with setRefImgs() - confirm.
 *
 * @param sender The control that triggered the action (unused).
 */
-(IBAction)btnTappedText:(id)sender{
    // Messaging nil is a no-op, so no nil or retainCount guard is needed.
    [arrImgs release];
    arrImgs = [[NSMutableArray alloc] init];
    setRefImgs(arrImgs);

    // 1. Open the document.
    const char *path = [filePath UTF8String];
    CGPDFDocumentRef document = (path != NULL) ? MyGetPDFDocumentRef(path) : NULL;
    size_t pageCount = (document != NULL) ? CGPDFDocumentGetNumberOfPages(document) : 0;

    // Page numbers are 1-based.
    for (size_t pageNumber = 1; pageNumber <= pageCount; ++pageNumber) {
        CGPDFPageRef pg = CGPDFDocumentGetPage(document, pageNumber);
        if (!pg) {
            NSLog(@"Couldn't open page.");
            continue;
        }

        // 2. Get the page dictionary.
        CGPDFDictionaryRef dict = CGPDFPageGetDictionary(pg);
        if (!dict) {
            NSLog(@"Couldn't open page dictionary.");
            continue;
        }

        // 3. Get the page resources.
        CGPDFDictionaryRef res = NULL;
        if (!CGPDFDictionaryGetDictionary(dict, "Resources", &res)) {
            NSLog(@"Couldn't Open Page Media Reopsources.");
            continue;
        }

        // 4. Get the XObject dictionary and visit each of its entries.
        CGPDFDictionaryRef xobj = NULL;
        if (!CGPDFDictionaryGetDictionary(res, "XObject", &xobj)) {
            NSLog(@"Couldn't load page Xobjects.");
            continue;
        }
        CGPDFDictionaryApplyFunction(xobj, pdfDictionaryFunction, NULL);
    }

    // MyGetPDFDocumentRef hands back a document we own; releasing NULL is a no-op.
    CGPDFDocumentRelease(document);

    NSLog(@"Total images are - %lu", (unsigned long)[arrImgs count]);

    [nxtImgVCtr release];
    nxtImgVCtr = [[ImgVCtr alloc] initWithNibName:@"ImgVCtr" bundle:nil];
    nxtImgVCtr.arrImg = arrImgs;
    [self.navigationController pushViewController:nxtImgVCtr animated:YES];
}
Converting a CGPDFStreamRef to a UIImage doesn't really make conceptual sense; a PDF represents a document with potentially multiple pages and no obvious mapping to a bitmap image.
Even if you know your PDF contains only one page, you'll still need to do some parsing and finagling. There's a very short discussion of that here: http://lists.apple.com/archives/Cocoa-dev/2006/Sep/msg01407.html
// Draws one page of a bundled PDF into the current graphics context so that it
// fits the view's bounds.
// NOTE(review): this is a fragment. `self`, `context`, `currentpage` and
// `aspectFit` are declared elsewhere; presumably it lives in a UIView drawing
// method - confirm.
CGPDFDocumentRef PDFfile;
// Locate the PDF in the main bundle and open it. The URL is released right
// after the document has been created from it.
CFURLRef pdfURL = CFBundleCopyResourceURL(CFBundleGetMainBundle(), CFSTR("iPhoneAppProgrammingGuide.pdf"), NULL, NULL);
PDFfile = CGPDFDocumentCreateWithURL((CFURLRef)pdfURL);
CFRelease(pdfURL);
// NOTE(review): PDFfile is not released anywhere in the lines shown here.
CGPDFPageRef page = CGPDFDocumentGetPage(PDFfile,currentpage);
context = UIGraphicsGetCurrentContext();
CGContextSaveGState(context);
// Fill the view's bounds with opaque white before drawing the page.
CGContextSetRGBFillColor(context, 1.0, 1.0, 1.0, 1.0);
CGContextFillRect(context,self.bounds);
// Flip the coordinate system vertically: move the origin down by the view
// height (and by -1.0 horizontally), then negate the y axis.
CGContextTranslateCTM(context, -1.0, [self bounds].size.height);
CGContextScaleCTM(context, 1.0, -1.0);
// Map the page's art box onto the view's bounds (rotation 0, aspect ratio
// preserved) and draw the page.
CGContextConcatCTM(context, CGPDFPageGetDrawingTransform(page, kCGPDFArtBox, [self bounds], 0, true));
CGContextDrawPDFPage(context, page);
CGContextRestoreGState(context);
// After the state is restored, a second transform is built from the page's
// media box and the context's clip bounding box and applied to the context.
// NOTE(review): nothing is drawn after this in the lines shown.
CGAffineTransform transform = aspectFit(CGPDFPageGetBoxRect(page, kCGPDFMediaBox),
CGContextGetClipBoundingBox(context));
CGContextConcatCTM(context, transform);
// Begins an image context the size of the view.
// NOTE(review): the matching UIGraphicsEndImageContext() call is not in the
// lines shown here.
UIGraphicsBeginImageContext(CGSizeMake(self.bounds.size.width, self.bounds.size.height));
精彩评论