How to Grab PDF image elements using c#
I wanted to know how to grab an image element in a PDF file using c#. I know how to add the elements to a PDF file I just need to know how to access the image elements.
I'm a开发者_运维问答lso using iTextSharp.
I believe you can do this with itextsharp.
I had this code stored on my machine, but never used it. I got it off a forum and it's not tested, but I'm sure you could make it work.
using iTextSharp.text;
using iTextSharp.text.pdf;
#region ExtractImagesFromPDF
public static void ExtractImagesFromPDF(string sourcePdf, string outputPath)
{
// NOTE: This will only get the first image it finds per page.
PdfReader pdf = new PdfReader(sourcePdf);
RandomAccessFileOrArray raf = new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdf);
try
{
for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
{
PdfDictionary pg = pdf.GetPageN(pageNumber);
PdfDictionary res =
(PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
PdfDictionary xobj =
(PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
if (xobj != null)
{
foreach (PdfName name in xobj.Keys)
{
PdfObject obj = xobj.Get(name);
if (obj.IsIndirect())
{
PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
PdfName type =
(PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
if (PdfName.IMAGE.Equals(type))
{
int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
PdfObject pdfObj = pdf.GetPdfObject(XrefIndex);
PdfStream pdfStrem = (PdfStream)pdfObj;
byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfStrem);
if ((bytes != null))
{
using (System.IO.MemoryStream memStream = new System.IO.MemoryStream(bytes))
{
memStream.Position = 0;
System.Drawing.Image img = System.Drawing.Image.FromStream(memStream);
// must save the file while stream is open.
if (!Directory.Exists(outputPath))
Directory.CreateDirectory(outputPath);
string path = Path.Combine(outputPath, String.Format(@"{0}.jpg", pageNumber));
System.Drawing.Imaging.EncoderParameters parms = new System.Drawing.Imaging.EncoderParameters(1);
parms.Param[0] = new System.Drawing.Imaging.EncoderParameter(System.Drawing.Imaging.Encoder.Compression, 0);
// GetImageEncoder is found below this method
System.Drawing.Imaging.ImageCodecInfo jpegEncoder = GetImageEncoder("JPEG");
img.Save(path, jpegEncoder, parms);
break;
}
}
}
}
}
}
}
}
catch
{
throw;
}
finally
{
pdf.Close();
}
}
#endregion
#region GetImageEncoder
public static System.Drawing.Imaging.ImageCodecInfo GetImageEncoder(string imageType)
{
imageType = imageType.ToUpperInvariant();
foreach (ImageCodecInfo info in ImageCodecInfo.GetImageEncoders())
{
if (info.FormatDescription == imageType)
{
return info;
}
}
return null;
}
#endregion
Bear in mind the image may not exist as an image but as a set of blobs (ie raw data, colorspace data, ICC profile or colorspace) which you will need to put together. The raw image may also be manipulated in the display (ie scaled, rotated, inverted, masked, clipped).
精彩评论