Remove images from PDF
I read Create a tiff with only text and no images from a postscript file with ghostscript and try to u开发者_运维技巧se KenS`s answer. But this method remove only "black" images - image contain data only in black channel (PDF has colorspace CMYK). How can i remove all images in my case?
This does a better job, but its incomplete. It doesn't deal with images using multiple data sources for example. Its essentially untested, except that I did test your smaller file (pages.pdf) by using ps2write to convert to PostScript and then the PostScript program below, and teh pdfwrite device, to convert back to PDF.
One of the first things you will notice is that almost all the text has vanished from your document. That's because the fonts you are using are bitmap fonts, and the program can't tell the difference between a bitmap representing a character, and any other kind of bitmap. For this file you can solve that by removing the definition of imagemask because all the characters use imagemask, and the other images use 'image'.
I have a sneaky suspicion the formatting of the program is going to get messed up here :-(
8<------------------------------8<--------------------------8<-------------------------
%!
%
% numbytes -file- ConsumeFileData -
%
/ConsumeFileData {
userdict begin
/DataString 256 string def
/DataFile exch def
/BytesToRead exch def
%(BytesToRead = ) print BytesToRead ==
mark
{
DataFile DataString readstring { % read bytes
/BytesToRead BytesToRead 256 sub def % not EOF subtract 256 from required amount.
%(Read 256 bytes) ==
%(BytesToRead now = ) print BytesToRead ==
} {
length
%(Read ) print dup 256 string cvs print (bytes) ==
BytesToRead exch sub /BytesToRead exch def % Reached EOF, subtract length read froom required amount
%(BytesToRead now = ) print BytesToRead ==
exit % and exit loop
} ifelse
} loop
%BytesToRead ==
BytesToRead 0 gt {
(Ran out of image data reading from DataSource\n) ==
} if
cleartomark
end
} bind def
%
% numbytes -proc- ConsumeProcData -
%
/ConsumeProcData {
userdict begin
/DataProc exch def
/BytesToRead exch def
{
DataProc exec % returns a string
length BytesToRead exch sub % subtract # bytes read
/BytesToRead exch def
BytesToRead 0 le {
exit % exit when read enough
} if
} loop
end
} bind def
/image {
(image) ==
dup type /dicttype eq {
dup /MultipleDataSources known {
dup /MultipleDataSources get {
(Can't handle image with multiple sources!) ==
} if
} if
dup /Width get % stack = -dict- width
exch dup /BitsPerComponent get % stack = width -dict- bpc
exch dup /Decode get % stack = width bpc -dict- decode
length 2 div % decode = 2 * num components
exch 4 1 roll % stack = -dict- width bpc ncomps
mul mul % stack = -dict- width*bpc*ncomps
7 add cvi 8 idiv % stack = -dict- width(bytes)
exch dup /Height get % stack = width -dict- height
exch /DataSource get % stack = width height DataSource
3 1 roll % stack = DataSource width height
mul % stack = DataSource widht*height
exch % stack = size DataSource
} {
5 -1 roll
pop % throw away matrix
mul mul % bits/sample*width*height
7 add cvi 8 idiv % size in bytes of data floor(bits+7 / 8)
exch % stack = size DataSource
} ifelse
dup type /filetype eq {
ConsumeFileData
} {
dup type /arraytype eq or
1 index type /packedarraytype eq or {
ConsumeProcData
} {
pop pop % Remove DataSource and size
} ifelse
} ifelse
} bind def
/imagemask {
(imagemask)==
dup type /dicttype eq {
dup /MultipleDataSources known {
dup /MultipleDataSources get {
(Can't handle imagemask with multiple sources!) ==
} if
} if
dup /Width get % stack = -dict- width
7 add cvi 8 idiv % size in bytes of width floor(bits+7 / 8)
exch dup /Height get % stack = width -dict- height
exch /DataSource get % stack = width height DataSource
3 1 roll % stack = DataSource width height
mul % stack = DataSource width*height
exch % stack = size DataSource
} {
5 -1 roll
pop % throw away matrix
mul mul % bits/sample*width*height
7 add cvi 8 idiv % size in bytes of data floor(bits+7 / 8)
exch % stack = size DataSource
} ifelse
dup type /filetype eq {
ConsumeFileData
} {
dup type /arraytype eq or
1 index type /packedarraytype eq or {
ConsumeProcData
} {
pop pop % Remove DataSource and size
} ifelse
} ifelse
} bind def
/colorimage {
(colorimage)==
dup 1 ne {
1 index
{
(Can't handle colorimage with multiple sources!) ==
} if
} {
exch pop % get rid of 'multi'
% stack: w h bpc m d ncomp
3 -1 roll pop % stack: w h bpc d ncomp
exch 5 -1 roll % stack d w h bpc ncomp
mul mul mul % stack: d w*h*bpc*ncomp
7 add cvi 8 idiv exch % stack: bytes datasource
} ifelse
dup type /filetype eq {
ConsumeFileData
} {
dup type /arraytype eq or
1 index type /packedarraytype eq or {
ConsumeProcData
} {
pop pop % Remove DataSource and size
} ifelse
} ifelse
} bind def
That technique should work for images in any colour, because the image operator is used for both colour and monochrome images. Unless your file uses the obselete level 1.5 'colorimage' operator. I can't recall if I redefined that operator in the example, if not then yuo can redefine it in a similar fashion.
In fact I see that I offered redefinitions for image, colorimage and imagemask, so all image types should be elided. Perhaps you could share an example ?
精彩评论