Sophie

Sophie

distrib > Mageia > 5 > x86_64 > media > core-release > by-pkgid > a94aa4b565ade94d0310c6e77aeea765 > files > 47

ocaml-camlpdf-devel-1.7.1-3.mga5.x86_64.rpm

(* Make a PDF suitable for draft printing by replacing its images by crossed
boxes. Usage: pdfdraft input.pdf output.pdf *)
open Pdfutil

(* Predicate on an xobject: true if an image xobject. *)
let isimage pdf (_, xobj) =
  Pdf.lookup_direct pdf "/Subtype" xobj = Some (Pdf.Name "/Image")

(* Given a set of resources for a page, and the name of a resource, determine if
that name refers to an image xobject. *)
let xobject_isimage pdf resources name =
  match resources with
  | Pdf.Dictionary _ ->
      begin match Pdf.lookup_direct pdf "/XObject" resources with
      | Some xobjects ->
          isimage pdf ("", Pdf.lookup_fail "xobject not there" pdf name xobjects)
      | _ -> false
      end
  | _ -> failwith "bad resources"

(* Remove any image xobjects from a set of resources. *)
let remove_image_xobjects pdf resources =
  match resources with
  | Pdf.Dictionary res ->
      begin match Pdf.lookup_direct pdf "/XObject" resources with
      | Some (Pdf.Dictionary xobjects) ->
          Pdf.Dictionary
            (replace "/XObject" (Pdf.Dictionary (lose (isimage pdf) xobjects)) res)
      | _ -> resources
      end
  | _ -> failwith "bad resources"

(* The subsitute for an image. *)
let substitute =
  rev
    [Pdfops.Op_q;
     Pdfops.Op_w 0.;
     Pdfops.Op_G 0.;
     Pdfops.Op_re (0., 0., 1., 1.);
     Pdfops.Op_m (0., 0.);
     Pdfops.Op_l (1., 1.);
     Pdfops.Op_m (0., 1.);
     Pdfops.Op_l (1., 0.);
     Pdfops.Op_S;
     Pdfops.Op_Q]

(* Remove references to images from a graphics stream. *)
let rec remove_images_stream pdf resources prev = function
  | [] -> rev prev
  | (Pdfops.Op_Do name) as h::t ->
      if xobject_isimage pdf resources name
        then remove_images_stream pdf resources (substitute @ prev) t
        else remove_images_stream pdf resources (h::prev) t
  | Pdfops.InlineImage _::t ->
      remove_images_stream pdf resources (substitute @ prev) t
  | h::t ->
      remove_images_stream pdf resources (h::prev) t

(* Remove images from a page. *)
let remove_images_page pdf page =
  let content' =
    remove_images_stream pdf page.Pdfpage.resources []
       (Pdfops.parse_operators pdf page.Pdfpage.resources page.Pdfpage.content)
  in
    {page with
      Pdfpage.content =
        (let stream = Pdfops.stream_of_ops content' in
          Pdfcodec.encode_pdfstream pdf Pdfcodec.Flate stream;
          [stream]);
      Pdfpage.resources =
        remove_image_xobjects pdf page.Pdfpage.resources}

(* Remove images from all pages in a document. *)
let remove_images pdf =
  let pages = Pdfpage.pages_of_pagetree pdf in
    let pages' = map (remove_images_page pdf) pages in
      let pdf, pagetree_num = Pdfpage.add_pagetree pages' pdf in
        let pdf = Pdfpage.add_root pagetree_num [] pdf in
          Pdf.remove_unreferenced pdf;
          pdf

(* Read command line arguments and call [remove_images] *)
let _ =
  match Array.to_list Sys.argv with
  | [_; in_file; out_file] ->
      begin try
        let ch = open_in_bin in_file in
          let pdf = Pdfread.pdf_of_channel None None ch in
            Pdfwrite.pdf_to_file (remove_images pdf) out_file;
            close_in ch
      with
        err ->
          Printf.printf "Failed to produce output.\n%s\n\n" (Printexc.to_string err);
          exit 1
      end
  | _ ->
      print_string "Syntax: pdfdraft <input> <output>\n\n"; exit 1