| V | = | version |
| R | = | revision |
| Length | = | params[:KeyLength] |
| P | = | params[:Permissions] |
| EncryptMetadata | = | params[:EncryptMetadata] |
| CF | = | Dictionary.new |
| AuthEvent | = | :DocOpen |
| CFM | = | :AESV2 |
| Length | = | 16 |
| StmF | = | handler.StrF = :StdCF |
| ID | = | [ id, id ] |
| Rect | = | Rectangle[:llx => 0.0, :lly => 0.0, :urx => 0.0, :ury => 0.0] |
| V | = | digsig ; |
| SigFlags | = | InteractiveForm::SigFlags::SIGNATURESEXIST | InteractiveForm::SigFlags::APPENDONLY |
| Location | = | HexaString.new(location) if location |
| ContactInfo | = | HexaString.new(contact) if contact |
| Reason | = | HexaString.new(reason) if reason |
| Data | = | self.Catalog |
| TransformParams | = | UsageRights::TransformParams.new |
| V | = | UsageRights::TransformParams::VERSION |
| Reference | = | [ sigref ] |
| UR3 | = | digsig |
| Prev | = | prev_xref_offset |
| Size | = | objset.size + 1 |
| Prev | = | prev_xref_offset |
| XRefStm | = | xrefstm_offset if options[:use_xrefstm] == true |
| Size | = | size + 1 |
| Root | = | root |
| Pages | = | PageTreeNode.new.set_indirect(true) |
| Root | = | catalog.reference |
| Size | = | size + 1 |
| Root | = | self << cat |
| OpenAction | = | action |
| WC | = | action |
| WP | = | action |
| Names | = | Names.new |
| Count | = | treeroot.Kids.length |
| Parent | = | treeroot |
| get_object | -> | [] |
| filename | [RW] | |
| header | [RW] | |
| revisions | [RW] |
| init_structure: | If this flag is set, some structures will be generated automatically while manipulating this PDF. Set it when you are creating a new PDF file; it must not be used when parsing an existing file. |
# File sources/parser/pdf.rb, line 164
#
# Builds an empty document: a fresh header, one initial revision and a
# trailer for that revision.
# init_structure:: when true (the default), +init+ is invoked once the
#                  header, revision list and trailer are in place.
#
def initialize(init_structure = true)
  @header    = PDF::Header.new
  @revisions = []

  add_new_revision
  @revisions.first.trailer = Trailer.new

  init if init_structure
end
Adds a new object to the PDF file. If this object has no object number yet, a new one is automatically allocated and assigned to it. Returns a Reference to this object.
| object: | The object to add. |
# File sources/parser/pdf.rb, line 395
#
# Adds +object+ to the most recent revision by delegating to
# add_to_revision, and returns whatever that call returns.
# object:: The object to add.
#
def <<(object)
  latest_revision = @revisions.last
  add_to_revision(object, latest_revision)
end
Returns the current Catalog Dictionary.
# File sources/parser/catalog.rb, line 33
#
# Returns the value of the document-level :Root attribute, as resolved
# by get_doc_attr.
#
def Catalog
  get_doc_attr :Root
end
Sets the current Catalog Dictionary.
# File sources/parser/catalog.rb, line 40
#
# Replaces the Catalog of the last revision.
# cat:: The new Catalog; anything else raises TypeError.
#
# If the last revision's trailer already has a Root, that object is
# deleted first; +cat+ is then added to the document and the trailer's
# Root is set to the reference returned by <<.
#
def Catalog=(cat)
  raise TypeError, "Expected type Catalog, received #{cat.class}" unless cat.is_a?(Catalog)

  trailer = @revisions.last.trailer
  delete_object(trailer.Root) if trailer.Root

  trailer.Root = self << cat
end
Add a field to the Acrobat form.
| field: | The Field to add. |
# File sources/parser/acroform.rb, line 41
#
# Adds one or several fields to the Acrobat form.
# field:: A Field, or an ::Array containing only Fields.
#
# Raises TypeError for any other argument. The Catalog's AcroForm and its
# Fields array are created when missing. Returns self.
#
def add_field(field)
  single = field.is_a?(Field)

  if field.is_a?(::Array)
    raise TypeError, "Expected array of Fields" unless field.all? { |f| f.is_a?(Field) }
  elsif not single
    raise TypeError, "Expected Field, received #{field.class}"
  end

  fields = single ? [field] : field

  catalog = self.Catalog
  catalog.AcroForm ||= InteractiveForm.new
  catalog.AcroForm.Fields ||= []
  catalog.AcroForm.Fields.concat(fields)

  self
end
Ends the current Revision, and starts a new one.
# File sources/parser/pdf.rb, line 652
#
# Ends the current revision and starts a new one.
#
# The new revision receives a fresh Trailer whose Root is copied from the
# trailer of the previous revision (nil when there was none). Returns self.
#
def add_new_revision
  inherited_root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

  @revisions << Revision.new(self)

  newest = @revisions.last
  newest.trailer = Trailer.new
  newest.trailer.Root = inherited_root

  self
end
Adds a new object to a specific revision. If this object has no object number yet, a new one is automatically allocated and assigned to it. Returns a Reference to this object.
| object: | The object to add. |
| revision: | The revision to add the object to. |
# File sources/parser/pdf.rb, line 408
#
# Stores +object+ in the body of +revision+, keyed by its reference.
# object::   The object to add; it is flagged indirect and bound to this PDF.
# revision:: The revision to add the object to.
#
# An object whose number is 0 gets a number and generation from
# alloc_new_object_number. Returns the object's reference.
#
def add_to_revision(object, revision)
  object.set_indirect(true)
  object.set_pdf(self)

  if object.no == 0
    object.no, object.generation = alloc_new_object_number
  end

  revision.body[object.reference] = object

  object.reference
end
# File sources/parser/page.rb, line 26
#
# Appends one or more pages to the Kids of the Catalog's page tree root.
# page:: The first page to append (a new Page by default).
# more:: Any additional pages.
#
# Fails with a RuntimeError naming the class of the first argument that
# is not a Page. The tree root's Count is refreshed and every appended
# page gets the tree root as Parent. Returns self.
#
def append_page(page = Page.new, *more)
  pages = [ page ].concat(more)

  # Report the class of the object that is actually at fault, not the
  # class of the first argument.
  offender = pages.index { |candidate| not candidate.is_a?(Page) }
  fail "Expecting Page type, instead of #{pages[offender].class}" if offender

  treeroot = self.Catalog.Pages

  treeroot.Kids ||= [] #:nodoc:
  treeroot.Kids.concat(pages)
  treeroot.Count = treeroot.Kids.length

  pages.each do |appended|
    appended.Parent = treeroot
  end

  self
end
# File sources/parser/pdf.rb, line 342
#
# Recursively collects +root+ and everything reachable from it into
# +objset+, skipping objects already present (compared by identity).
# root::       The object to start from.
# objset::     The accumulator array, modified in place.
# inc_objstm:: When true, the contents of ObjectStreams are walked too.
#
# Dictionaries contribute both their names and their values; Arrays
# contribute their elements.
#
def append_subobj(root, objset, inc_objstm)
  return unless objset.find { |known| root.equal?(known) }.nil?

  objset << root

  if root.is_a?(Dictionary)
    root.each_pair do |name, value|
      append_subobj(name, objset, inc_objstm)
      append_subobj(value, objset, inc_objstm)
    end
  elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and inc_objstm == true)
    root.each do |subobj|
      append_subobj(subobj, objset, inc_objstm)
    end
  end
end
Attaches an embedded file to the PDF.
| path: | The path to the file to attach. |
| options: | A set of options to configure the attachment. |
# File sources/parser/file.rb, line 35
#
# Attaches an embedded file to the PDF.
# path::    The path to the file to attach.
# options:: Overrides for the defaults below.
#
# Recognised options:
# :Register::     register the file specification under
#                 Names::Root::EMBEDDEDFILES (default: true).
# :EmbeddedName:: name stored in the file specification
#                 (default: basename of +path+).
# :Filter::       stream filter applied to the data (default: :FlateDecode).
#
# Returns the FileSpec describing the attachment.
#
def attach_file(path, options = {})

  #
  # Default options.
  #
  params =
  {
    :Register => true,                      # Shall the file be registered in the name directory ?
    :EmbeddedName => File.basename(path),   # The inner filename of the attachment.
    :Filter => :FlateDecode                 # The stream filter used to store data.
  }

  params.update(options)

  # Block form so the descriptor is closed as soon as the data is read,
  # even if reading raises.
  fdata = File.open(path, "rb") { |file| file.read }

  fstream = EmbeddedFileStream.new
  fstream.data = fdata
  fstream.setFilter(params[:Filter])

  name = params[:EmbeddedName]
  fspec = FileSpec.new.setType(:Filespec).setF(name).setEF(FileSpec.new(:F => fstream))

  register(Names::Root::EMBEDDEDFILES, name, fspec) if params[:Register] == true

  fspec
end
This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
# File sources/parser/pdf.rb, line 436
#
# Recomputes, verifies and corrects the main PDF structures so that a
# proper file can be output. Returns self.
#
def compile
  # A valid document must have at least one page.
  append_page if pages.empty?

  # Allocates object numbers, creates references and invokes the object
  # finalization methods.
  physicalize

  # The header version is taken from the first and third characters of
  # the required version's string form (e.g. "1.6" gives 1 and 6).
  required = version_required.to_s
  @header.majorversion = required[0, 1].to_i
  @header.minorversion = required[2, 1].to_i

  self
end
Decrypts the current document (only RC4 40..128 bits). TODO: AESv2, AESv3, lazy decryption
| passwd: | The password to decrypt the document. |
# File sources/parser/encryption.rb, line 54
#
# Decrypts the current document in place.
# passwd:: The user or owner password (empty by default).
#
# Only the :Standard security handler is accepted. Handler versions 1 and
# 2 select Encryption::ARC4, version 4 selects Encryption::AES; any other
# version raises EncryptionNotSupportedError.
#
# Raises EncryptionError when the document is not encrypted or has no
# usable ID, and EncryptionInvalidPasswordError when +passwd+ is neither
# the owner nor the user password. Returns self.
#
def decrypt(passwd = "")

  unless self.is_encrypted?
    raise EncryptionError, "PDF is not encrypted"
  end

  encrypt_dict = get_doc_attr(:Encrypt)
  handler = Encryption::Standard::Dictionary.new(encrypt_dict.copy)

  unless handler.Filter == :Standard
    raise EncryptionNotSupportedError, "Unknown security handler : '#{handler.Filter.to_s}'"
  end

  algorithm =
    case handler.V
      when 1,2 then Encryption::ARC4
      when 4 then Encryption::AES
    else
      raise EncryptionNotSupportedError, "Unsupported encryption version : #{handler.V}"
    end

  id = get_doc_attr(:ID)
  if id.nil? or not id.is_a?(Array)
    raise EncryptionError, "Document ID was not found or is invalid"
  else
    id = id.first
  end

  if not handler.is_owner_password?(passwd, id) and not handler.is_user_password?(passwd, id)
    raise EncryptionInvalidPasswordError
  end

  encryption_key = handler.compute_encryption_key(passwd, id)

  #
  # Should be fixed to exclude only the active XRefStream
  #
  encrypted_objects = self.objects(false).find_all{ |obj|
    (obj.is_a?(String) and not obj.indirect_parent.is_a?(XRefStream) and not obj.equal?(encrypt_dict[:U]) and not obj.equal?(encrypt_dict[:O])) or
    (obj.is_a?(Stream) and not obj.is_a?(XRefStream))
  }

  encrypted_objects.each { |obj|
    no = obj.indirect_parent.no
    gen = obj.indirect_parent.generation

    #
    # Per-object key material: the document key followed by the three
    # low-order bytes of the object number and the two low-order bytes of
    # the generation number. "V" forces little-endian packing, so the
    # result no longer depends on the byte order of the host ("I" does).
    #
    k = encryption_key + [no].pack("V")[0, 3] + [gen].pack("V")[0, 2]

    # The key length is measured before the AES salt is appended.
    key_len = (k.length > 16) ? 16 : k.length

    k << "sAlT" if algorithm == Encryption::AES

    key = Digest::MD5.digest(k)[0, key_len]

    case obj
      when String then obj.replace(algorithm.decrypt(key, obj.value))
      when Stream then obj.rawdata = algorithm.decrypt(key, obj.rawdata)
    end
  }

  self
end
Enable the document Usage Rights.
| rights: | list of rights defined in UsageRights::Rights |
# File sources/parser/signature.rb, line 130
#
# Enables the document Usage Rights by adding a UR3 signature to the
# Catalog's Perms dictionary.
# rights:: list of rights defined in UsageRights::Rights; each entry is
#          indexed as [ key, value, ... ].
#
# The key and certificate are read from 'adobe.key' and 'adobe.crt' in
# the current directory. If they cannot be loaded, a warning is printed
# and nil is returned. On success the document is compiled, signed and
# frozen.
#
def enable_usage_rights(*rights)

  def signfield_size(certificate, key, ca = []) #:nodoc:
    datatest = "abcdefghijklmnopqrstuvwxyz"
    OpenSSL::PKCS7.sign(certificate, key, datatest, ca, OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der.size + 128
  end

  begin
    # Block form closes both descriptors as soon as the data is read.
    key_data = File.open('adobe.key', 'rb') { |file| file.read }
    cert_data = File.open('adobe.crt', 'rb') { |file| file.read }

    key = OpenSSL::PKey::RSA.new(key_data)
    certificate = OpenSSL::X509::Certificate.new(cert_data)
  rescue
    warn "The Adobe private key is necessary to enable usage rights.\nYou do not seem to be Adobe :)... Aborting."
    return nil
  end

  digsig = Signature::DigitalSignature.new.set_indirect(true)

  self.Catalog.AcroForm ||= InteractiveForm.new
  #self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::APPENDONLY

  digsig.Type = :Sig #:nodoc:
  # Zero-filled placeholder; the real signature is written into it below.
  digsig.Contents = HexaString.new("\x00" * signfield_size(certificate, key, [])) #:nodoc:
  digsig.Filter = Name.new("Adobe.PPKLite") #:nodoc:
  digsig.Name = "ARE Acrobat Product v8.0 P23 0002337" #:nodoc:
  digsig.SubFilter = Name.new("adbe.pkcs7.detached") #:nodoc:
  digsig.ByteRange = [0, 0, 0, 0] #:nodoc:

  sigref = Signature::Reference.new #:nodoc:
  sigref.Type = :SigRef #:nodoc:
  sigref.TransformMethod = :UR3 #:nodoc:
  sigref.Data = self.Catalog

  sigref.TransformParams = UsageRights::TransformParams.new
  sigref.TransformParams.P = true #:nodoc:
  sigref.TransformParams.Type = :TransformParams #:nodoc:
  sigref.TransformParams.V = UsageRights::TransformParams::VERSION

  rights.each { |right|

    sigref.TransformParams[right.first] ||= []
    sigref.TransformParams[right.first].concat(right[1..-1])

  }

  digsig.Reference = [ sigref ]

  self.Catalog.Perms ||= Perms.new
  self.Catalog.Perms.UR3 = digsig

  #
  # Flattening the PDF to get file view.
  #
  self.compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sigoffset = get_object_offset(digsig.no, digsig.generation) + digsig.sigOffset

  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sigoffset
  digsig.ByteRange[2] = sigoffset + digsig.Contents.size

  # Writing ByteRange[3] can change the file size, so repeat until the
  # stored value agrees with the size it produces.
  digsig.ByteRange[3] = filesize - digsig.ByteRange[2] until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]

  # From that point the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuildxrefs

  filedata = self.to_bin
  signable_data = filedata[digsig.ByteRange[0],digsig.ByteRange[1]] + filedata[digsig.ByteRange[2],digsig.ByteRange[3]]

  signature = OpenSSL::PKCS7.sign(certificate, key, signable_data, [], OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
  digsig.Contents[0, signature.size] = signature

  #
  # No more modification are allowed after signing.
  #
  self.freeze

end
Encrypts the current document with the provided passwords. The document will be encrypted at writing-on-disk time.
| userpasswd: | The user password. |
| ownerpasswd: | The owner password. |
| options: | A set of options to configure encryption. |
# File sources/parser/encryption.rb, line 128
#
# Encrypts the current document with the provided passwords. The document
# is extended with Encryption::EncryptedDocument and given the encryption
# dictionary, key and algorithm.
# userpasswd::  The user password.
# ownerpasswd:: The owner password.
# options::     Overrides for the defaults below.
#
# Recognised options:
# :Algorithm::       :RC4 (default) or :AES.
# :KeyLength::       key size in bits (default 128). RC4 accepts multiples
#                    of 8 from 40 to 128, AES accepts 128 only.
# :EncryptMetadata:: stored in the handler for revision 4 only (default true).
# :Permissions::     document permissions
#                    (default Encryption::Standard::Permissions::ALL).
#
# Raises EncryptionError if the document is already encrypted or the key
# length is invalid, EncryptionNotSupportedError for any other algorithm.
# Returns self.
#
def encrypt(userpasswd, ownerpasswd, options = {})
  raise EncryptionError, "PDF is already encrypted" if self.is_encrypted?

  params =
  {
    :Algorithm => :RC4,
    :KeyLength => 128,
    :EncryptMetadata => true,
    :Permissions => Encryption::Standard::Permissions::ALL
  }
  params.update(options)

  key_bits = params[:KeyLength]

  case params[:Algorithm]
  when :RC4
    algorithm = Encryption::ARC4
    unless (40..128) === key_bits and key_bits % 8 == 0
      raise EncryptionError, "Invalid key length"
    end
    # 40-bit keys use version 1 / revision 2, longer keys version 2 / revision 3.
    version, revision = (key_bits > 40) ? [2, 3] : [1, 2]
  when :AES
    algorithm = Encryption::AES
    raise EncryptionError, "Invalid key length" unless key_bits == 128
    version = revision = 4
  else
    raise EncryptionNotSupportedError, "Algorithm not supported : #{params[:Algorithm]}"
  end

  id = (get_doc_attr(:ID) || gen_id).first

  handler = Encryption::Standard::Dictionary.new
  handler.Filter = :Standard #:nodoc:
  handler.V = version
  handler.R = revision
  handler.Length = key_bits
  handler.P = params[:Permissions]

  if revision == 4
    handler.EncryptMetadata = params[:EncryptMetadata]
    handler.CF = Dictionary.new

    cryptfilter = Encryption::CryptFilterDictionary.new
    cryptfilter.AuthEvent = :DocOpen
    cryptfilter.CFM = :AESV2
    cryptfilter.Length = 16

    handler.CF[:StdCF] = cryptfilter
    handler.StmF = handler.StrF = :StdCF
  end

  handler.set_owner_password(userpasswd, ownerpasswd)
  handler.set_user_password(userpasswd, id)

  document_key = handler.compute_encryption_key(userpasswd, id)

  trailer_info = get_trailer_info
  trailer_info[:Encrypt] = self << handler

  self.extend(Encryption::EncryptedDocument)
  self.encryption_dict = handler
  self.encryption_key = document_key
  self.stm_algo = self.str_algo = algorithm

  self
end
Exports the document to a Graphviz dot file.
| filename: | The path where to save the file. |
# File sources/parser/export.rb, line 34
#
# Exports the document to a Graphviz dot file.
# filename:: The path where to save the file.
#
# Every object returned by objects(true) that is not a Reference becomes
# a node, identified by its object_id. Edges link containers (Array,
# ObjectStream, Dictionary, Stream dictionary) to their members, with
# References resolved through indirect_objects. The graph is always
# named "PDF".
#
def export_to_graph(filename)

  # Chooses the label, colours and shape of the node drawn for +object+.
  def appearance(object) #:nodoc:

    label = object.type.to_s
    case object
      when Catalog
        fontcolor = "red"
        color = "mistyrose"
        shape = "doublecircle"
      when Name, Number
        label = object.value
        fontcolor = "orange"
        color = "lightgoldenrodyellow"
        shape = "polygon"
      when String
        label = object.value unless (object.is_binary_data? or object.length > 50)
        fontcolor = "red"
        color = "white"
        shape = "polygon"
      when Array
        fontcolor = "green"
        color = "lightcyan"
        shape = "ellipse"
    else
      fontcolor = "blue"
      color = "aliceblue"
      shape = "ellipse"
    end

    { :label => label, :fontcolor => fontcolor, :color => color, :shape => shape }
  end

  # Writes to +fd+ one edge per member of +object+.
  def add_edges(pdf, fd, object) #:nodoc:

    if object.is_a?(Array) or object.is_a?(ObjectStream)

      object.each { |subobj|

        if subobj.is_a?(Reference) then subobj = pdf.indirect_objects[subobj] end

        unless subobj.nil?
          fd << "\t#{object.object_id} -> #{subobj.object_id}\n"
        end
      }

    elsif object.is_a?(Dictionary)

      object.each_pair { |name, subobj|

        if subobj.is_a?(Reference) then subobj = pdf.indirect_objects[subobj] end

        unless subobj.nil?
          fd << "\t#{object.object_id} -> #{subobj.object_id} [label=\"#{name.value}\",fontsize=7];\n"
        end
      }

    end

    if object.is_a?(Stream)

      object.dictionary.each_pair { |key, value|

        # The reference to resolve is +value+; the previous code looked up
        # an undefined +subobj+ here and raised NameError.
        if value.is_a?(Reference) then value = pdf.indirect_objects[value] end

        unless value.nil?
          fd << "\t#{object.object_id} -> #{value.object_id} [label=\"#{key.value}\",fontsize=7];\n"
        end
      }

    end

  end

  # No graph name can be passed in, so the default is always used.
  graphname = "PDF"

  # Block form guarantees the file is closed, even on error.
  File.open(filename, "w") do |fd|

    fd << "digraph #{graphname} {\n\n"

    objects = self.objects(true).find_all{ |obj| not obj.is_a?(Reference) }

    objects.each { |object|

      attr = appearance(object)

      fd << "\t#{object.object_id} [label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]}];\n"

      if object.is_a?(Stream)

        object.dictionary.each { |value|

          unless value.is_a?(Reference)
            attr = appearance(value)
            fd << "\t#{value.object_id} [label=\"#{attr[:label]}\",shape=#{attr[:shape]},color=#{attr[:color]},style=filled,fontcolor=#{attr[:fontcolor]}];\n"
          end

        }

      end

      add_edges(self, fd, object)

    }

    fd << "\n}"

  end

end
Exports the document to a GraphML file.
| filename: | The path where to save the file. |
# File sources/parser/export.rb, line 153
#
# Exports the document to a GraphML file (yFiles flavour).
# filename:: The path where to save the file.
#
# Every object returned by objects(true) that is not a Reference becomes
# a node whose id is "n" followed by its object_id; edges are numbered
# "e1", "e2", ... and link containers to their members, with References
# resolved through indirect_objects. The graph is always named "PDF".
#
def export_to_graphml(filename)

  # Returns the XML fragment declaring a node.
  def declare_node(id, attr) #:nodoc:
    " <node id=\"#{id}\">\n" <<
    " <data key=\"d0\">\n" <<
    " <y:ShapeNode>\n" <<
    " <y:NodeLabel>#{attr[:label]}</y:NodeLabel>\n" <<
    #~ " <y:Shape type=\"#{attr[:shape]}\"/>\n" <<
    " </y:ShapeNode>\n" <<
    " </data>\n" <<
    " </node>\n"
  end

  # Returns the XML fragment declaring an edge from +src+ to +dest+.
  def declare_edge(id, src, dest, label = nil) #:nodoc:
    " <edge id=\"#{id}\" source=\"#{src}\" target=\"#{dest}\">\n" <<
    " <data key=\"d1\">\n" <<
    " <y:PolyLineEdge>\n" <<
    " <y:LineStyle type=\"line\" width=\"1.0\" color=\"#000000\"/>\n" <<
    " <y:Arrows source=\"none\" target=\"standard\"/>\n" <<
    " <y:EdgeLabel>#{label.to_s}</y:EdgeLabel>\n" <<
    " </y:PolyLineEdge>\n" <<
    " </data>\n" <<
    " </edge>\n"
  end

  # Chooses the label, colours and shape associated with +object+.
  def appearance(object) #:nodoc:

    label = object.type.to_s
    case object
      when Catalog
        fontcolor = "red"
        color = "mistyrose"
        shape = "doublecircle"
      when Name, Number
        label = object.value
        fontcolor = "orange"
        color = "lightgoldenrodyellow"
        shape = "polygon"
      when String
        label = object.value unless (object.is_binary_data? or object.length > 50)
        fontcolor = "red"
        color = "white"
        shape = "polygon"
      when Array
        fontcolor = "green"
        color = "lightcyan"
        shape = "ellipse"
    else
      fontcolor = "blue"
      color = "aliceblue"
      shape = "ellipse"
    end

    { :label => label, :fontcolor => fontcolor, :color => color, :shape => shape }
  end

  # Writes one edge per member of +object+, numbering them from +id+.
  # Returns the next free edge number.
  def add_edges(pdf, fd, object, id) #:nodoc:

    if object.is_a?(Array) or object.is_a?(ObjectStream)

      object.each { |subobj|

        if subobj.is_a?(Reference) then subobj = pdf.indirect_objects[subobj] end

        unless subobj.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{subobj.object_id}")
          id = id + 1
        end
      }

    elsif object.is_a?(Dictionary)

      object.each_pair { |name, subobj|

        if subobj.is_a?(Reference) then subobj = pdf.indirect_objects[subobj] end

        unless subobj.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{subobj.object_id}", name.value)
          id = id + 1
        end
      }

    end

    if object.is_a?(Stream)

      object.dictionary.each_pair { |key, value|

        # The reference to resolve is +value+; the previous code looked up
        # an undefined +subobj+ here and raised NameError.
        if value.is_a?(Reference) then value = pdf.indirect_objects[value] end

        unless value.nil?
          fd << declare_edge("e#{id}", "n#{object.object_id}", "n#{value.object_id}", key.value)
          id = id + 1
        end
      }

    end

    id
  end

  # No graph name can be passed in, so the default is always used.
  graphname = "PDF"

  edge_nb = 1

  # Block form guarantees the file is closed, even on error.
  File.open(filename, "w") do |fd|

    fd << '<?xml version="1.0" encoding="UTF-8"?>' << "\n"
    fd << '<graphml xmlns="http://graphml.graphdrawing.org/xmlns/graphml"' << "\n"
    fd << ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' << "\n"
    fd << ' xsi:schemaLocation="http://graphml.graphdrawing.org/xmlns/graphml ' << "\n"
    fd << ' http://www.yworks.com/xml/schema/graphml/1.0/ygraphml.xsd"' << "\n"
    fd << ' xmlns:y="http://www.yworks.com/xml/graphml">' << "\n"
    fd << '<key id="d0" for="node" yfiles.type="nodegraphics"/>' << "\n"
    fd << '<key id="d1" for="edge" yfiles.type="edgegraphics"/>' << "\n"
    fd << "<graph id=\"#{graphname}\" edgedefault=\"directed\">\n"

    objects = self.objects(true).find_all{ |obj| not obj.is_a?(Reference) }

    objects.each { |object|

      fd << declare_node("n#{object.object_id}", appearance(object))

      if object.is_a?(Stream)

        object.dictionary.each { |value|

          unless value.is_a?(Reference)
            # Same "n" prefix as every other node id, so that the edges
            # emitted by add_edges point at a declared node.
            fd << declare_node("n#{value.object_id}", appearance(value))
          end
        }
      end

      edge_nb = add_edges(self, fd, object, edge_nb)
    }

    fd << '</graph>' << "\n"
    fd << '</graphml>'

  end

end
Returns the virtual file size as it would be taking on disk.
# File sources/parser/pdf.rb, line 191
#
# Returns the size of the document as serialized by to_bin, with the
# :rebuildxrefs option turned off.
#
def filesize
  serialized = self.to_bin(:rebuildxrefs => false)
  serialized.size
end
Returns an array of objects matching specified block.
# File sources/parser/pdf.rb, line 321
#
# Returns an array of the objects for which the given block is true.
# params:: :only_indirect => true restricts the search to the values of
#          indirect_objects; otherwise (the default) objects is searched.
#
def find(params = {}, &b)
  options = { :only_indirect => false }
  options.update(params)

  candidates =
    if options[:only_indirect] == true
      self.indirect_objects.values
    else
      self.objects
    end

  candidates.find_all(&b)
end
Returns the document information dictionary if present.
# File sources/parser/metadata.rb, line 49
#
# Returns the document-level :Info attribute as resolved by get_doc_attr.
#
def get_document_info
  get_doc_attr(:Info)
end
Returns a Hash of the information found in the metadata stream
# File sources/parser/metadata.rb, line 56
#
# Returns a Hash of the information found in the Catalog's metadata
# stream: one entry per child element of each rdf:Description, mapping
# the element name to its text. Returns nil when the Catalog's Metadata
# is not a Stream.
#
def get_metadata
  metadata_stm = self.Catalog.Metadata
  return nil unless metadata_stm.is_a?(Stream)

  xml = REXML::Document.new(metadata_stm.data)

  info = {}
  xml.elements.each("*/*/rdf:Description/*") { |element| info[element.name] = element.text }
  info
end
Returns an array of Objects whose content is matching pattern.
# File sources/parser/pdf.rb, line 267
#
# Returns an array of the objects whose content matches one of +patterns+.
# patterns:: Strings (matched literally) and/or Regexps.
#
# String and Name objects are matched on their value, Stream objects on
# their data; every other kind of object is ignored. Raises TypeError
# when a pattern is neither a String nor a Regexp.
#
def grep(*patterns)
  patterns.map! do |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
  end

  raise TypeError, "Expected a String or Regexp" unless patterns.all? { |pattern| pattern.is_a?(Regexp) }

  objects.find_all do |obj|
    case obj
    when String, Name
      patterns.any? { |pattern| obj.value.to_s.match(pattern) }
    when Stream
      patterns.any? { |pattern| obj.data.match(pattern) }
    else
      false
    end
  end
end
Returns true if the document has a document information dictionary.
# File sources/parser/metadata.rb, line 35
#
# Tells whether the document has an :Info attribute, as reported by
# has_attr?.
#
def has_document_info?
  has_attr?(:Info)
end
Returns true if the document contains an acrobat form.
# File sources/parser/acroform.rb, line 33
#
# Returns true if the document has a Catalog and that Catalog has an
# AcroForm entry.
#
def has_form?
  catalog = self.Catalog
  !catalog.nil? && !catalog.AcroForm.nil?
end
Returns true if the document has a catalog metadata stream.
# File sources/parser/metadata.rb, line 42
#
# Returns true if the Catalog has a :Metadata key.
#
def has_metadata?
  self.Catalog.has_key?(:Metadata)
end
# File sources/parser/signature.rb, line 217
#
# Placeholder: the check is not implemented and the String "todo" is
# always returned (which is truthy). The disabled line below is the
# draft of the intended test.
#
def has_usage_rights?
  #~ not self.Catalog.Perms.nil? and (not self.Catalog.Perms.UR3.nil? or not self.Catalog.Perms.UR.nil?)
  "todo"
end
Returns whether the current document is linearized.
# File sources/parser/linearization.rb, line 33
#
# Returns whether the current document is linearized, i.e. whether the
# first object of the first revision's body is a Dictionary holding a
# :Linearized key.
#
def is_linearized?
  first_object = @revisions.first.body.values.first

  first_object.is_a?(Dictionary) && first_object.has_key?(:Linearized)
end
Returns whether the document contains a digital signature.
# File sources/parser/signature.rb, line 119
#
# Placeholder: the check is not implemented and the String "todo" is
# always returned (which is truthy). The disabled line below is the
# draft of the intended test.
#
def is_signed?
  #~ not self.Catalog.AcroForm.nil? and (self.Catalog.AcroForm[:SigFlags] & InteractiveForm::SigFlags::SIGNATUREEXISTS) != 0
  "todo"
end
Returns an array of Objects whose name (in a Dictionary) is matching pattern.
# File sources/parser/pdf.rb, line 293
#
# Returns an array of the Dictionary entries whose name matches one of
# +patterns+; References are returned solved.
# patterns:: Strings (matched literally) and/or Regexps. With no pattern
#            at all, every object of the document is returned.
#
def ls(*patterns)
  return objects if patterns.empty?

  patterns.map! do |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
  end

  matches = []

  objects.each do |container|
    next unless container.is_a?(Dictionary)

    container.each_pair do |name, entry|
      next unless patterns.any? { |pattern| name.value.to_s.match(pattern) }

      matches << (entry.is_a?(Reference) ? entry.solve : entry)
    end
  end

  matches
end
# File sources/parser/obfuscation.rb, line 216
#
# Saves the document under +filename+ with the :obfuscate option forced
# to true.
# filename:: The path where to save this PDF.
# options::  Extra options handed over to saveas.
#
# The options are merged into a new Hash, so the Hash supplied by the
# caller is left untouched. Returns the result of saveas.
#
def obfuscate_and_saveas(filename, options = {})
  saveas(filename, options.merge(:obfuscate => true))
end
Returns an array of objects embedded in the PDF body.
| include_objstm: | Whether it shall return objects embedded in object streams. |
Note: this should return an iterator, for Ruby 1.9 compatibility.
# File sources/parser/pdf.rb, line 340
#
# Returns an array of the objects embedded in the PDF body: the objects
# of every revision's body plus everything reachable from them, each
# listed once (identity comparison).
# include_objstm:: Whether objects embedded in object streams are
#                  returned as well.
#
def objects(include_objstm = true)

  # Adds +root+ and everything reachable from it to +objset+.
  def append_subobj(root, objset, inc_objstm)
    return unless objset.find { |known| root.equal?(known) }.nil?

    objset << root

    if root.is_a?(Dictionary)
      root.each_pair do |name, value|
        append_subobj(name, objset, inc_objstm)
        append_subobj(value, objset, inc_objstm)
      end
    elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and inc_objstm == true)
      root.each do |subobj|
        append_subobj(subobj, objset, inc_objstm)
      end
    end
  end

  collected = []

  @revisions.each do |revision|
    revision.body.each_value do |object|
      append_subobj(object, collected, include_objstm)
    end
  end

  collected
end
Sets an action to run on document closing.
| action: | A JavaScript Action Object. |
# File sources/parser/catalog.rb, line 76
#
# Sets an action to run on document closing.
# action:: A JavaScript Action Object.
#
# Raises TypeError unless +action+ is an Action::JavaScript, and
# InvalidPDF when the document has no Catalog. The Catalog's additional
# actions dictionary is created when missing. Returns self.
#
def onDocumentClose(action)
  raise TypeError, "An Action::JavaScript object must be passed." unless action.is_a?(Action::JavaScript)
  raise InvalidPDF, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WC = action

  self
end
Sets an action to run on document opening.
| action: | An Action Object. |
# File sources/parser/catalog.rb, line 57
#
# Sets an action to run on document opening.
# action:: An Action Object.
#
# Raises TypeError unless +action+ is an Action::Action, and InvalidPDF
# when the document has no Catalog. Returns self.
#
def onDocumentOpen(action)
  raise TypeError, "An Action object must be passed." unless action.is_a?(Action::Action)
  raise InvalidPDF, "A catalog object must exist to add this action." unless self.Catalog

  self.Catalog.OpenAction = action

  self
end
Sets an action to run on document printing.
| action: | A JavaScript Action Object. |
# File sources/parser/catalog.rb, line 96
#
# Sets an action to run on document printing.
# action:: A JavaScript Action Object.
#
# Raises TypeError unless +action+ is an Action::JavaScript, and
# InvalidPDF when the document has no Catalog. The Catalog's additional
# actions dictionary is created when missing.
#
# Returns self, like onDocumentOpen and onDocumentClose, so that the
# three calls can be chained the same way.
#
def onDocumentPrint(action)

  unless action.is_a?(Action::JavaScript)
    raise TypeError, "An Action::JavaScript object must be passed."
  end

  unless self.Catalog
    raise InvalidPDF, "A catalog object must exist to add this action."
  end

  self.Catalog.AA ||= CatalogAdditionalActions.new
  self.Catalog.AA.WP = action

  self
end
Converts a logical PDF view into a physical view ready for writing.
# File sources/parser/pdf.rb, line 751
#
# Converts a logical PDF view into a physical view ready for writing:
# each entry yielded by all_indirect_objects is built within its
# revision. Returns self.
#
def physicalize

  #
  # Builds +obj+ between its pre_build and post_build hooks. Inside a
  # Dictionary or an Array, every indirect member is replaced by its
  # reference; members not yet known to get_object are first added to
  # +revision+ and built. Each resulting member is then built in turn.
  #
  def build(obj, revision, embedded = false) #:nodoc:

    # Finalize any subobjects before building the stream.
    if obj.is_a?(ObjectStream)
      obj.each do |inner|
        build(inner, revision, true)
      end
    end

    obj.pre_build

    if obj.is_a?(Dictionary) or obj.is_a?(Array)

      obj.map! do |member|
        if not member.is_indirect?
          member
        elsif get_object(member.reference)
          member.reference
        else
          ref = add_to_revision(member, revision)
          build(member, revision)
          ref
        end
      end

      obj.each do |member|
        build(member, revision)
      end

    end

    obj.post_build

  end

  all_indirect_objects.each do |obj, revision|
    build(obj, revision)
  end

  self
end
Compute and update XRef::Section for each Revision.
# File sources/parser/pdf.rb, line 625
#
# Computes and updates the XRef section and the trailer of each revision.
#
# Offsets are accumulated from the string sizes of the header, then of
# every body object, xref table and trailer. Each trailer's Size is one
# more than the number of objects seen up to and including its revision.
# Returns self.
#
def rebuildxrefs
  object_count = 0
  offset = @header.to_s.size

  @revisions.each do |revision|
    revision.body.each_value { |object| offset += object.to_s.size }

    object_count += revision.body.size
    revision.xreftable = buildxrefs(revision.body.values)

    revision.trailer ||= Trailer.new
    revision.trailer.Size = object_count + 1
    revision.trailer.startxref = offset

    offset += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  self
end
Registers an object into a specific Names root dictionary.
| root: | The root dictionary (see Names::Root) |
| name: | The value name. |
| value: | The value to associate with this name. |
# File sources/parser/catalog.rb, line 117
#
# Registers an object into a specific Names root dictionary.
# root::  The root dictionary (see Names::Root).
# name::  The value name.
# value:: The value to associate with this name; it is flagged indirect.
#
# The Catalog's Names dictionary and the name tree for +root+ are created
# when missing; +name+ and +value+ are then appended to that tree's Names
# array.
#
def register(root, name, value)
  self.Catalog.Names = Names.new if self.Catalog.Names.nil?

  value.set_indirect(true)

  namesroot = self.Catalog.Names.send(root)

  unless namesroot.nil?
    namesroot.Names << name << value
  else
    names = NameTreeNode.new({:Names => [] })
    setter = (root.id2name + "=").to_sym
    self.Catalog.Names.send(setter, (self << names))
    names.Names << name << value
  end
end
Remove last Revisions.
| level: | The number of revisions to remove. |
# File sources/parser/pdf.rb, line 667
#
# Drops the most recent revisions.
# level:: The number of revisions to remove (1 by default).
#
# Returns self.
#
def remove_last_revision(level = 1)
  @revisions.pop(level)
  self
end
Saves the current file as its current filename.
# File sources/parser/pdf.rb, line 198
#
# Saves the document to +filename+, or to its current filename when none
# is given.
# filename:: The path where to save this PDF (defaults to @filename).
# params::   Options. :recompile => true (the default) runs compile
#            first; the whole option set is also handed over to to_bin.
#
# Fails with a RuntimeError when no filename is available. Returns self.
#
def save(filename = nil, params = {})

  name = filename || @filename
  fail "No filename specified for saving." unless name

  options =
  {
    :recompile => true,
  }
  options.update(params)

  #
  # Serialize before touching the disk: if compile or to_bin raises, an
  # already existing file is left intact instead of being truncated.
  #
  self.compile if options[:recompile] == true
  bin = self.to_bin(options)

  # Block form closes the descriptor even if the write fails.
  File.open(name, "wb") { |fd| fd << bin }

  self
end
Saves the file up to given revision number. This can be useful to visualize the modifications over different incremental updates.
| revision: | The revision number to save. |
| filename: | The path where to save this PDF. |
# File sources/parser/pdf.rb, line 243
#
# Saves the file up to the given revision number. This can be useful to
# visualize the modifications over different incremental updates.
# revision:: The number of revisions to write, starting from the first.
# filename:: The path where to save this PDF.
#
# The header is written first, then, for each selected revision, its body
# objects, its xref table and its trailer. Returns self.
#
def save_upto(revision, filename)

  # Block form closes the descriptor even if a write fails.
  File.open(filename, "wb") do |fd|

    fd << @header

    @revisions.each_with_index do |rev, nrev|
      break unless nrev < revision

      # Objects are written one by one: appending the Array itself would
      # write its inspect form on Ruby 1.9 and later.
      rev.body.values.each { |object| fd << object }
      fd << rev.xreftable
      fd << rev.trailer
    end

  end

  self
end
Sets the current filename to the argument given, then save it.
| filename: | The path where to save this PDF. |
# File sources/parser/pdf.rb, line 224
#
# Sets the current filename to the argument given, then saves the
# document.
# filename:: The path where to save this PDF.
# params::   Options handed over to save.
#
# A frozen document keeps its filename and is saved with :recompile and
# :rebuildxrefs forced to false. Returns self.
#
def saveas(filename, params = {})
  if self.frozen?
    params[:recompile] = params[:rebuildxrefs] = false
  else
    @filename = filename
  end

  save(filename, params)

  self
end
Signs the document with the given key and X.509 certificate.
| certificate: | The X509 certificate containing the public key. |
| key: | The private key associated with the certificate. |
| ca: | Optional CA certificates used to sign the user certificate. |
# File sources/parser/signature.rb, line 34
# Signs the document with the given key and X.509 certificate, then freezes it.
#
# certificate:: The OpenSSL::X509::Certificate containing the public key.
# key::         The OpenSSL::PKey::RSA private key associated with the certificate.
# ca::          Optional Array of CA certificates passed along to the PKCS#7 signer.
# annotation::  Optional Annotation::Widget::Signature to attach the signature to;
#               when nil, a new widget with an all-zero Rect is created.
# location::    Optional text stored in the signature's Location entry.
# contact::     Optional text stored in the signature's ContactInfo entry.
# reason::      Optional text stored in the signature's Reason entry.
#
# Raises TypeError when an argument is not of the expected class.
# Returns the (now frozen) document.
def sign(certificate, key, ca = [], annotation = nil, location = nil, contact = nil, reason = nil)

  unless certificate.is_a?(OpenSSL::X509::Certificate)
    raise TypeError, "A OpenSSL::X509::Certificate object must be passed."
  end

  unless key.is_a?(OpenSSL::PKey::RSA)
    raise TypeError, "A OpenSSL::PKey::RSA object must be passed."
  end

  unless ca.is_a?(::Array)
    raise TypeError, "Expected an Array of CA certificate."
  end

  unless annotation.nil? or annotation.is_a?(Annotation::Widget::Signature)
    raise TypeError, "Expected a Annotation::Widget::Signature object."
  end

  digsig = Signature::DigitalSignature.new.set_indirect(true)

  if annotation.nil?
    annotation = Annotation::Widget::Signature.new
    annotation.Rect = Rectangle[:llx => 0.0, :lly => 0.0, :urx => 0.0, :ury => 0.0]
  end

  annotation.V = digsig
  add_field(annotation)
  self.Catalog.AcroForm.SigFlags = InteractiveForm::SigFlags::SIGNATURESEXIST | InteractiveForm::SigFlags::APPENDONLY

  digsig.Type = :Sig
  # Reserve a zero-filled placeholder; the real signature is written into it below.
  digsig.Contents = HexaString.new("\x00" * signfield_size(certificate, key, ca))
  digsig.Filter = Name.new("Adobe.PPKMS")
  digsig.SubFilter = Name.new("adbe.pkcs7.detached")
  digsig.ByteRange = [0, 0, 0, 0]

  digsig.Location = HexaString.new(location) if location
  digsig.ContactInfo = HexaString.new(contact) if contact
  digsig.Reason = HexaString.new(reason) if reason

  #
  # Flattening the PDF to get file view.
  #
  self.compile

  #
  # Creating an empty Xref table to compute signature byte range.
  #
  rebuild_dummy_xrefs

  sigoffset = get_object_offset(digsig.no, digsig.generation) + digsig.sigOffset

  digsig.ByteRange[0] = 0
  digsig.ByteRange[1] = sigoffset
  digsig.ByteRange[2] = sigoffset + digsig.Contents.size

  # Re-evaluate until the stored value agrees with the freshly computed file size.
  digsig.ByteRange[3] = filesize - digsig.ByteRange[2] until digsig.ByteRange[3] == filesize - digsig.ByteRange[2]

  # From that point the file size remains constant

  #
  # Correct Xrefs variations caused by ByteRange modifications.
  #
  rebuildxrefs

  # The signed data is everything before and after the Contents placeholder.
  filedata = self.to_bin
  signable_data = filedata[digsig.ByteRange[0],digsig.ByteRange[1]] + filedata[digsig.ByteRange[2],digsig.ByteRange[3]]

  signature = OpenSSL::PKCS7.sign(certificate, key, signable_data, ca, OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der
  digsig.Contents[0, signature.size] = signature

  #
  # No more modification are allowed after signing.
  #
  self.freeze

end

# Size to reserve for the signature placeholder: the DER size of a detached
# PKCS#7 signature over a short sample string, plus 128 bytes of margin.
#
# Defined once beside sign rather than nested inside it, where it was
# redefined on every call. Visibility is deliberately left unchanged.
def signfield_size(certificate, key, ca = []) #:nodoc:
  datatest = "abcdefghijklmnopqrstuvwxyz"
  OpenSSL::PKCS7.sign(certificate, key, datatest, ca, OpenSSL::PKCS7::DETACHED | OpenSSL::PKCS7::BINARY).to_der.size + 128
end
Returns the final binary representation of the current document.
| rebuildxrefs: | Computes xrefs while writing objects (default true). |
| obfuscate: | Do some basic syntactic object obfuscation. |
# File sources/parser/pdf.rb, line 464
# Returns the final binary representation of the current document.
#
# params:: Hash of options overriding the defaults:
#          :rebuildxrefs::  Computes xrefs while writing objects (default true).
#          :obfuscate::     Serializes objects and trailers with to_obfuscated_str
#                           instead of to_s (default false).
#          :use_xrefstm::   Emits a cross-reference stream (default: true when the
#                           document contains an ObjectStream).
#          :use_xreftable:: Emits a cross-reference table (default: the opposite
#                           of :use_xrefstm).
#
# Raises InvalidPDF when no trailer information is found.
def to_bin(params = {})

  has_objstm = self.indirect_objects.values.any?{|obj| obj.is_a?(ObjectStream)}

  options =
  {
    :rebuildxrefs => true,
    :obfuscate => false,
    :use_xrefstm => has_objstm,
    :use_xreftable => (not has_objstm)
    #todo linearize
  }
  options.update(params)

  # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
  if options[:use_xrefstm] == options[:use_xreftable]
    options[:use_xrefstm] = has_objstm
    options[:use_xreftable] = (not has_objstm)
  end

  # Get trailer dictionary
  trailer_info = get_trailer_info
  if trailer_info.nil?
    raise InvalidPDF, "No trailer information found"
  end
  trailer_dict = trailer_info.dictionary

  prev_xref_offset = nil
  xrefstm_offset = nil

  # Header
  bin = ""
  bin << @header.to_s

  # For each revision
  @revisions.each do |rev|

    if options[:rebuildxrefs] == true
      lastno_table, lastno_stm = 0, 0
      brange_table, brange_stm = 0, 0

      # Both entry lists start with the free entry for object 0.
      xrefs_stm = [ XRef.new(0, XRef::LASTFREE, XRef::FREE) ]
      xrefs_table = [ XRef.new(0, XRef::LASTFREE, XRef::FREE) ]

      if options[:use_xreftable] == true
        xrefsection = XRef::Section.new
      end

      if options[:use_xrefstm] == true
        xrefstm = XRefStream.new
        add_to_revision(xrefstm, rev)
      end
    end

    objset = rev.body.values

    # Objects embedded in object streams also need an xref entry.
    objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
      objset |= objstm.objects
    end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true

    # For each object, processed in number order
    objset.sort.each { |obj|

      if options[:rebuildxrefs] == true

        # Adding subsections if needed
        if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
          xrefsection << XRef::Subsection.new(brange_table, xrefs_table)

          xrefs_table.clear
          brange_table = obj.no
        end
        if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
          xrefs_stm.each do |xref| xrefstm << xref end
          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.length

          xrefs_stm.clear
          brange_stm = obj.no
        end

        # Process embedded objects
        if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
          index = obj.parent.index(obj.no)

          xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)

          lastno_stm = obj.no
        else
          # The current output size is the offset at which this object is written.
          xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
          xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)

          lastno_table = lastno_stm = obj.no
        end

      end

      # Objects living inside an object stream are not written on their own.
      if obj.parent == obj or not obj.parent.is_a?(ObjectStream)

        # Finalize XRefStm
        if options[:rebuildxrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
          xrefstm_offset = bin.size

          xrefs_stm.each do |xref| xrefstm << xref end
          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.size

          xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
          xrefstm.Prev = prev_xref_offset

          rev.trailer.dictionary = nil

          add_to_revision(xrefstm, rev)

          xrefstm.pre_build
          xrefstm.post_build
        end

        bin << (options[:obfuscate] == true ? obj.to_obfuscated_str : obj.to_s)
      end
    }

    rev.trailer ||= Trailer.new

    # XRef table
    if options[:rebuildxrefs] == true

      if options[:use_xreftable] == true
        table_offset = bin.size

        xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
        rev.xreftable = xrefsection

        rev.trailer.dictionary = trailer_dict
        rev.trailer.Size = objset.size + 1
        rev.trailer.Prev = prev_xref_offset

        rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
      end

      # This revision's xref offset becomes the Prev of the next revision.
      startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
      rev.trailer.startxref = prev_xref_offset = startxref

    end

    # Trailer

    bin << rev.xreftable.to_s if options[:use_xreftable] == true
    bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)

  end

  bin
end