documentcloud · vrybas · Jan 13, 2012 · Jan 13, 2012 · Jun 19, 2013 · alxndrmlr
diff --git a/index.html b/index.html
@@ -228,7 +228,9 @@ <h2 id="usage">Usage</h2>
       <b>xls</b> and so on, as well as <b>html</b>, <b>odf</b>, <b>rtf</b>, <b>swf</b>, <b>svg</b>, and <b>wpd</b>.
       The first time that you convert a new file type, OpenOffice will lazy-load
       the code that processes it &mdash; subsequent conversions will be much faster.
-    </p>
+      <br/>Use the <tt>--timeout</tt> flag to increase or decrease the time (in seconds) allowed before a conversion is aborted with an 'error on timeout' exception.
+      This is useful when converting very large documents. The default timeout is 1 hour (more than enough to convert a 1000-page document).
+      </p>
 <pre>
 docsplit pdf documentation/*.html</pre>
 <pre>

diff --git a/lib/docsplit.rb b/lib/docsplit.rb
@@ -62,6 +62,7 @@ def self.extract_images(pdfs, opts={})
   # If the document is in an image format, use GraphicsMagick to extract the PDF.
   def self.extract_pdf(docs, opts={})
     out = opts[:output] || '.'
+    timeout = opts[:timeout] || 3600
     FileUtils.mkdir_p out unless File.exists?(out)
     [docs].flatten.each do |doc|
       ext = File.extname(doc)
@@ -71,7 +72,7 @@ def self.extract_pdf(docs, opts={})
       if GM_FORMATS.include?(`file -b --mime #{ESCAPE[doc]}`.strip.split(/[:;]\s+/)[0])
         `gm convert #{escaped_doc} #{escaped_out}/#{escaped_basename}.pdf`
       else
-        options = "-jar #{ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-4.jar -r #{ROOT}/vendor/conf/document-formats.js"
+        options = "-jar #{ROOT}/vendor/jodconverter/jodconverter-core-3.0-beta-4.jar -t #{timeout} -r #{ROOT}/vendor/conf/document-formats.js"
         run "#{options} #{escaped_doc} #{escaped_out}/#{escaped_basename}.pdf", [], {}
       end
     end

diff --git a/lib/docsplit/command_line.rb b/lib/docsplit/command_line.rb
@@ -94,6 +94,9 @@ def parse_options
         opts.on('--no-clean', 'disable cleaning of OCR\'d text') do |c|
           @options[:clean] = false
         end
+        opts.on('-t', '--timeout [SEC]', 'Timeout for PDF extraction from OpenOffice document format (default is 1 hour)') do |t|
+          @options[:timeout] = t
+        end
         opts.on('-r', '--rolling', 'generate images from each previous image') do |r|
           @options[:rolling] = true
         end