virantha · BrentNoorda · May 13, 2016 · May 13, 2016
diff --git a/pypdfocr/pypdfocr_gs.py b/pypdfocr/pypdfocr_gs.py
@@ -124,7 +124,7 @@ def _get_dpi(self, pdf_filename):
         try:
             out = subprocess.check_output(cmd, shell=True)
         except subprocess.CalledProcessError as e:
-            self._warn ("Could not execute pdfimages to calculate DPI (try installing xpdf or poppler?), so defaulting to %sdpi" % self.output_dpi) 
+            self._warn ("Could not execute pdfimages to calculate DPI (try installing xpdf or poppler?), so defaulting to %sdpi" % self.output_dpi)
             return
 
         # Need the second line of output
@@ -137,7 +137,7 @@ def _get_dpi(self, pdf_filename):
         logging.debug(results)
         results = results.split()
         if(results[2] != 'image'):
-            self._warn("Could not understand output of pdfimages, please rerun with -d option and file an issue at http://github.com/virantha/pypdfocr/issues") 
+            self._warn("Could not understand output of pdfimages, please rerun with -d option and file an issue at http://github.com/virantha/pypdfocr/issues")
             return
         x_pt, y_pt, greyscale = int(results[3]), int(results[4]), results[5]=='gray'
         self.greyscale = greyscale
@@ -146,7 +146,13 @@ def _get_dpi(self, pdf_filename):
         cmd = 'identify -format "%%w %%x %%h %%y\n" "%s"' % pdf_filename
         try:
             out = subprocess.check_output(cmd, shell=True)
-            results = out.splitlines()[0]
+
+            # skip any leading lines that contain "* Warning:"
+            out_splitlines = out.splitlines()
+            while "* Warning:" in  out_splitlines[0]:
+                out_splitlines = out_splitlines[1:]
+            results = out_splitlines[0]
+
             results = results.replace("Undefined", "")
             width, xdensity, height, ydensity = [float(x) for x in results.split()]
             xdpi = round(x_pt/width*xdensity)
@@ -161,16 +167,17 @@ def _get_dpi(self, pdf_filename):
 
 
         except Exception as e:
+            logging.debug(cmd)
             logging.debug(str(e))
-            self._warn ("Could not execute identify to calculate DPI (try installing imagemagick?), so defaulting to %sdpi" % self.output_dpi) 
+            self._warn ("Could not execute identify to calculate DPI (try installing imagemagick?), so defaulting to %sdpi" % self.output_dpi)
         return
 
 
 
     def _run_gs(self, options, output_filename, pdf_filename):
         try:
             cmd = '%s -q -dNOPAUSE %s -sOutputFile="%s" "%s" -c quit' % (self.binary, options, output_filename, pdf_filename)
-            logging.info(cmd)        
+            logging.info(cmd)
             out = subprocess.check_output(cmd, shell=True)
 
         except subprocess.CalledProcessError as e:

diff --git a/pypdfocr/pypdfocr_pdf.py b/pypdfocr/pypdfocr_pdf.py
@@ -155,7 +155,7 @@ def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
             merger.append(PdfFileReader(file(text_pdf_filename, 'rb')))
         merger.write(all_text_filename)
         merger.close()
-	del merger
+        del merger
 
 
         writer = PdfFileWriter()
@@ -191,10 +191,10 @@ def _get_merged_single_page(self, original_page, ocr_text_page):
 
         if orig_rotation_angle != 0:
             logging.info("Original Rotation: %s" % orig_rotation_angle)
-            self.mergeRotateAroundPointPage(original_page, ocr_text_page, orig_rotation_angle, ocr_text_page.mediaBox.getWidth()/2, ocr_text_page.mediaBox.getWidth()/2)
+            self.mergeRotateAroundPointPage(original_page, ocr_text_page, orig_rotation_angle, ocr_text_page.mediaBox.getWidth()/2, ocr_text_page.mediaBox.getHeight()/2)
             # None of these commands worked for me:
-            #orig_pg.rotateCounterClockwise(orig_rotation_angle)
-            #orig_pg.mergeRotatedPage(text_pg,orig_rotation_angle)
+            #original_page.rotateCounterClockwise(orig_rotation_angle)
+            #original_page.mergeRotatedPage(ocr_text_page,orig_rotation_angle)
         else:
             original_page.mergePage(ocr_text_page)
         original_page.compressContentStreams()