From d8a6067fc20f78f27468ad2c2a1a4f7d6d22148a Mon Sep 17 00:00:00 2001
From: Ben S <github@benjsec.33mail.com>
Date: Mon, 6 Nov 2017 08:44:10 +0000
Subject: [PATCH] Python 3.5 and 3.6 compatibility

This update adds compatibility with py3, whilst retaining
all functionality in py2.

The evernote library is not yet py3 compatible, so is not used in
the py3 version. It is however still retained when using py2.
---
 .travis.yml                          |  11 ++-
 pypdfocr/pypdfocr.py                 |  61 +++++++++------
 pypdfocr/pypdfocr_filer_dirs.py      |   2 +-
 pypdfocr/pypdfocr_filer_evernote.py  |  20 +++--
 pypdfocr/pypdfocr_gs.py              |  26 +++----
 pypdfocr/pypdfocr_multiprocessing.py |  16 ++--
 pypdfocr/pypdfocr_pdf.py             |   8 +-
 pypdfocr/pypdfocr_pdffiler.py        |  17 ++---
 pypdfocr/pypdfocr_preprocess.py      |   8 +-
 pypdfocr/pypdfocr_tesseract.py       |  52 ++++---------
 pypdfocr/pypdfocr_watcher.py         |  37 ++++-----
 requirements.txt                     |   2 +-
 test/test_evernote.py                |  30 ++++++--
 test/test_gs.py                      |   7 +-
 test/test_option_parsing.py          |  46 ++++++++++--
 test/test_pdf_filer.py               |  17 +++--
 test/test_pypdfocr.py                | 107 +++++++++++++++++----------
 test/test_tesseract.py               |   8 +-
 test/test_watcher.py                 |  25 +++----
 19 files changed, 288 insertions(+), 212 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 5da555e..06dd8be 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,9 +1,14 @@
 language: python
 python:
     - "2.7"
+    - "3.5"
+    - "3.6"
+before_install:
+    - sudo apt-get -qq update
+    - sudo apt-get install -y tesseract-ocr ghostscript imagemagick
 install: 
-    - "pip install -r requirements.txt --use-mirrors"
-    - "pip install pytest mock --use-mirrors"
+    - "pip install -r requirements.txt"
+    - "pip install pytest mock"
     - "pip install ."
 script: 
-    - "python setup.py test"
+    - "pytest test"
diff --git a/pypdfocr/pypdfocr.py b/pypdfocr/pypdfocr.py
index 7ee7e9e..a95cb66 100644
--- a/pypdfocr/pypdfocr.py
+++ b/pypdfocr/pypdfocr.py
@@ -21,24 +21,37 @@
 import itertools
 from functools import wraps
 
-from version import __version__
+from .version import __version__
 from PIL import Image
 import yaml
 
 import multiprocessing
-# Replace the Popen routine to allow win32 pyinstaller to build
-from multiprocessing import forking
-from pypdfocr_multiprocessing import _Popen
+
+""" Special work-around to support multiprocessing and pyinstaller --onefile on windows systems
+
+    https://github.com/pyinstaller/pyinstaller/wiki/Recipe-Multiprocessing
+"""
+try:
+    # Python 3.4+
+    if sys.platform.startswith('win'):
+        import multiprocessing.popen_spawn_win32 as forking
+    else:
+        import multiprocessing.popen_fork as forking
+except ImportError:
+    import multiprocessing.forking as forking
+
+from .pypdfocr_multiprocessing import _Popen
 forking.Popen = _Popen
 
-from pypdfocr_pdf import PyPdf
-from pypdfocr_tesseract import PyTesseract
-from pypdfocr_gs import PyGs
-from pypdfocr_watcher import PyPdfWatcher
-from pypdfocr_pdffiler import PyPdfFiler
-from pypdfocr_filer_dirs import PyFilerDirs
-from pypdfocr_filer_evernote import PyFilerEvernote
-from pypdfocr_preprocess import PyPreprocess
+from .pypdfocr_pdf import PyPdf
+from .pypdfocr_tesseract import PyTesseract
+from .pypdfocr_gs import PyGs
+from .pypdfocr_watcher import PyPdfWatcher
+from .pypdfocr_pdffiler import PyPdfFiler
+from .pypdfocr_filer_dirs import PyFilerDirs
+from .pypdfocr_filer_evernote import ENABLED as evernote_enabled
+from .pypdfocr_filer_evernote import PyFilerEvernote
+from .pypdfocr_preprocess import PyPreprocess
 
 def error(text):
     print("ERROR: %s" % text)
@@ -49,12 +62,14 @@ def retry(count=5, exc_type = Exception):
     def decorator(func):
         @wraps(func)
         def result(*args, **kwargs):
+            err = None
             for _ in range(count):
                 try:
                     return func(*args, **kwargs)
-                except exc_type:
-                    pass
-                raise
+                except exc_type as e:
+                    err = e
+            else:
+                raise err
         return result
     return decorator
 
@@ -161,11 +176,11 @@ def get_options(self, argv):
         filing_group = p.add_argument_group(title="Filing optinos")
         filing_group.add_argument('-f', '--file', action='store_true',
             default=False, dest='enable_filing', help='Enable filing of converted PDFs')
-        #filing_group.add_argument('-c', '--config', type = argparse.FileType('r'),
+        # filing_group.add_argument('-c', '--config', type = argparse.FileType('r'),
         filing_group.add_argument('-c', '--config', type = lambda x: open_file_with_timeout(p,x),
              dest='configfile', help='Configuration file for defaults and PDF filing')
         filing_group.add_argument('-e', '--evernote', action='store_true',
-            default=False, dest='enable_evernote', help='Enable filing to Evernote')
+            default=False, dest='enable_evernote', help='Enable filing to Evernote.')
         filing_group.add_argument('-n', action='store_true',
             default=False, dest='match_using_filename', help='Use filename to match if contents did not match anything, before filing to default folder')
 
@@ -204,7 +219,11 @@ def get_options(self, argv):
             logging.debug("Read in configuration file")
             logging.debug(self.config)
 
-        if args.enable_evernote:
+        # Evernote filing does not work in py3
+        if args.enable_evernote and not evernote_enabled:
+            print("Warning: Evernote filing disabled, could not find evernote API. Evernote not available in py3.")
+            self.enable_evernote = False
+        elif args.enable_evernote:
             self.enable_evernote = True
         else:
             self.enable_evernote = False
@@ -367,11 +386,11 @@ def run_conversion(self, pdf_filename):
             time.sleep(1)
             if not self.debug:
                 # Need to clean up the original image files before preprocessing
-                if locals().has_key("fns"): # Have to check if this was set before exception raised
+                if "fns" in locals(): # Have to check if this was set before exception raised
                     logging.info("Cleaning up %s" % fns)
                     self._clean_up_files(fns)
 
-                if locals().has_key("preprocess_imagefilenames"):  # Have to check if this was set before exception raised
+                if "preprocess_imagefilenames" in locals():  # Have to check if this was set before exception raised
                     logging.info("Cleaning up %s" % preprocess_imagefilenames)
                     self._clean_up_files(preprocess_imagefilenames) # splat the hocr_filenames as it is a list of pairs
                     for ext in [".hocr", ".html", ".txt"]:
@@ -467,7 +486,7 @@ def go(self, argv):
                 except KeyboardInterrupt:
                     break
                 except Exception as e:
-                    print traceback.print_exc(e)
+                    print(traceback.print_exc(e))
                     py_watcher.stop()
                     
         else:
diff --git a/pypdfocr/pypdfocr_filer_dirs.py b/pypdfocr/pypdfocr_filer_dirs.py
index dc19330..c7dc73f 100644
--- a/pypdfocr/pypdfocr_filer_dirs.py
+++ b/pypdfocr/pypdfocr_filer_dirs.py
@@ -16,7 +16,7 @@
 import os
 import shutil
 
-from pypdfocr_filer import PyFiler
+from .pypdfocr_filer import PyFiler
 
 """
     Implementation of a filer class 
diff --git a/pypdfocr/pypdfocr_filer_evernote.py b/pypdfocr/pypdfocr_filer_evernote.py
index 80ec115..9064415 100644
--- a/pypdfocr/pypdfocr_filer_evernote.py
+++ b/pypdfocr/pypdfocr_filer_evernote.py
@@ -19,17 +19,21 @@
 import time
 import sys
 
-from pypdfocr_filer import PyFiler
+from .pypdfocr_filer import PyFiler
 
 import functools
 
-from evernote.api.client import EvernoteClient
-import evernote.edam.type.ttypes as Types
-import evernote.edam.userstore.constants as UserStoreConstants
-from evernote.edam.error.ttypes import EDAMUserException
-from evernote.edam.error.ttypes import EDAMSystemException
-from evernote.edam.error.ttypes import EDAMNotFoundException
-from evernote.edam.error.ttypes import EDAMErrorCode
+try:
+    from evernote.api.client import EvernoteClient
+    import evernote.edam.type.ttypes as Types
+    import evernote.edam.userstore.constants as UserStoreConstants
+    from evernote.edam.error.ttypes import EDAMUserException
+    from evernote.edam.error.ttypes import EDAMSystemException
+    from evernote.edam.error.ttypes import EDAMNotFoundException
+    from evernote.edam.error.ttypes import EDAMErrorCode
+    ENABLED = True
+except ImportError:
+    ENABLED = False
 
 
 """
diff --git a/pypdfocr/pypdfocr_gs.py b/pypdfocr/pypdfocr_gs.py
index 5599082..1477847 100644
--- a/pypdfocr/pypdfocr_gs.py
+++ b/pypdfocr/pypdfocr_gs.py
@@ -92,21 +92,21 @@ def _find_windows_gs(self):
             listing = os.listdir('.')
 
             # Find all possible gs* sub-directories
-	    listing = [x for x in listing if x.startswith('gs')]
+            listing = [x for x in listing if x.startswith('gs')]
 
             # TODO: Make this a natural sort
             listing.sort(reverse=True)
-	    for bindir in listing:
-		binpath = os.path.join(bindir,'bin')
-		if not os.path.exists(binpath): continue
-		os.chdir(binpath)
+            for bindir in listing:
+                binpath = os.path.join(bindir,'bin')
+                if not os.path.exists(binpath): continue
+                os.chdir(binpath)
                 # Look for gswin64c.exe or gswin32c.exe (the c is for the command-line version)
-		gswin = glob.glob('gswin*c.exe')
-		if len(gswin) == 0:
-		    continue
-		gs = os.path.abspath(gswin[0]) # Just use the first found .exe (Do i need to do anything more complicated here?)
-		os.chdir(cwd)
-		return gs
+                gswin = glob.glob('gswin*c.exe')
+                if len(gswin) == 0:
+                    continue
+                gs = os.path.abspath(gswin[0]) # Just use the first found .exe (Do i need to do anything more complicated here?)
+                os.chdir(cwd)
+                return gs
 
         if not gs:
             error(self.msgs['GS_MISSING_BINARY'])
@@ -171,10 +171,10 @@ def _run_gs(self, options, output_filename, pdf_filename):
         try:
             cmd = '%s -q -dNOPAUSE %s -sOutputFile="%s" "%s" -c quit' % (self.binary, options, output_filename, pdf_filename)
             logging.info(cmd)        
-            out = subprocess.check_output(cmd, shell=True)
+            out = subprocess.check_output(cmd, shell=True, universal_newlines=True)
 
         except subprocess.CalledProcessError as e:
-            print e.output
+            print(e.output)
             if "undefined in .getdeviceparams" in e.output:
                 error(self.msgs['GS_OUTDATED'])
             else:
diff --git a/pypdfocr/pypdfocr_multiprocessing.py b/pypdfocr/pypdfocr_multiprocessing.py
index 3666268..253bd55 100644
--- a/pypdfocr/pypdfocr_multiprocessing.py
+++ b/pypdfocr/pypdfocr_multiprocessing.py
@@ -13,19 +13,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import sys, os, multiprocessing.forking
 import logging
+import os
+import sys
 
 """ Special work-around to support multiprocessing and pyinstaller --onefile on windows systms
 
     https://github.com/pyinstaller/pyinstaller/wiki/Recipe-Multiprocessing
 """
+try:
+    # Python 3.4+
+    if sys.platform.startswith('win'):
+        import multiprocessing.popen_spawn_win32 as forking
+    else:
+        import multiprocessing.popen_fork as forking
+except ImportError:
+    import multiprocessing.forking as forking
 
-import multiprocessing.forking as forking
-import os
-import sys
 
-class _Popen(multiprocessing.forking.Popen):
+class _Popen(forking.Popen):
     def __init__(self, *args, **kw):
         if hasattr(sys, 'frozen'):
             # We have to set original _MEIPASS2 value from sys._MEIPASS
diff --git a/pypdfocr/pypdfocr_pdf.py b/pypdfocr/pypdfocr_pdf.py
index bdc1f86..8438b38 100644
--- a/pypdfocr/pypdfocr_pdf.py
+++ b/pypdfocr/pypdfocr_pdf.py
@@ -31,7 +31,6 @@
 import tempfile
 import glob
 
-import cStringIO
 import base64
 import zlib
 import math
@@ -52,7 +51,7 @@
 from reportlab.lib.enums import TA_LEFT
 from reportlab.platypus.paragraph import Paragraph
 
-from pypdfocr_util import Retry
+from .pypdfocr_util import Retry
 from functools import partial
 
 class RotatedPara(Paragraph):
@@ -152,10 +151,11 @@ def overlay_hocr_pages(self, dpi, hocr_filenames, orig_pdf_filename):
         all_text_filename = os.path.join(pdf_dir, "%s_text.pdf" % (basename))
         merger = PdfFileMerger()
         for text_pdf_filename in text_pdf_filenames:
-            merger.append(PdfFileReader(file(text_pdf_filename, 'rb')))
+            with open(text_pdf_filename, 'rb') as f:
+                merger.append(PdfFileReader(f))
         merger.write(all_text_filename)
         merger.close()
-	del merger
+        del merger
 
 
         writer = PdfFileWriter()
diff --git a/pypdfocr/pypdfocr_pdffiler.py b/pypdfocr/pypdfocr_pdffiler.py
index 1bb23f5..1ab4247 100644
--- a/pypdfocr/pypdfocr_pdffiler.py
+++ b/pypdfocr/pypdfocr_pdffiler.py
@@ -18,15 +18,14 @@
     on keywords
 """
 
-from sets import Set    
 import sys, os
 import re
 import logging
 import shutil
 
 from PyPDF2 import PdfFileReader
-from pypdfocr_filer import PyFiler
-from pypdfocr_filer_dirs import PyFilerDirs
+from .pypdfocr_filer import PyFiler
+from .pypdfocr_filer_dirs import PyFilerDirs
 
 class PyPdfFiler(object):
     def __init__(self, filer):
@@ -36,7 +35,7 @@ def __init__(self, filer):
 
         # Whether to fall back on filename for matching keywords against
         # if there is no match in the text
-        self.file_using_filename = False 
+        self.file_using_filename = False
 
     def iter_pdf_page_text(self, filename):
         self.filename = filename
@@ -44,7 +43,7 @@ def iter_pdf_page_text(self, filename):
         logging.info("pdf scanner found %d pages in %s" % (reader.getNumPages(), filename))
         for pgnum in range(reader.getNumPages()):
             text = reader.getPage(pgnum).extractText()
-            text = text.encode('ascii', 'ignore')
+            # text = text.encode('ascii', 'ignore')
             text = text.replace('\n', ' ')
             yield text
 
@@ -56,10 +55,10 @@ def _get_matching_folder(self, pdfText):
                 if s in searchText:
                     logging.info("Matched keyword '%s'" % s)
                     return folder
-        # No match found, so return 
+        # No match found, so return
         return None
 
-    def file_original (self, original_filename):
+    def file_original(self, original_filename):
         return self.filer.file_original(original_filename)
 
     def move_to_matching_folder(self, filename):
@@ -72,9 +71,9 @@ def move_to_matching_folder(self, filename):
 
         tgt_file = self.filer.move_to_matching_folder(filename, tgt_folder)
         return tgt_file
-        
+
 if __name__ == '__main__':
     p = PyPdfFiler(PyFilerDirs())
     for page_text in p.iter_pdf_page_text("scan_ocr.pdf"):
-        print (page_text)
+        print(page_text)
 
diff --git a/pypdfocr/pypdfocr_preprocess.py b/pypdfocr/pypdfocr_preprocess.py
index e942cc3..34ed89a 100644
--- a/pypdfocr/pypdfocr_preprocess.py
+++ b/pypdfocr/pypdfocr_preprocess.py
@@ -28,7 +28,7 @@
 import signal
 
 from multiprocessing import Pool
-from pypdfocr_interrupts import init_worker
+from .pypdfocr_interrupts import init_worker
 
 # Ugly hack to pass in object method to the multiprocessing library
 # From http://www.rueckstiess.net/research/snippets/show/ca1d7d90
@@ -58,7 +58,7 @@ def cmd(self, cmd_list):
             logging.debug(out)
             return out
         except subprocess.CalledProcessError as e:
-            print e.output
+            print(e.output)
             self._warn("Could not run command %s" % cmd_list)
             
 
@@ -102,14 +102,14 @@ def preprocess(self, in_filenames):
             logging.info("Starting preprocessing parallel execution")
             preprocessed_filenames = pool.map(unwrap_self,zip([self]*len(fns),fns))
             pool.close()
-        except KeyboardInterrupt or Exception:
+        except (KeyboardInterrupt, Exception):
             print("Caught keyboard interrupt... terminating")
             pool.terminate()
             #sys,exit(-1)
             raise
         finally:
             pool.join()
-            logging.info ("Completed preprocessing")
+            logging.info("Completed preprocessing")
 
         return preprocessed_filenames
 
diff --git a/pypdfocr/pypdfocr_tesseract.py b/pypdfocr/pypdfocr_tesseract.py
index 8f246ee..1cfb9f2 100644
--- a/pypdfocr/pypdfocr_tesseract.py
+++ b/pypdfocr/pypdfocr_tesseract.py
@@ -23,10 +23,11 @@
 import logging
 import subprocess
 import glob
+from pkg_resources import parse_version
 from subprocess import CalledProcessError
 
 from multiprocessing import Pool
-from pypdfocr_interrupts import init_worker
+from .pypdfocr_interrupts import init_worker
 
 def error(text):
     print("ERROR: %s" % text)
@@ -79,50 +80,27 @@ def _is_version_uptodate(self):
             Make sure the version is current 
         """
         logging.info("Checking tesseract version")
-        cmd = '%s -v' % (self.binary)
+        cmd = "%s -v" % self.binary
         logging.info(cmd)        
         try:
-            ret_output = subprocess.check_output(cmd, shell=True,  stderr=subprocess.STDOUT)
+            ret_output = subprocess.check_output(
+                cmd, shell=True, stderr=subprocess.STDOUT, universal_newlines=True)
         except CalledProcessError:
             # Could not run tesseract
             error(self.msgs['TS_MISSING'])
 
         ver_str = '0.0.0'
         for line in ret_output.splitlines():
+            print(line)
             if 'tesseract' in line:
                 ver_str = line.split(' ')[1]
-                if ver_str.endswith('dev'): # Fix for version strings that end in 'dev'
-                    ver_str = ver_str[:-3]
-
-        # Iterate through the version dots
-        ver = [int(x) for x in ver_str.split('.')]
-        req = [int(x) for x in self.required.split('.')]
-
         # Aargh, in windows 3.02.02 is reported as version 3.02  
-        # SFKM
         if str(os.name) == 'nt':
-            req = req[:2]
-
-        version_good = False
-        for i,num in enumerate(req):
-            if len(ver) < i+1:
-                # This minor version number is not present in tesseract, so it must be
-                # lower than required.  (3.02 < 3.02.01)
-                break
-            if ver[i]==num and len(ver) == i+1 and len(ver)==len(req):
-                # 3.02.02 == 3.02.02
-                version_good = True
-                continue
-            if ver[i]>num:
-                # 4.0 > 3.02.02
-                # 3.03.02 > 3.02.02
-                version_good = True
-                break
-            if ver[i]<num:
-                # 3.01.02 < 3.02.02
-                break
-            
-        return version_good, ver_str
+            req = self.required[:-3]
+        else:
+            req = self.required
+        print(ver_str)
+        return (parse_version(ver_str) >= parse_version(req)), ver_str
 
     def _warn(self, msg): # pragma: no cover
         print("WARNING: %s" % msg)
@@ -139,16 +117,16 @@ def make_hocr_from_pnms(self, fns):
         pool = Pool(processes=self.threads, initializer=init_worker)
 
         try:
-            hocr_filenames = pool.map(unwrap_self, zip([self]*len(fns), fns))
+            hocr_filenames = pool.map(unwrap_self, list(zip([self]*len(fns), fns)))
             pool.close()
-        except KeyboardInterrupt or Exception:
+        except (KeyboardInterrupt, Exception):
             print("Caught keyboard interrupt... terminating")
             pool.terminate()
             raise
         finally:
             pool.join()
 
-        return zip(fns,hocr_filenames)
+        return list(zip(fns,hocr_filenames))
 
 
     def make_hocr_from_pnm(self, img_filename):
@@ -166,7 +144,7 @@ def make_hocr_from_pnm(self, img_filename):
             ret_output = subprocess.check_output(cmd, shell=True,  stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as e:
             # Could not run tesseract
-            print e.output
+            print(e.output)
             self._warn (self.msgs['TS_FAILED'])
                 
         if os.path.isfile(hocr_filename):
diff --git a/pypdfocr/pypdfocr_watcher.py b/pypdfocr/pypdfocr_watcher.py
index f7ef556..ec94b04 100755
--- a/pypdfocr/pypdfocr_watcher.py
+++ b/pypdfocr/pypdfocr_watcher.py
@@ -25,17 +25,19 @@ class PyPdfWatcher(FileSystemEventHandler):
         Every few seconds pop-off queue and if timestamp older than 3 seconds,
         process the file else, push it back onto queue.
     """
-    events = {}
-    events_lock = Lock()
 
     def __init__(self, monitor_dir, config):
         FileSystemEventHandler.__init__(self)
+        
+        self.events = {}
+        self.events_lock = Lock()
 
         self.monitor_dir = monitor_dir
         if not config: config = {}
 
         self.scan_interval = config.get('scan_interval', 3) # If no updates in 3 seconds (or user specified option in config file) process file
 
+
     def start(self):
         self.observer = Observer()
         self.observer.schedule(self, self.monitor_dir)
@@ -94,19 +96,19 @@ def check_for_new_pdf(self,ev_path):
         """
         if ev_path.endswith(".pdf"):
             if not ev_path.endswith(("_ocr.pdf", "_test.pdf")):
-                PyPdfWatcher.events_lock.acquire()
-                if not ev_path in PyPdfWatcher.events:
-                    PyPdfWatcher.events[ev_path] = time.time()
+                self.events_lock.acquire()
+                if not ev_path in self.events:
+                    self.events[ev_path] = time.time()
                     logging.info ("Adding %s to event queue" % ev_path)
                 else:
-                    if PyPdfWatcher.events[ev_path] == -1:
+                    if self.events[ev_path] == -1:
                         logging.info ( "%s removing from event queue" % (ev_path))
-                        del PyPdfWatcher.events[ev_path]
+                        del self.events[ev_path]
                     else: 
                         newTime = time.time()
                         logging.debug ( "%s already in event queue, updating timestamp to %d" % (ev_path, newTime))
-                        PyPdfWatcher.events[ev_path]  = newTime
-                PyPdfWatcher.events_lock.release()
+                        self.events[ev_path]  = newTime
+                self.events_lock.release()
 
                       
               
@@ -133,19 +135,18 @@ def check_queue(self):
             :returns: Filename if available to process, otherwise None.
         """
         now = time.time()
-        PyPdfWatcher.events_lock.acquire()
-        for monitored_file, timestamp in PyPdfWatcher.events.items():
-            if timestamp == -1:
-                del PyPdfWatcher.events[monitored_file]
-            elif now - timestamp > self.scan_interval:
+        self.events_lock.acquire()
+        self.events = {file:ts for file, ts in self.events.items() if ts != -1}
+        for monitored_file, timestamp in self.events.items():
+            if now - timestamp > self.scan_interval:
                 logging.info("Processing new file %s" % (monitored_file))
                 # Remove this file from the dict
-                del PyPdfWatcher.events[monitored_file]
+                del self.events[monitored_file]
                 monitored_file = self.rename_file_with_spaces(monitored_file)
-                PyPdfWatcher.events[monitored_file] = -1 # Add back into queue and mark as not needing further action in the event handler
-                PyPdfWatcher.events_lock.release()
+                self.events[monitored_file] = -1 # Add back into queue and mark as not needing further action in the event handler
+                self.events_lock.release()
                 return monitored_file
-        PyPdfWatcher.events_lock.release()
+        self.events_lock.release()
         return None
 
 
diff --git a/requirements.txt b/requirements.txt
index ae91a04..c086189 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,4 @@ pillow>=2.2
 reportlab>=2.7
 watchdog>=0.6.0
 pypdf2>=1.23
-evernote
+evernote; python_version < '3'
diff --git a/test/test_evernote.py b/test/test_evernote.py
index d337ec4..9afbf02 100644
--- a/test/test_evernote.py
+++ b/test/test_evernote.py
@@ -2,13 +2,24 @@
 import pypdfocr.pypdfocr_filer_evernote as P
 import pytest
 import os
+import sys
 
-import evernote.api.client
-import evernote.edam.type.ttypes as Types
+if sys.version_info.major == 2:
+    import evernote.api.client
+    import evernote.edam.type.ttypes as Types
 import hashlib
 
 from mock import patch, call
 
+
+def test_import():
+    """Evernote filing enabled for py2 only"""
+    expect_enabled = sys.version_info.major == 2
+    assert P.ENABLED == expect_enabled
+
+
+@pytest.mark.skipif(sys.version_info.major>=3, 
+                    reason="Evernote API not compatible with py3.")
 class TestEvernote:
 
     def test_connecct(self):
@@ -22,23 +33,25 @@ def test_connecct(self):
     def test_file_original(self, mock_move):
         with patch("pypdfocr.pypdfocr_filer_evernote.EvernoteClient") as mock_evernote_client:
             p = P.PyFilerEvernote("TOKEN")
-            filename = os.path.join("pdfs","test_recipe.pdf")
+            filepath = os.path.dirname(__file__)
+            filename = os.path.join(filepath, "pdfs","test_recipe.pdf")
 
             # First, test code that does not move original
             p.file_original(filename)
             assert (not mock_move.called)
 
             # Now test moving
-            p.set_original_move_folder(os.path.join("temp", "original"))
+            p.set_original_move_folder(os.path.join(filepath, "temp", "original"))
             p.file_original(filename)
-            mock_move.assert_called_with(filename, os.path.join("temp","original", "test_recipe_2.pdf"))
+            mock_move.assert_called_with(filename, os.path.join(filepath, "temp","original", "test_recipe_2.pdf"))
 
     @patch('os.remove')
     def test_move_to_folder(self, mock_remove):
         with patch("pypdfocr.pypdfocr_filer_evernote.EvernoteClient") as mock_evernote_client:
             p = P.PyFilerEvernote("TOKEN")
-	    filename = os.path.join("pdfs", "test_recipe.pdf")
-            foldername = 'recipe'
+            filepath = os.path.dirname(__file__)
+            filename = os.path.join(filepath, "pdfs", "test_recipe.pdf")
+            foldername = os.path.join(filepath, 'recipe')
             with pytest.raises(AssertionError):
                 p.move_to_matching_folder(filename, foldername)
             p.set_target_folder('target')
@@ -61,7 +74,8 @@ def test_create_note(self):
             p = P.PyFilerEvernote("TOKEN")
             notebook = Types.Notebook()
             notebook.name = "recipe"
-            filename = "pdfs/test_recipe.pdf"
+            filepath = os.path.dirname(__file__)
+            filename = os.path.join(filepath, "pdfs/test_recipe.pdf")
             note = p._create_evernote_note(notebook, filename)
             xml = '<?xml version="1.0" encoding="UTF-8"?><!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">'
             assert(note.content.startswith(xml))
diff --git a/test/test_gs.py b/test/test_gs.py
index c092b4f..9bebb57 100644
--- a/test/test_gs.py
+++ b/test/test_gs.py
@@ -1,12 +1,9 @@
 #from pypdfocr import PyPDFOCR as P
-import pypdfocr.pypdfocr_gs as P
+from pypdfocr import pypdfocr_gs as P
 import pytest
 import os
 
-import hashlib
-
-from mock import patch, call
-from pytest import skip
+from mock import patch
 
 class TestGS:
 
diff --git a/test/test_option_parsing.py b/test/test_option_parsing.py
index b8ae055..c97e39d 100644
--- a/test/test_option_parsing.py
+++ b/test/test_option_parsing.py
@@ -1,5 +1,7 @@
-#from pypdfocr import PyPDFOCR as P
-import pypdfocr.pypdfocr as P
+import os
+import sys
+
+from pypdfocr import pypdfocr as P
 import pytest
 
 
@@ -37,11 +39,15 @@ def test_standalone_filing(self):
             self.p.get_options(opts)
 
         # Assert that it checks that the config file is present
-        opts.append('--config=test_option_config.yaml')
+        conf_path = os.path.join(
+            os.path.dirname(__file__), 'test_option_config.yaml')
+        opts.append('--config={}'.format(conf_path))
         self.p.get_options(opts)
         assert(self.p.enable_filing)
         assert(self.p.config)
 
+    @pytest.mark.skipif(sys.version_info.major>2,
+                        reason="Evernote disabled for py3")
     def test_standalone_filing_evernote(self):
         # Check when evernote is enabled
         opts = ["blah.pdf"]
@@ -50,7 +56,9 @@ def test_standalone_filing_evernote(self):
         with pytest.raises(SystemExit):
             self.p.get_options(opts)
 
-        opts.append('--config=test_option_config.yaml')
+        conf_path = os.path.join(
+            os.path.dirname(__file__), 'test_option_config.yaml')
+        opts.append('--config={}'.format(conf_path))
         self.p.get_options(opts)
         # Enabling -e should turn on filing too
         assert(self.p.enable_filing)
@@ -65,6 +73,21 @@ def test_standalone_filing_evernote(self):
         assert(self.p.config)
         assert(not self.p.watch)
 
+    @pytest.mark.skipif(sys.version_info.major==2,
+                        reason="Evernote works on py2")
+    def test_evernote_disabled(self):
+        opts = ["blah.pdf"]
+        opts.append('-e')
+        # Assert that it checks that the config file is present
+        with pytest.raises(SystemExit):
+            self.p.get_options(opts)
+
+        conf_path = os.path.join(
+            os.path.dirname(__file__), 'test_option_config.yaml')
+        opts.append('--config={}'.format(conf_path))
+        self.p.get_options(opts)
+        assert not self.p.enable_evernote
+
     def test_standalone_watch_conflict(self):
         # When pdf file is specified, we don't want to allow watch option
         opts = ["blah.pdf", '-w']
@@ -80,23 +103,30 @@ def test_watch_filing(self):
         opts = ['-w temp']
         self.p.get_options(opts)
         assert(self.p.watch_dir)
-
-        opts.append('--config=test_option_config.yaml')
+        conf_path = os.path.join(
+            os.path.dirname(__file__), 'test_option_config.yaml')
+        opts.append('--config={}'.format(conf_path))
         self.p.get_options(opts)
         assert(self.p.watch)
         assert(self.p.config)
         assert(not self.p.enable_filing)
         assert(not self.p.enable_evernote)
 
+    @pytest.mark.skipif(sys.version_info.major>2,
+                        reason="Evernote disabled for py3")
     def test_watch_filing_evernote(self):
-        opts = ['-w temp', '-e', '--config=test_option_config.yaml']
+        conf_path = os.path.join(
+            os.path.dirname(__file__), 'test_option_config.yaml')
+        opts = ['-w temp', '-e', '--config={}'.format(conf_path)]
         self.p.get_options(opts)
         assert(self.p.watch)
         assert(self.p.config)
         assert(self.p.enable_filing)
         assert(self.p.enable_evernote)
 
-        opts = ['-w temp', '-f', '-e',  '--config=test_option_config.yaml']
+        conf_path = os.path.join(
+            os.path.dirname(__file__), 'test_option_config.yaml')
+        opts = ['-w temp', '-f', '-e',  '--config={}'.format(conf_path)]
         self.p.get_options(opts)
         assert(self.p.watch)
         assert(self.p.config)
diff --git a/test/test_pdf_filer.py b/test/test_pdf_filer.py
index 9db7382..bdea966 100644
--- a/test/test_pdf_filer.py
+++ b/test/test_pdf_filer.py
@@ -1,12 +1,8 @@
 #from pypdfocr import PyPDFOCR as P
-import pypdfocr.pypdfocr as P
-import pytest
+from pypdfocr import pypdfocr as P
 import os
 
-import hashlib
-
 from mock import patch, call
-from pytest import skip
 
 class TestPDFFiler:
 
@@ -19,7 +15,9 @@ def test_file_by_filename(self, mock_move):
         # Mock the move function so we don't actually end up filing
         p = P.PyPDFOCR()
         cwd = os.getcwd()
-        filename = os.path.join("pdfs", "test_super_long_keyword.pdf")
+        filename = os.path.join(os.path.dirname(__file__),
+                                "pdfs",
+                                "test_super_long_keyword.pdf")
         out_filename = filename.replace(".pdf", "_ocr.pdf")
 
         if os.path.exists(out_filename):
@@ -27,7 +25,10 @@ def test_file_by_filename(self, mock_move):
 
         print("Current directory: %s" % os.getcwd())
         #opts = [filename, "--config=test_pypdfocr_config.yaml", "-f"]
-        opts = [filename, "--config=test_pypdfocr_config_filename.yaml", "-f", "-n"]
+        conf_path = os.path.join(
+            os.path.dirname(__file__), 'test_pypdfocr_config.yaml')
+        # NOTE(review): was test_pypdfocr_config_filename.yaml; confirm the switch is intended.
+        opts = [filename, "--config={}".format(conf_path), "-f", "-n"]
         p.go(opts)
 
         assert(os.path.exists(out_filename))
@@ -38,4 +39,4 @@ def test_file_by_filename(self, mock_move):
 
 
 
-        
+
diff --git a/test/test_pypdfocr.py b/test/test_pypdfocr.py
index f3c8db7..21d1c94 100644
--- a/test/test_pypdfocr.py
+++ b/test/test_pypdfocr.py
@@ -1,15 +1,11 @@
 #from pypdfocr import PyPDFOCR as P
-import pypdfocr.pypdfocr as P
+from pypdfocr import pypdfocr as P
 import pytest
 import os
 import logging
 
 from PyPDF2 import PdfFileReader
-import smtplib
-from mock import Mock
 from mock import patch, call
-from mock import MagicMock
-from mock import PropertyMock
 
 
 class TestPydfocr:
@@ -23,34 +19,62 @@ def _iter_pdf(self, filename):
             logging.debug("pdf scanner found %d pages in %s" % (reader.getNumPages(), filename))
             for pgnum in range(reader.getNumPages()):
                 text = reader.getPage(pgnum).extractText()
-                text = text.encode('ascii', 'ignore')
+                # Left as unicode: encode('ascii', 'ignore') gives bytes on py3, breaking the str ops below
                 text = text.replace('\n', ' ')
                 yield text
-    
+
+    filepath = os.path.dirname(__file__)
     pdf_tests = [
-            (".", os.path.join("temp","target","recipe"), os.path.join("..","test", "pdfs", "test_recipe.pdf"), [ ["Simply Recipes"],
-                                 ]),
-        (".", os.path.join("temp","target","patents"), os.path.join("pdfs","test_patent.pdf"), [ 
-                           ["asynchronous", "subject to", "20 Claims"], # Page 1
-                           ["FOREIGN PATENT" ], # Page 2
-                            ]),
-        (".", os.path.join("temp","target", "default"), os.path.join("pdfs","test_sherlock.pdf"), [ ["Bohemia", "Trincomalee"], # Page 1
-                           ["hundreds of times" ], # Page 2
-                           ]),
-        ("pdfs", os.path.join("temp","target","default"), "test_sherlock.pdf", [ ["Bohemia", "Trincomalee"], # Page 1
-                           ["hundreds of times" ], # Page 2
-                           ]),
-            (".", os.path.join("temp","target","recipe"), os.path.join("..","test", "pdfs", "1.pdf"), [ ["Simply","Recipes"],
-                                 ]),
-            (".", os.path.join("temp","target","recipe"), os.path.join("..","test", "pdfs", "test_recipe_sideways.pdf"), [ ["Simply","Recipes", 'spinach'],
-                                 ]),
+        (
+            filepath,
+            os.path.join(filepath, "temp","target","recipe"),
+            os.path.join(filepath, "pdfs", "test_recipe.pdf"),
+            [ ["Simply Recipes"],]),
+        (
+            filepath,
+            os.path.join(filepath, "temp","target","patents"),
+            os.path.join("pdfs","test_patent.pdf"),
+            [
+                ["asynchronous", "subject to", "20 Claims"], # Page 1
+                ["FOREIGN PATENT" ], # Page 2
+            ]),
+        (
+            filepath,
+            os.path.join(filepath, "temp","target", "default"),
+            os.path.join("pdfs","test_sherlock.pdf"),
+            [
+                ["Bohemia", "Trincomalee"], # Page 1
+                ["hundreds of times" ], # Page 2
+            ]),
+        (
+            os.path.join(filepath, "pdfs"),
+            os.path.join(filepath, "temp","target","default"),
+            "test_sherlock.pdf",
+            [
+                ["Bohemia", "Trincomalee"], # Page 1
+                ["hundreds of times" ], # Page 2
+            ]),
+        (
+            filepath,
+            os.path.join(filepath, "temp","target","recipe"),
+            os.path.join("..","test", "pdfs", "1.pdf"),
+            [
+                ["Simply","Recipes"],
+            ]),
+        (
+            filepath,
+            os.path.join(filepath, "temp","target","recipe"),
+            os.path.join("..","test", "pdfs", "test_recipe_sideways.pdf"),
+            [
+                ["Simply","Recipes", 'spinach'],
+            ]),
         ]
 
-    #@pytest.mark.skipif(True, reason="Just testing")
+    # @pytest.mark.skipif(True, reason="Just testing")
     @pytest.mark.parametrize("dirname, tgt_folder, filename, expected", pdf_tests)
     def test_standalone(self, dirname, tgt_folder, filename, expected):
         """
-            Test the single file conversion with no filing.  
+            Test the single file conversion with no filing.
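-            Tests relative paths (".."), files in subirs, and files in current dir
-            Checks for that _ocr file is created and keywords found in pdf.
+            Tests relative paths (".."), files in subdirs, and files in current dir.
+            Checks that the _ocr file is created and keywords are found in the pdf.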
             Modify :attribute:`pdf_tests` for changing keywords, etc
@@ -61,9 +85,9 @@ def test_standalone(self, dirname, tgt_folder, filename, expected):
 
         # First redo the unix-style paths, in case we're running on windows
         # Assume paths in unix-style
-        dirname = os.path.join(*(dirname.split("/")))
-        tgt_folder = os.path.join(*(tgt_folder.split("/")))
-        filename = os.path.join(*(filename.split("/")))
+        # NOTE(review): the os.path.join(*x.split("/")) normalisation of dirname,
+        # tgt_folder and filename is disabled; presumably the parametrized paths
+        # are already OS-native (built with os.path.join) -- confirm on Windows.
 
 
         cwd = os.getcwd()
@@ -77,12 +101,12 @@ def test_standalone(self, dirname, tgt_folder, filename, expected):
             if len(expected) > i:
                 for keyword in expected[i]:
                     assert(keyword in t)
-            print ("\n----------------------\nPage %d\n" % i)
-            print t
+            print("\n----------------------\nPage %d\n" % i)
+            print(t)
         os.remove(out_filename)
         os.chdir(cwd)
 
-    #@pytest.mark.skipif(True, reason="just testing")
+    # @pytest.mark.skipif(True, reason="just testing")
     @pytest.mark.parametrize("dirname, tgt_folder, filename, expected", [pdf_tests[0]])
     def test_standalone_email(self, dirname, tgt_folder, filename, expected):
         """
@@ -104,11 +128,11 @@ def test_standalone_email(self, dirname, tgt_folder, filename, expected):
                 if len(expected) > i:
                     for keyword in expected[i]:
                         assert(keyword in t)
-                print ("\n----------------------\nPage %d\n" % i)
-                print t
+                print("\n----------------------\nPage %d\n" % i)
+                print(t)
             os.remove(out_filename)
             os.chdir(cwd)
-            
+
             # Assert the smtp calls
             instance = mock_smtp.return_value
             assert(instance.starttls.called)
@@ -116,7 +140,10 @@ def test_standalone_email(self, dirname, tgt_folder, filename, expected):
             assert(instance.sendmail.called)
 
     @patch('shutil.move')
-    @pytest.mark.parametrize("config", [("test_pypdfocr_config.yaml"), ("test_pypdfocr_config_no_move_original.yaml")])
+    @pytest.mark.parametrize(
+        "config",
+        [(os.path.join(filepath, "test_pypdfocr_config.yaml")),
+         (os.path.join(filepath, "test_pypdfocr_config_no_move_original.yaml"))])
     @pytest.mark.parametrize("dirname, tgt_folder, filename, expected", pdf_tests[0:3])
     def test_standalone_filing(self, mock_move, config, dirname, tgt_folder, filename, expected):
         """
@@ -146,18 +173,18 @@ def test_standalone_filing(self, mock_move, config, dirname, tgt_folder, filenam
             if len(expected) > i:
                 for keyword in expected[i]:
                     assert(keyword in t)
-            print ("\n----------------------\nPage %d\n" % i)
-            print t
+            print("\n----------------------\nPage %d\n" % i)
+            print(t)
         os.remove(out_filename)
         os.chdir(cwd)
-        
+
-        # Assert the smtp calls
+        # Assert the shutil.move calls
         calls = [call(out_filename,
-                        os.path.abspath(os.path.join(tgt_folder,os.path.basename(out_filename))))]
+                      os.path.abspath(os.path.join(tgt_folder,os.path.basename(out_filename))))]
         if not "no_move_original" in config:
             new_file_name = os.path.basename(filename).replace(".pdf", "_2.pdf")
             calls.append(call(filename,
-                                os.path.abspath(os.path.join("temp","original", new_file_name))))
+                              os.path.abspath(os.path.join("test", "temp","original", new_file_name))))
         mock_move.assert_has_calls(calls)
 
     def test_set_binaries(self):
diff --git a/test/test_tesseract.py b/test/test_tesseract.py
index c137248..5f10131 100644
--- a/test/test_tesseract.py
+++ b/test/test_tesseract.py
@@ -1,11 +1,9 @@
 #from pypdfocr import PyPDFOCR as P
-import pypdfocr.pypdfocr_tesseract as P
+from pypdfocr import pypdfocr_tesseract as P
 import pytest
 import os
 
-import hashlib
-
-from mock import patch, call
+from mock import patch
 
 class TestTesseract:
 
@@ -72,7 +70,7 @@ def test_tesseract_presence(self, capsys):
 
     def test_tesseract_version(self, capsys):
         p = P.PyTesseract({})
-        p.required = "100"
+        p.required = "100.01"
         with pytest.raises(SystemExit):
             p.make_hocr_from_pnms("")
         out, err = capsys.readouterr()
diff --git a/test/test_watcher.py b/test/test_watcher.py
index 8470760..a7ae6ce 100644
--- a/test/test_watcher.py
+++ b/test/test_watcher.py
@@ -1,15 +1,11 @@
-#from pypdfocr import PyPDFOCR as P
-import pypdfocr.pypdfocr_watcher as P
+from pypdfocr import pypdfocr_watcher as P
 import pytest
 
-import evernote.api.client
-import evernote.edam.type.ttypes as Types
-import hashlib
 import time
 import os
 from collections import namedtuple
 
-from mock import patch, call
+from mock import patch
 
 class TestWatching:
 
@@ -23,20 +19,20 @@ class TestWatching:
 
     @patch('shutil.move')
     @pytest.mark.parametrize(("filename, expected"), filenames)
-    def test_rename(self, mock_move, filename, expected):
+    def test_rename(self, mock_move, filename, expected, tmpdir):
     
         if expected == None:
             expected = filename
 
-        p = P.PyPdfWatcher('temp',{})
+        p = P.PyPdfWatcher(str(tmpdir.mkdir("tmp")),{})
 
         # First, test code that does not move original
         ret = p.rename_file_with_spaces(filename)
         assert (ret==expected)
 
-    def test_check_for_new_pdf(self):
+    def test_check_for_new_pdf(self, tmpdir):
     
-        p = P.PyPdfWatcher('temp', {})
+        p = P.PyPdfWatcher(str(tmpdir.mkdir("tmp")), {})
         p.check_for_new_pdf("blah_ocr.pdf")
         assert("blah_ocr.pdf" not in p.events)
         p.check_for_new_pdf("blah.pdf")
@@ -49,8 +45,8 @@ def test_check_for_new_pdf(self):
         p.check_for_new_pdf("blah.pdf")
         assert(p.events['blah.pdf']-time.time() <=1) # Check that time stamp was updated
 
-    def test_events(self):
-        p = P.PyPdfWatcher('temp', {})
+    def test_events(self, tmpdir):
+        p = P.PyPdfWatcher(str(tmpdir.mkdir("tmp")), {})
 
         event = namedtuple('event', 'src_path, dest_path')
 
@@ -63,8 +59,9 @@ def test_events(self):
         p.on_modified(event(src_path='temp_recipe3.pdf', dest_path=None))
         assert('temp_recipe3.pdf' in p.events)
 
-    def test_check_queue(self):
-        p = P.PyPdfWatcher('temp', {})
+    def test_check_queue(self, tmpdir):
+        p = P.PyPdfWatcher(str(tmpdir.mkdir("tmp")), {})
+        assert p.events == {}
         now = time.time()
         p.events['blah.pdf'] = now
         f = p.check_queue()