From 9ca2857e63ceadb94d61927940f669cee96e9cb0 Mon Sep 17 00:00:00 2001 From: Nicholas Kuechler Date: Wed, 29 Jun 2016 16:11:42 -0500 Subject: [PATCH 1/5] Adds option to preserve title as file name. --- .gitignore | 56 ++++++++++++++++++++++++++++++++++++++-- ever2simple/converter.py | 43 +++++++++++++++++++++++++----- ever2simple/core.py | 36 ++++++++++++++++++++++---- setup.py | 2 +- 4 files changed, 123 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 572bb84..db4561e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,54 @@ -dist -/env/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ diff --git a/ever2simple/converter.py b/ever2simple/converter.py index 4a3f849..6304628 100644 --- a/ever2simple/converter.py +++ b/ever2simple/converter.py @@ -15,7 +15,8 @@ class EverConverter(object): fieldnames = ['createdate', 'modifydate', 'content', 'tags'] date_fmt = '%h %d %Y %H:%M:%S' - def __init__(self, enex_filename, simple_filename=None, fmt='json'): + def __init__(self, enex_filename, simple_filename=None, fmt='json', + preserve_title=False, verbose=False): self.enex_filename = os.path.expanduser(enex_filename) self.stdout = False if simple_filename is None: @@ -24,6 +25,8 @@ def __init__(self, enex_filename, simple_filename=None, fmt='json'): else: self.simple_filename = 
os.path.expanduser(simple_filename) self.fmt = fmt + self.preserve_title = preserve_title + self.verbose = verbose def _load_xml(self, enex_file): try: @@ -41,6 +44,7 @@ def prepare_notes(self, xml_tree): for note in raw_notes: note_dict = {} title = note.xpath('title')[0].text + note_dict['title'] = title # Use dateutil to figure out these dates # 20110610T182917Z created_string = parse('19700101T000017Z') @@ -66,6 +70,8 @@ def prepare_notes(self, xml_tree): # ignoring the problem for now. converted_text = converted_text.encode('ascii', 'ignore') note_dict['content'] = converted_text + if self.verbose: + print "note_dict: %s" % (note_dict) notes.append(note_dict) return notes @@ -115,12 +121,37 @@ def _convert_dir(self, notes): if self.simple_filename is None: sys.stdout.write(json.dumps(notes)) else: - if os.path.exists(self.simple_filename) and not os.path.isdir(self.simple_filename): - print '"%s" exists but is not a directory. %s' % self.simple_filename + if (os.path.exists(self.simple_filename) and + not os.path.isdir(self.simple_filename)): + print ('"%s" exists but is not a directory. %s' + % self.simple_filename) sys.exit(1) elif not os.path.exists(self.simple_filename): os.makedirs(self.simple_filename) for i, note in enumerate(notes): - output_file_path = os.path.join(self.simple_filename, str(i) + '.txt') - with open(output_file_path, 'w') as output_file: - output_file.write(note['content'].encode(encoding='utf-8')) + if self.preserve_title: + # (nicholaskuechler) try to preserve the title, but replace + # spaces with underscores, replace forward slash with dash, + # and preserve the note number in case of duplicate titles. 
+ note_title = note['title'] + # replace spaces with underscores + note_title = note_title.replace(' ', '_') + # replace forward slashes with dashes + note_title = note_title.replace('/', '-') + note_title = "%s-%s" % (note_title, i) + else: + note_title = str(i) + + try: + output_file_path = \ + os.path.join(self.simple_filename, note_title + '.txt') + with open(output_file_path, 'w') as output_file: + output_file.write( + note['content'].encode(encoding='utf-8')) + except: + output_file_path = os.path.join( + self.simple_filename, + "title_fail" + '-' + str(i) + '.txt') + with open(output_file_path, 'w') as output_file: + output_file.write( + note['content'].encode(encoding='utf-8')) diff --git a/ever2simple/core.py b/ever2simple/core.py index cfdcf33..be6d02f 100644 --- a/ever2simple/core.py +++ b/ever2simple/core.py @@ -5,19 +5,45 @@ def main(): - parser = argparse.ArgumentParser(prog=None, description="Convert Evernote.enex files to Markdown", formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('enex-file', help="the path to the Evernote.enex file") - parser.add_argument('-o', '--output', help="the path to the output file or directory, leave black to output to the terminal (stdout)") - parser.add_argument('-f', '--format', help="the output format, json, csv or a directory", choices=['json', 'csv', 'dir'], default='json') + parser = argparse.ArgumentParser( + prog=None, + description="Convert Evernote.enex files to Markdown", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + 'enex-file', + help="the path to the Evernote.enex file") + parser.add_argument( + '-o', '--output', + help=("the path to the output file or directory, leave blank to " + "output to the terminal (stdout)")) + parser.add_argument( + '-f', '--format', + help="the output format, json, csv or a directory", + choices=['json', 'csv', 'dir'], + default='json') + parser.add_argument( + '--preserve_title', + action="store_true", + help="Try 
to preserve the note title as the file name", + default=False) + parser.add_argument( + '-v', '--verbose', + action="store_true", + help="Increase output verbosity", + default=False) args = parser.parse_args() enex_file = vars(args)['enex-file'] output = args.output fmt = args.format + preserve_title = args.preserve_title + verbose = args.verbose filepath = os.path.expanduser(enex_file) + if not os.path.exists(filepath): print 'File does not exist: %s' % filepath sys.exit(1) - converter = EverConverter(filepath, simple_filename=output, fmt=fmt) + converter = EverConverter(filepath, simple_filename=output, fmt=fmt, + preserve_title=preserve_title, verbose=verbose) converter.convert() sys.exit() diff --git a/setup.py b/setup.py index 07936af..e117600 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -version = '2.0' +version = '2.1' setup( name='ever2simple', From 1ed23a3a900880855b76902ad0a3ffaaf437192f Mon Sep 17 00:00:00 2001 From: tedder Date: Wed, 6 Jul 2016 08:36:45 -0700 Subject: [PATCH 2/5] py3 compatibility - fully compatible now. - flipped the dependency on dateutil, as 1.5.x is incompatible with py3. - bumped version num to 2.1. 
--- .gitignore | 2 ++ ever2simple/converter.py | 20 ++++++++++++++------ ever2simple/core.py | 8 +++++++- setup.py | 4 ++-- 4 files changed, 25 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 572bb84..1f5404c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ dist /env/ +ever2simple.egg-info +ever2simple/__pycache__ diff --git a/ever2simple/converter.py b/ever2simple/converter.py index 4a3f849..628c76a 100644 --- a/ever2simple/converter.py +++ b/ever2simple/converter.py @@ -1,8 +1,16 @@ +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from builtins import open +from builtins import str +from future import standard_library +standard_library.install_aliases() import json import os import sys from csv import DictWriter -from cStringIO import StringIO +from io import StringIO from dateutil.parser import parse from html2text import HTML2Text from lxml import etree @@ -29,9 +37,9 @@ def _load_xml(self, enex_file): try: parser = etree.XMLParser(huge_tree=True) xml_tree = etree.parse(enex_file, parser) - except (etree.XMLSyntaxError, ), e: - print 'Could not parse XML' - print e + except (etree.XMLSyntaxError, ) as e: + print('Could not parse XML') + print(e) sys.exit(1) return xml_tree @@ -71,7 +79,7 @@ def prepare_notes(self, xml_tree): def convert(self): if not os.path.exists(self.enex_filename): - print "File does not exist: %s" % self.enex_filename + print("File does not exist: %s" % self.enex_filename) sys.exit(1) # TODO: use with here, but pyflakes barfs on it enex_file = open(self.enex_filename) @@ -116,7 +124,7 @@ def _convert_dir(self, notes): sys.stdout.write(json.dumps(notes)) else: if os.path.exists(self.simple_filename) and not os.path.isdir(self.simple_filename): - print '"%s" exists but is not a directory. %s' % self.simple_filename + print('"%s" exists but is not a directory. 
%s' % self.simple_filename) sys.exit(1) elif not os.path.exists(self.simple_filename): os.makedirs(self.simple_filename) diff --git a/ever2simple/core.py b/ever2simple/core.py index cfdcf33..9bae8a1 100644 --- a/ever2simple/core.py +++ b/ever2simple/core.py @@ -1,3 +1,9 @@ +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from future import standard_library +standard_library.install_aliases() import os import sys from ever2simple.converter import EverConverter @@ -15,7 +21,7 @@ def main(): fmt = args.format filepath = os.path.expanduser(enex_file) if not os.path.exists(filepath): - print 'File does not exist: %s' % filepath + print('File does not exist: {}'.format(filepath)) sys.exit(1) converter = EverConverter(filepath, simple_filename=output, fmt=fmt) converter.convert() diff --git a/setup.py b/setup.py index 07936af..20f7d93 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -version = '2.0' +version = '2.1' setup( name='ever2simple', @@ -24,7 +24,7 @@ zip_safe=False, install_requires=[ 'lxml', - 'python-dateutil<2.0', + 'python-dateutil>=2.0', 'html2text', ], entry_points=""" From 5631dc68055d005e4fea665aac20c1a91b8bc2da Mon Sep 17 00:00:00 2001 From: tedder Date: Wed, 6 Jul 2016 08:44:03 -0700 Subject: [PATCH 3/5] remove dangerous install_aliases (imports from other packages) --- ever2simple/converter.py | 2 -- ever2simple/core.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/ever2simple/converter.py b/ever2simple/converter.py index 628c76a..4cbfb18 100644 --- a/ever2simple/converter.py +++ b/ever2simple/converter.py @@ -4,8 +4,6 @@ from __future__ import absolute_import from builtins import open from builtins import str -from future import standard_library -standard_library.install_aliases() import json import os import sys diff --git a/ever2simple/core.py b/ever2simple/core.py index 9bae8a1..0dca47b 
100644 --- a/ever2simple/core.py +++ b/ever2simple/core.py @@ -2,8 +2,6 @@ from __future__ import unicode_literals from __future__ import division from __future__ import absolute_import -from future import standard_library -standard_library.install_aliases() import os import sys from ever2simple.converter import EverConverter From 988706151f8ddb74b4d6b4f5f071e3ff4cfaa646 Mon Sep 17 00:00:00 2001 From: tedder Date: Wed, 6 Jul 2016 09:00:18 -0700 Subject: [PATCH 4/5] remove encoding; works with both py2/py3 now --- ever2simple/converter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ever2simple/converter.py b/ever2simple/converter.py index daa7fd5..cf91919 100644 --- a/ever2simple/converter.py +++ b/ever2simple/converter.py @@ -159,5 +159,5 @@ def _convert_dir(self, notes): self.simple_filename, "title_fail" + '-' + str(i) + '.txt') with open(output_file_path, 'w') as output_file: - output_file.write( - note['content'].encode(encoding='utf-8')) + output_file.write(note['content']) + From b7325054764c1a180e3b1475c65210ce23793f9c Mon Sep 17 00:00:00 2001 From: tedder Date: Wed, 6 Jul 2016 09:06:06 -0700 Subject: [PATCH 5/5] remove encoding here too; was silently catching exception --- ever2simple/converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ever2simple/converter.py b/ever2simple/converter.py index cf91919..0a511cb 100644 --- a/ever2simple/converter.py +++ b/ever2simple/converter.py @@ -152,12 +152,12 @@ def _convert_dir(self, notes): output_file_path = \ os.path.join(self.simple_filename, note_title + '.txt') with open(output_file_path, 'w') as output_file: - output_file.write( - note['content'].encode(encoding='utf-8')) - except: + output_file.write(note['content']) + except Exception as e: output_file_path = os.path.join( self.simple_filename, "title_fail" + '-' + str(i) + '.txt') + print("failed to use title for filename: {}".format(e)) with open(output_file_path, 'w') as output_file: 
output_file.write(note['content'])