diff --git a/.gitignore b/.gitignore index 572bb84..93989e4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,56 @@ dist /env/ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ diff --git a/ever2simple/converter.py b/ever2simple/converter.py index 4a3f849..0a511cb 100644 --- a/ever2simple/converter.py +++ b/ever2simple/converter.py @@ -1,8 +1,14 @@ +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import +from builtins import open +from builtins import str import json import os import sys from csv import DictWriter -from cStringIO import StringIO +from io import StringIO from dateutil.parser import parse from html2text import HTML2Text from lxml import etree @@ -15,7 +21,8 @@ class EverConverter(object): fieldnames = ['createdate', 'modifydate', 'content', 'tags'] date_fmt = '%h %d %Y %H:%M:%S' - def __init__(self, enex_filename, simple_filename=None, fmt='json'): + def __init__(self, enex_filename, simple_filename=None, fmt='json', + preserve_title=False, verbose=False): self.enex_filename = os.path.expanduser(enex_filename) self.stdout = False if simple_filename is None: @@ -24,14 +31,16 @@ def __init__(self, enex_filename, simple_filename=None, fmt='json'): else: self.simple_filename = 
os.path.expanduser(simple_filename) self.fmt = fmt + self.preserve_title = preserve_title + self.verbose = verbose def _load_xml(self, enex_file): try: parser = etree.XMLParser(huge_tree=True) xml_tree = etree.parse(enex_file, parser) - except (etree.XMLSyntaxError, ), e: - print 'Could not parse XML' - print e + except (etree.XMLSyntaxError, ) as e: + print('Could not parse XML') + print(e) sys.exit(1) return xml_tree @@ -41,6 +50,7 @@ def prepare_notes(self, xml_tree): for note in raw_notes: note_dict = {} title = note.xpath('title')[0].text + note_dict['title'] = title # Use dateutil to figure out these dates # 20110610T182917Z created_string = parse('19700101T000017Z') @@ -66,12 +76,14 @@ def prepare_notes(self, xml_tree): # ignoring the problem for now. converted_text = converted_text.encode('ascii', 'ignore') note_dict['content'] = converted_text + if self.verbose: + print("note_dict: {}".format(note_dict)) notes.append(note_dict) return notes def convert(self): if not os.path.exists(self.enex_filename): - print "File does not exist: %s" % self.enex_filename + print("File does not exist: {}".format(self.enex_filename)) sys.exit(1) # TODO: use with here, but pyflakes barfs on it enex_file = open(self.enex_filename) @@ -115,12 +127,37 @@ def _convert_dir(self, notes): if self.simple_filename is None: sys.stdout.write(json.dumps(notes)) else: - if os.path.exists(self.simple_filename) and not os.path.isdir(self.simple_filename): - print '"%s" exists but is not a directory. 
%s' % self.simple_filename + if (os.path.exists(self.simple_filename) and + not os.path.isdir(self.simple_filename)): + print('"{}" exists but is not a directory.'.format( + self.simple_filename)) sys.exit(1) elif not os.path.exists(self.simple_filename): os.makedirs(self.simple_filename) for i, note in enumerate(notes): - output_file_path = os.path.join(self.simple_filename, str(i) + '.txt') - with open(output_file_path, 'w') as output_file: - output_file.write(note['content'].encode(encoding='utf-8')) + if self.preserve_title: + # (nicholaskuechler) try to preserve the title, but replace + # spaces with underscores, replace forward slash with dash, + # and preserve the note number in case of duplicate titles. + note_title = note['title'] + # replace spaces with underscores + note_title = note_title.replace(' ', '_') + # replace forward slashes with dashes + note_title = note_title.replace('/', '-') + note_title = "%s-%s" % (note_title, i) + else: + note_title = str(i) + + try: + output_file_path = \ + os.path.join(self.simple_filename, note_title + '.txt') + with open(output_file_path, 'w') as output_file: + output_file.write(note['content']) + except Exception as e: + output_file_path = os.path.join( + self.simple_filename, + "title_fail" + '-' + str(i) + '.txt') + print("failed to use title for filename: {}".format(e)) + with open(output_file_path, 'w') as output_file: + output_file.write(note['content']) + diff --git a/ever2simple/core.py b/ever2simple/core.py index cfdcf33..2da5507 100644 --- a/ever2simple/core.py +++ b/ever2simple/core.py @@ -1,3 +1,7 @@ +from __future__ import print_function +from __future__ import unicode_literals +from __future__ import division +from __future__ import absolute_import import os import sys from ever2simple.converter import EverConverter @@ -5,19 +9,45 @@ def main(): - parser = argparse.ArgumentParser(prog=None, description="Convert Evernote.enex files to Markdown", formatter_class=argparse.ArgumentDefaultsHelpFormatter) -
parser.add_argument('enex-file', help="the path to the Evernote.enex file") - parser.add_argument('-o', '--output', help="the path to the output file or directory, leave black to output to the terminal (stdout)") - parser.add_argument('-f', '--format', help="the output format, json, csv or a directory", choices=['json', 'csv', 'dir'], default='json') + parser = argparse.ArgumentParser( + prog=None, + description="Convert Evernote.enex files to Markdown", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + 'enex-file', + help="the path to the Evernote.enex file") + parser.add_argument( + '-o', '--output', + help=("the path to the output file or directory, leave blank to " + "output to the terminal (stdout)")) + parser.add_argument( + '-f', '--format', + help="the output format, json, csv or a directory", + choices=['json', 'csv', 'dir'], + default='json') + parser.add_argument( + '--preserve_title', + action="store_true", + help="Try to preserve the note title as the file name", + default=False) + parser.add_argument( + '-v', '--verbose', + action="store_true", + help="Increase output verbosity", + default=False) args = parser.parse_args() enex_file = vars(args)['enex-file'] output = args.output fmt = args.format + preserve_title = args.preserve_title + verbose = args.verbose filepath = os.path.expanduser(enex_file) + if not os.path.exists(filepath): - print 'File does not exist: %s' % filepath + print('File does not exist: {}'.format(filepath)) sys.exit(1) - converter = EverConverter(filepath, simple_filename=output, fmt=fmt) + converter = EverConverter(filepath, simple_filename=output, fmt=fmt, + preserve_title=preserve_title, verbose=verbose) converter.convert() sys.exit() diff --git a/setup.py b/setup.py index 07936af..20f7d93 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup, find_packages -version = '2.0' +version = '2.1' setup( name='ever2simple', @@ -24,7 +24,7 @@ zip_safe=False, 
install_requires=[ 'lxml', - 'python-dateutil<2.0', + 'python-dateutil>=2.0', 'html2text', ], entry_points="""