Skip to content

Commit

Permalink
done with version 1.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
FlorianDietz committed Nov 10, 2019
1 parent 29f0fe8 commit 59fcc98
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 0 deletions.
Binary file added .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
include LICENSE
include README.md
30 changes: 30 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,32 @@
# loosejson

A JSON parser for Python that shows more useful error messages and fixes minor formatting errors automatically.

If you use JSON files a lot, it can get very frustrating to get an error just because you failed to delete a trailing comma, or to be forced to write unreadable multi-line strings without linebreaks, or to get error messages that don't tell you directly what is actually causing the error.

Since I needed to make this process as user-friendly as possible for my startup elody.com, I wrote this convenience library to make it easier.

## Features

* Supports Unicode (implicitly, since it just uses whatever string format python is using)
* Supports escape characters like normal JSON does.
* Supports extra (trailing) commas after the last element of a list or dictionary
* Supports unquoted strings consisting of letters, digits and the characters `.`, `?`, `!`, `-` and `_`, which can often occur in Rules and Options
* Supports linebreaks in quoted strings (they are treated the same as writing \n, except that any spaces and tabs following them are ignored.)
* Supports both ' and " as quotation marks
* Supports both null and None, so it works for parsing both Javascript and Python
* Supports True/true, False/false
* Does not support infinite or NaN numbers
* Has useful error messages

This works for Python 2.7 and 3+.

## Installation

`pip install loosejson`

## Usage

`from loosejson import loosejson`

Just run `loosejson.parse_loosely_defined_json(text)` on a string. It returns the parsed value as standard Python objects (dict, list, string, number, boolean or None), just like `json.loads(text)`, but with less of a headache.
1 change: 1 addition & 0 deletions loosejson/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

270 changes: 270 additions & 0 deletions loosejson/loosejson.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
import ast
import json
import math
import re
import sys
import traceback

from six import string_types

##############################################################################################################
# This is a JSON parser that isn't as strict as the normal json libraries.
# I can't believe a library for this didn't already exist and I had to write it myself...
##############################################################################################################


class JsonParsingException(Exception):
    """Raised when a text cannot be parsed as loosely defined JSON."""


def parse_loosely_defined_json(text):
    r"""
    Parse a string that represents a JSON value, without being as strict as the normal json libraries.

    It has the following features:
    * Supports Unicode (implicitly, since it just uses whatever string format python is using)
    * Supports escape characters like normal JSON does.
    * Supports extra commas
    * Supports unquoted strings for several kinds of characters that can often occur in Rules and Options
    * Supports linebreaks in quoted strings (they are treated the same as writing \n, except that any spaces and tabs following them are ignored.)
    * Supports both ' and " as quotation marks
    * Supports both null and None, so it works for parsing both Javascript and Python
    * Supports True/true and False/false for the same reason
    * Does not support infinite or NaN numbers
    * Has useful error messages

    Returns the parsed value after a round trip through json.dumps()/json.loads().
    Raises JsonParsingException, with the line, column and offending character,
    if the parser fails for any reason.
    """
    parser = LooseJsonParser(text)
    raised_error = None
    try:
        res = parser.get_object()
    except Exception as e:
        # don't raise the exception here directly because python will "helpfully" chain the exceptions together
        raised_error = e
    if raised_error is not None:
        current_char = parser.chars[parser.pos]
        if current_char is parser.EOF:
            # the EOF marker is a bare object(); printing it would show a meaningless memory address
            current_char = '<end of input>'
        raise JsonParsingException("exception while parsing text into JSON format.\nException occured at line %d, column %d, for character '%s':\n%s" % (parser.line, parser.col, current_char, str(raised_error),))
    # convert to JSON string and back again, just to be sure it works and any error arises now and not later
    res = json.loads(json.dumps(res))
    return res


class LooseJsonParser:
    """
    Recursive, character-by-character parser for loosely formatted JSON.

    The text is split into a list of characters followed by a unique EOF marker.
    self.pos is the index of the character currently being examined;
    self.line and self.col are the 1-based position of that character and are
    meant to be used in error messages.
    """

    def __init__(self, text):
        """Set up the parser state so that parsing starts at the first character of text."""
        self.pos = 0
        self.line = 1
        self.col = 1
        self.chars = list(text)
        # regex character class of everything that may appear in an unquoted value
        # (raw string, because '\-' is not a valid escape sequence in a normal string literal)
        self.unquoted_characters = r'[a-zA-Z0-9.?!\-_]'
        # unique marker for the end of the input
        self.EOF = object()
        self.chars.append(self.EOF)

    def get_object(self):
        """
        Starting at the current position, continues parsing new characters until it has parsed a complete object, then returns that object.
        When this starts, self.pos should be at the first character of the object (or leading whitespace)
        and when it returns self.pos will be at the last character of the object.

        Lists and dictionaries recurse into this method for each of their elements.
        Raises JsonParsingException if the text is malformed.
        """
        task = None
        while self.pos < len(self.chars):
            char = self.chars[self.pos]
            if char is self.EOF:
                raise JsonParsingException("reached the end of the file without encountering anything to parse.")
            # update line and column on a linebreak
            if char == '\n':
                self.line += 1
                # the increment at the end of this iteration makes the first character of the new line column 1
                self.col = 0
            # how to handle the character depends on what is currently being done
            if task is None:
                if re.match(r'\s', char):
                    # while there is no task yet, ignore whitespace and continue looking for an object
                    pass
                elif char == '[':
                    task = 'building_list'
                    res_builder = []
                    expecting_comma = False
                elif char == '{':
                    task = 'building_dict'
                    res_builder = {}
                    stage = 'expecting_key'
                elif char == '"' or char == "'":
                    task = 'building_primitive'
                    quote_type = 'double_quotes' if char == '"' else 'single_quotes'
                    res_builder = []
                    string_escape = False
                elif re.match(self.unquoted_characters, char):
                    task = 'building_primitive'
                    quote_type = 'no_quotes'
                    res_builder = [char]
                    # an unquoted value may consist of just this one character
                    is_finished, res = self._unquoted_text_lookahead_and_optionally_finish(res_builder)
                    if is_finished:
                        return res
                else:
                    raise JsonParsingException("reached an unexpected character while looking for the start of the next object: %s" % char)
            elif task == 'building_list':
                if re.match(r'\s', char):
                    pass  # skip whitespace in a list
                elif char == ',':
                    if not expecting_comma:
                        raise JsonParsingException("encountered multiple commas after another while parsing a list. Did you forget a list element?")
                    expecting_comma = False
                elif char == ']':
                    # the end of the list has been reached.
                    return res_builder
                elif expecting_comma:
                    raise JsonParsingException("expected a comma before the next list element.")
                else:
                    # recurse to get the next element
                    res_builder.append(self.get_object())
                    expecting_comma = True
            elif task == 'building_dict':
                if re.match(r'\s', char):
                    pass  # skip whitespace in a dictionary
                elif char == '}':
                    if stage not in ('expecting_key', 'expecting_comma'):
                        raise JsonParsingException("the dictionary was closed too early. It's missing a value to go with the last key.")
                    return res_builder
                elif stage == 'expecting_key':
                    # recurse to get the next element, and verify it's usable as a key and it's new
                    next_dict_key = self.get_object()
                    if isinstance(next_dict_key, (list, dict)):
                        # these are unhashable; report it properly instead of crashing with a TypeError
                        raise JsonParsingException("lists and dictionaries can not be used as keys of a dictionary.")
                    # if the key is not a string, but is a primitive, coerce it into a string representing the JSON object
                    # (this uses str(json.dumps(next_dict_key)) instead of just str() so that None/null get turned to 'null' instead of 'None')
                    if next_dict_key is None or isinstance(next_dict_key, (int, float, bool)):
                        next_dict_key = str(json.dumps(next_dict_key))
                    if next_dict_key in res_builder:
                        raise JsonParsingException("this string has already been used as a key of this dictionary. No duplicate keys are allowed:\n%s" % next_dict_key)
                    stage = 'expecting_colon'
                elif stage == 'expecting_colon':
                    if char != ':':
                        raise JsonParsingException("expected a colon separating the dictionary's key from its value")
                    stage = 'expecting_value'
                elif stage == 'expecting_value':
                    # recurse to get the next element
                    res_builder[next_dict_key] = self.get_object()
                    stage = 'expecting_comma'
                elif stage == 'expecting_comma':
                    if char != ',':
                        raise JsonParsingException("expected a comma before the next dictionary key.")
                    stage = 'expecting_key'
                else:
                    raise Exception("Programming error: undefined stage of dictionary parsing: %s" % stage)
            elif task == 'building_primitive':
                if quote_type in ('double_quotes', 'single_quotes'):
                    limiting_quote = '"' if quote_type == 'double_quotes' else "'"
                    if char == limiting_quote and not string_escape:
                        # the end of the string has been reached. Build the string.
                        return self._finish_quoted_string(res_builder, limiting_quote)
                    # add the current character to the list
                    res_builder.append(char)
                    # if a backslash occurs, enter escape mode unless escape mode is already active,
                    # else deactivate escape mode
                    string_escape = (char == '\\' and not string_escape)
                elif quote_type == 'no_quotes':
                    if not re.match(self.unquoted_characters, char):
                        raise Exception("Programming error: this should have never been reached because of _unquoted_text_lookahead_and_optionally_finish().")
                    # add the element
                    res_builder.append(char)
                    # look ahead, and possibly finish up
                    is_finished, res = self._unquoted_text_lookahead_and_optionally_finish(res_builder)
                    if is_finished:
                        return res
                else:
                    raise Exception("Programming error: undefined kind of string quotation: %s" % quote_type)
            else:
                raise Exception("Programming error: undefined task: %s" % task)
            # increment the position and column
            self.pos += 1
            self.col += 1
        raise JsonParsingException("Programming Error: reached the end of the file, but this should have been noticed earlier, when reaching the self.EOF object.")

    def _finish_quoted_string(self, res_builder, limiting_quote):
        """
        Turn the characters collected between two quotation marks into the final string.

        Linebreaks are replaced by an escaped newline, and any spaces and tabs directly following
        a linebreak are dropped. The result is wrapped in limiting_quote and evaluated with
        ast.literal_eval() so that escape sequences get resolved.
        """
        tmp = []
        encountered_linebreak = False
        for index, current in enumerate(res_builder):
            if current == '\n':
                encountered_linebreak = True
                tmp.append('\\')
                tmp.append('n')
            elif current == '\r':
                # a raw carriage return would end the line inside the literal that gets evaluated below
                if index + 1 < len(res_builder) and res_builder[index + 1] == '\n':
                    # part of a Windows linebreak: the following '\n' stands for the whole linebreak
                    continue
                encountered_linebreak = False
                tmp.append('\\')
                tmp.append('r')
            elif (current == ' ' or current == '\t') and encountered_linebreak:
                # ignore any spaces and tabs following a linebreak
                pass
            else:
                encountered_linebreak = False
                tmp.append(current)
        # combine the characters into a string and evaluate it
        return ast.literal_eval(limiting_quote + "".join(tmp) + limiting_quote)

    def _unquoted_text_lookahead_and_optionally_finish(self, res_builder):
        """
        Check if the next position is EOF or a character that is invalid for unquoted objects.
        If so, finish up and return the unquoted object.

        Returns a tuple (is_finished, value). The value is only meaningful if is_finished is True.
        It is a boolean, None, an int, a float, or, if nothing else fits, the text itself.
        Raises JsonParsingException if the text describes NaN or an infinite number.
        """
        next_char = self.chars[self.pos + 1]
        if next_char is not self.EOF and re.match(self.unquoted_characters, next_char):
            return (False, None)
        # the unquoted text ends here
        # try parsing the result in various ways before returning it
        res = "".join(res_builder)
        # booleans
        if res in ('true', 'True'):
            return (True, True)
        if res in ('false', 'False'):
            return (True, False)
        # null / None
        if res in ('null', 'None'):
            return (True, None)
        # int
        try:
            return (True, int(res))
        except ValueError:
            pass
        # float
        try:
            flt = float(res)
        except ValueError:
            # default: string
            return (True, res)
        if math.isnan(flt) or math.isinf(flt):
            raise JsonParsingException("NaN and infinite are not valid JSON values!")
        return (True, flt)


def get_error_message_details(exception=None):
    """
    Render exception information as a readable multi-line string.

    exception is a (type, value, traceback) tuple as returned by sys.exc_info().
    If it is omitted, sys.exc_info() is called to get the exception currently being handled.
    The result lists the function, location and source line of every traceback entry,
    followed by the exception type and the exception itself.
    """
    exc_type, exc_obj, exc_trace = sys.exc_info() if exception is None else exception
    pieces = ["Traceback is:\n"]
    for frame_file, frame_lineno, frame_function, frame_source in traceback.extract_tb(exc_trace):
        pieces.append("\t> Error at function %s\n" % (frame_function))
        pieces.append("\t At: %s:%s\n" % (frame_file, frame_lineno))
        pieces.append("\t Source: %s\n" % (frame_source))
    pieces.append("%s\n%s" % (exc_type, exc_obj,))
    return "".join(pieces)
17 changes: 17 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# setuptools is required here: distutils ignores install_requires and was removed in Python 3.12
from setuptools import setup

setup(
    name='loosejson',
    packages=['loosejson'],
    version='1.0.0',
    # the previous texts described a different project
    description='A JSON parser for Python that shows more useful error messages and fixes minor formatting errors automatically.',
    long_description='A JSON parser for Python that shows more useful error messages and fixes minor formatting errors automatically.',
    author='Florian Dietz',
    author_email='[email protected]',
    license='MIT',
    package_data={
        '': ['*.txt'],
    },
    install_requires=[
        # a minimum version instead of an exact pin, so that this library
        # does not conflict with other packages that need a newer six
        'six>=1.11.0',
    ],
)

0 comments on commit 59fcc98

Please sign in to comment.