-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from ZacHooper/dev/cursor
perf: scan through rather than copying token list
- Loading branch information
Showing
12 changed files
with
164 additions
and
131 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Python | ||
/__pycache__/ | ||
*.py[cod] | ||
venv/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import json | ||
from datetime import datetime as time | ||
|
||
|
||
def main(path="data/canada_data.json"):
    """Time a single ``json.loads`` call on the file at ``path`` and print it.

    Args:
        path: JSON file to benchmark. Defaults to the Canada dataset; the
            other datasets this script has been pointed at are
            ``data/citm_catalog.json`` and ``data/twitter.json``.

    The elapsed parse time is printed twice: first in milliseconds, then in
    nanoseconds. Reading the file happens before the timer starts, so only
    the parse itself is measured.
    """
    # Function-scope import: in this file the module-level name ``time`` is
    # bound to ``datetime.datetime``, so the stdlib ``time`` module cannot be
    # reached through that name.
    from time import perf_counter_ns

    with open(path, "r") as f:
        text = f.read()

    # Subtracting two datetimes gives a timedelta, which prints as
    # "H:MM:SS.ffffff" and stays a timedelta when divided by an int. An
    # integer nanosecond counter makes the "ms"/"ns" labels truthful.
    start = perf_counter_ns()
    json.loads(text)
    elapsed_ns = perf_counter_ns() - start

    print("Time taken to parse JSON: ", elapsed_ns / 1000000, "ms")
    print("Time taken to parse JSON: ", elapsed_ns, "ns")
|
||
|
||
if __name__ == "__main__": | ||
main() |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,119 +1,102 @@ | ||
from json.types import JSON_QUOTE, JSON_WHITESPACE, JSON_SYNTAX, Value | ||
from json.types import ( | ||
JSON_QUOTE, | ||
JSON_WHITESPACE, | ||
JSON_SYNTAX, | ||
Value, | ||
JSON_NUMBER, | ||
JSON_ESCAPE, | ||
) | ||
|
||
|
||
@value | ||
struct LexResult: | ||
var value: Value | ||
var is_null: Bool | ||
|
||
fn __init__(inout self, value: Value, is_null: Bool): | ||
self.value = value | ||
self.is_null = is_null | ||
|
||
|
||
fn lex_string(inout string: String) raises -> LexResult: | ||
fn lex_string(string: String, inout position: Int) raises -> String: | ||
var json_string: String = "" | ||
var start_of_string = position | ||
|
||
if string[0] == JSON_QUOTE: | ||
string = string[1:] | ||
else: | ||
return LexResult(Value(None), True) | ||
var skip = False | ||
|
||
for i in range(len(string)): | ||
for i in range(len(string) - position): | ||
if skip: | ||
skip = False | ||
continue | ||
var c = string[i + start_of_string] | ||
# Handle empty string | ||
if string[i] == JSON_QUOTE and len(json_string) == 0: | ||
string = string[1:] | ||
return LexResult(Value(json_string), False) | ||
# Handle end of string. Make sure the previous character is not an escape character | ||
if string[i] == JSON_QUOTE and json_string[-1] != "\\": | ||
string = string[i + 1 :] | ||
return LexResult(Value(json_string), False) | ||
if c == JSON_QUOTE and len(json_string) == 0: | ||
position += 1 | ||
return json_string | ||
elif c == JSON_ESCAPE: | ||
# Add the escape character and the next character | ||
json_string += c | ||
json_string += string[i + start_of_string + 1] | ||
# Then skip the next character | ||
skip = True | ||
continue | ||
# Handle end of string | ||
elif c == JSON_QUOTE: | ||
position += i + 1 | ||
return json_string | ||
# Handle escape characters | ||
elif json_string == "\\" and string[i] != "\\": | ||
json_string = json_string[:-1] | ||
json_string += string[i] | ||
else: | ||
json_string += string[i] | ||
json_string += c | ||
|
||
raise Error("Expected end-of-string quote") | ||
|
||
|
||
fn lex_number(inout string: String) raises -> LexResult: | ||
fn lex_number(string: String, inout position: Int) raises -> Value: | ||
var json_number: String = "" | ||
var number_characters = "1234567890-e." | ||
var original_position = position | ||
|
||
for i in range(len(string)): | ||
var c = string[i] | ||
for i in range(len(string) - position): | ||
var c = string[i + original_position] | ||
if c in number_characters: | ||
json_number += c | ||
else: | ||
break | ||
|
||
# Remove the number from the full JSON String | ||
string = string[len(json_number) :] | ||
|
||
if not len(json_number): | ||
return LexResult(Value(None), True) | ||
position += len(json_number) | ||
|
||
if "." in json_number: | ||
var num = atof(json_number) | ||
return LexResult(Value(num), False) | ||
|
||
return LexResult(Value(atol(json_number)), False) | ||
|
||
|
||
fn lex_bool(inout string: String) -> LexResult: | ||
if string.startswith("true"): | ||
string = string[4:] | ||
return LexResult(Value(True), False) | ||
elif string.startswith("false"): | ||
string = string[5:] | ||
return LexResult(Value(False), False) | ||
else: | ||
return LexResult(Value(None), True) | ||
return Value(num) | ||
|
||
|
||
fn lex_null(inout string: String) -> LexResult: | ||
if string.startswith("null"): | ||
string = string[4:] | ||
return LexResult(Value(None), True) | ||
else: | ||
return LexResult(Value(None), False) | ||
return Value(atol(json_number)) | ||
|
||
|
||
fn lex(raw_string: String) raises -> List[Value]: | ||
var tokens = List[Value]() | ||
var string = raw_string | ||
var position: Int = 0 | ||
|
||
while len(string): | ||
var json_string = lex_string(string) | ||
if json_string.is_null == False: | ||
tokens.append(json_string.value) | ||
continue | ||
|
||
var json_number = lex_number(string) | ||
if json_number.is_null == False: | ||
tokens.append(json_number.value) | ||
continue | ||
|
||
var json_bool = lex_bool(string) | ||
if json_bool.is_null == False: | ||
tokens.append(json_bool.value) | ||
while position < len(string): | ||
if string[position] in JSON_WHITESPACE: | ||
position += 1 | ||
continue | ||
|
||
var json_null = lex_null(string) | ||
if json_null.is_null == True: | ||
elif string[position] == JSON_QUOTE: | ||
position += 1 | ||
var json_string = lex_string(string, position) | ||
tokens.append(Value(json_string)) | ||
elif string[position] in JSON_NUMBER: | ||
var json_number = lex_number(string, position) | ||
tokens.append(json_number) | ||
elif string[position] == "t": | ||
tokens.append(Value(True)) | ||
position += 4 | ||
elif string[position] == "f": | ||
tokens.append(Value(False)) | ||
position += 5 | ||
elif string[position] == "n": | ||
tokens.append(Value(None)) | ||
continue | ||
|
||
if string[0] in JSON_WHITESPACE: | ||
string = string[1:] | ||
elif string[0] in JSON_SYNTAX: | ||
tokens.append(Value(string[0])) | ||
string = string[1:] | ||
position += 4 | ||
elif string[position] in JSON_SYNTAX: | ||
tokens.append(Value(string[position])) | ||
position += 1 | ||
else: | ||
raise Error( | ||
"Unexpected character: " + string[0] + " Near: " + string[1:] | ||
"Unexpected character: " | ||
+ string[position] | ||
+ " Near: " | ||
+ string[position - 10 : position + 10] | ||
) | ||
|
||
return tokens |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.