diff --git a/README.md b/README.md
index 5c780a5..c1b183e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-Salve v0.5.2
+Salve v0.6.0
 
 # Installation
@@ -9,7 +9,7 @@ In the Command Line, paste the following: `pip install salve_ipc`
 
 Salve is an IPC library that can be used by code editors to easily get autocompletions, replacements, editorconfig suggestions, definitions, and syntax highlighting.
 
 > **Note**
-> The first time that the system is loaded or a new server needs to be started it will take a fair bit longer
+> The first time that the system is loaded or a new server needs to be started, it will take a fair bit longer. Additionally, any usage of IPC needs to eventually be called from an `if __name__ == "__main__":` block to prevent a multiprocessing error.
 
 ## Documentation
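A minimal sketch of the `if __name__ == "__main__":` guard described in the note above. The `IPC()` constructor and the `update_file()`/`request()` signatures used here are assumptions about the library's public API; `Response`, `get_response()`, `kill_IPC()`, and `AUTOCOMPLETE` appear in `tests/test_ipc.py` further down:

```python
# NOTE: IPC(), update_file(), and request() signatures are assumed here;
# adjust them to the actual salve_ipc API if they differ.
from salve_ipc import AUTOCOMPLETE, IPC, Response


def main() -> None:
    context = IPC()  # Starts the server process, which is why the guard matters

    context.update_file("example", "test\nthis example completes test\n")
    context.request(
        AUTOCOMPLETE, file="example", expected_keywords=[], current_word="t"
    )

    response: Response | None = context.get_response(AUTOCOMPLETE)
    print(response)  # May be None if the server has not answered yet

    context.kill_IPC()


if __name__ == "__main__":
    # Without this guard, multiprocessing's "spawn" start method re-imports the
    # module in the child process and errors out before main() can run.
    main()
```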

diff --git a/salve_ipc/server_functions/highlight.py b/salve_ipc/server_functions/highlight.py
index 938c8e6..946cb6f 100644
--- a/salve_ipc/server_functions/highlight.py
+++ b/salve_ipc/server_functions/highlight.py
@@ -1,9 +1,11 @@
-from re import Match, Pattern, compile
+from re import MULTILINE, Match, Pattern, compile
 
+from beartype.typing import Callable
 from pygments import lex
-from pygments.lexer import Lexer
+from pygments.lexer import Lexer, RegexLexer, default
 from pygments.lexers import get_lexer_by_name
-from pygments.token import _TokenType
+from pygments.token import Comment as CommentToken
+from pygments.token import String as StringToken
 
 from .misc import Token, generic_tokens
 
@@ -140,33 +142,207 @@ def find_hidden_chars(lines: list[str], start_line: int = 1) -> list[Token]:
     return tok_list
 
 
+# Instantiate some useful variables/types for the following functions
+useful_toks = {
+    StringToken.Doc,
+    StringToken.Heredoc,
+    CommentToken,
+    CommentToken.Multiline,
+}
+
+# Beartype speed optimizations
+_TokenType = type(StringToken)  # Resolves to pygments.token._TokenType
+_TokenTupleInternalType = tuple[_TokenType | Callable, ...]
+_TokenTupleReturnType = list[tuple[str, _TokenType]]
+_ListOfStrs = list[str]
+_LexReturnTokens = list[tuple[_TokenType, str]]
+
+
+def get_pygments_comment_regexes(lexer: RegexLexer) -> _TokenTupleReturnType:
+    """
+    Steals the regexes that pygments uses to give docstring, heredoc, comment, and multiline comment highlights
+    (CSS comments, though multiline, aren't called multiline comments)
+    """
+
+    regexes: _TokenTupleReturnType = []
+
+    for path in lexer.tokens:
+        # This should have a better type definition, but spelling out every possible
+        # shape by hand isn't practical, so we wait for beartype to support it
+        path_tokens: list = lexer.tokens[path]
+
+        if isinstance(path_tokens[0], str):
+            # This means the path redirects to another path in its place, but we
+            # check them all anyway, so just exit this path
+            continue
+
+        for token_tuple in path_tokens:
+            # Ensure that this is actually a tuple and not a random type
+            if isinstance(token_tuple, default):
+                continue
+
+            if token_tuple[1] in useful_toks:
+                regexes.append((token_tuple[0], token_tuple[1]))
+                continue
+
+            # The second element of the token tuple SHOULD be a callable at this point
+            if not callable(token_tuple[1]):
+                continue
+
+            pygments_func: Callable = token_tuple[1]
+
+            if pygments_func.__closure__ is None:
+                # Will always evaluate to False, but it's here to appease the static type checkers
+                continue
+
+            tokens: _TokenTupleInternalType = [
+                cell.cell_contents for cell in token_tuple[1].__closure__
+            ][
+                0
+            ]  # Sometimes pygments hides these types behind functional programming
+
+            for token in tokens:
+                if token in useful_toks:
+                    # We know that if it's in the useful tokens set it's a token type, but the static type checker doesn't
+                    regexes.append((token_tuple[0], token))  # type: ignore
+                    continue
+
+    return list(set(regexes))  # type: ignore
+
+
+def proper_docstring_tokens(lexer: RegexLexer, full_text: str) -> list[Token]:
+    proper_highlight_regexes: _TokenTupleReturnType = (
+        get_pygments_comment_regexes(lexer)
+    )
+
+    new_docstring_tokens: list[Token] = []
+    split_text: _ListOfStrs = full_text.splitlines()
+
+    for regex, token_type in proper_highlight_regexes:
+        current_text = full_text
+        match: Match[str] | None = compile(regex, flags=MULTILINE).search(
+            full_text
+        )
+
+        if match is None:
+            # Onwards to the next regex!
+            continue
+
+        start_pos: tuple[int, int] = (1, 0)
+        simple_token_type: str = get_new_token_type(str(token_type))
+
+        while match:
+            span: tuple[int, int] = match.span()
+            matched_str: str = current_text[span[0] : span[1]]
+
+            # Remove any whitespace preceding the match and update the span accordingly
+            matched_len_initial: int = len(matched_str)
+            matched_str = matched_str.lstrip()
+            matched_len_lstripped: int = len(matched_str)
+            span = (
+                (span[0] + matched_len_initial - matched_len_lstripped),
+                span[1],
+            )
+
+            # Other useful, unrelated variables
+            newline_count: int = matched_str.count("\n")
+            previous_text: str = current_text[: span[0]]
+
+            start_line: int = previous_text.count("\n") + start_pos[0]
+
+            # Deal with the easy case first
+            if not newline_count:
+                # Prepare token variables
+                start_col: int = split_text[start_line].find(matched_str)
+                current_text: str = full_text[span[0] + span[1] - span[0] :]
+
+                # Create and add token
+                token: Token = (
+                    (start_line, start_col),
+                    matched_len_lstripped,
+                    simple_token_type,
+                )
+                new_docstring_tokens.append(token)
+
+                start_pos = (start_line, start_col + matched_len_lstripped)
+                current_text = current_text[: span[1]]
+
+                # Continue onward!
+                match = compile(regex, flags=MULTILINE).search(current_text)
+                continue
+
+            # Now for multi-line matches
+            split_match: list[str] = matched_str.splitlines()
+            for i in range(newline_count + 1):
+                match_str: str = split_match[i]
+                initial_len: int = len(match_str)
+                start_col: int = initial_len - len(match_str.lstrip())
+
+                if i == 0:
+                    line: str = split_text[start_line - 1]
+
+                    true_len: int = len(line)
+                    lstripped_len: int = len(line.lstrip())
+                    initial_len = lstripped_len
+                    if lstripped_len != true_len:
+                        # In case the regex doesn't skip whitespace/junk
+                        initial_len = true_len
+
+                    start_col = line.find(match_str)
+
+                # Create and add token
+                token: Token = (
+                    (start_line + i, start_col),
+                    initial_len - start_col,
+                    simple_token_type,
+                )
+                new_docstring_tokens.append(token)
+
+                start_pos = (start_line + i, start_col + len(match_str))
+
+            # Continue onward!
+            current_text = current_text[span[1] :]
+            match = compile(regex, flags=MULTILINE).search(current_text)
+
+    return new_docstring_tokens
+
+
 def get_highlights(
     full_text: str,
     language: str = "text",
     text_range: tuple[int, int] = (1, -1),
 ) -> list[Token]:
     """Gets pygments tokens from text provided in language proved and converts them to Token's"""
+
+    # Create some variables used throughout the function
     lexer: Lexer = get_lexer_by_name(language)
-    split_text: list[str] = full_text.splitlines()
+    split_text: _ListOfStrs = full_text.splitlines()
     new_tokens: list[Token] = []
+
     if text_range[1] == -1:
+        # This indicates that the text range should span the entire text
         text_range = (text_range[0], len(split_text))
+
     start_index: tuple[int, int] = (text_range[0], 0)
-    split_text = split_text[text_range[0] - 1 : text_range[1]]
+    # We only want the lines in the text range because this list is iterated over
+    split_text: _ListOfStrs = split_text[text_range[0] - 1 : text_range[1]]
 
     for line in split_text:
-        og_tokens: list[tuple[_TokenType, str]] = list(lex(line, lexer))
+        og_tokens: _LexReturnTokens = list(lex(line, lexer))
         for token in og_tokens:
             new_type: str = get_new_token_type(str(token[0]))
             token_str: str = token[1]
             token_len: int = len(token_str)
 
-            if token_str == "\n":  # Lexer adds the newline back
+            if token_str == "\n":
+                # Lexer adds the newline back as its own token
                 continue
+
             if not token_str.strip() and new_type == "Text":
+                # If the token is empty or is plain Text we simply skip it because that's ultimately useless info
                 start_index = (start_index[0], start_index[1] + token_len)
                 continue
 
+            # Create and append the Token that will be returned
             new_token = (start_index, token_len, new_type)
             new_tokens.append(new_token)
@@ -174,8 +350,15 @@ def get_highlights(
         start_index = (start_index[0] + 1, 0)
 
     # Add extra token types
+    # NOTE: we add these at the end so that, when the editor applies them one by one,
+    # they override older tokens that may not be as accurate
+
+    if isinstance(lexer, RegexLexer):
+        new_tokens += proper_docstring_tokens(lexer, full_text)
+
     new_tokens += get_urls(split_text, text_range[0])
     if [char for char in hidden_chars if char in full_text]:
+        # If there are no hidden chars we don't want to needlessly compute this
        new_tokens += find_hidden_chars(split_text, text_range[0])
 
     return new_tokens
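To make `get_pygments_comment_regexes()` above easier to follow: a pygments `RegexLexer` keeps its rules in `lexer.tokens`, a dict mapping state names to lists of `(regex, token_or_callback, ...)` tuples, and callbacks such as `bygroups(...)` keep the real token types in their closure cells. The following standalone sketch is not part of the patch and only loosely mirrors the function's filtering, but it shows where those regexes and token types live:

```python
# Standalone illustration (not part of the patch) of the data that
# get_pygments_comment_regexes() walks over.
from pygments.lexers import get_lexer_by_name
from pygments.token import Comment, String

useful = {String.Doc, String.Heredoc, Comment, Comment.Multiline}
lexer = get_lexer_by_name("python")

for state, rules in lexer.tokens.items():
    if rules and isinstance(rules[0], str):
        # The state simply redirects to other states
        continue
    for rule in rules:
        if not isinstance(rule, tuple):
            # Skip include(...) strings, default(...) entries, and other non-tuples
            continue
        regex, action = rule[0], rule[1]
        if action in useful:
            print(f"{state}: {regex!r} -> {action}")
        elif callable(action) and action.__closure__:
            # Callbacks like bygroups() hide the real token types in closure
            # cells; this mirrors the closure unpacking done in the patch
            hidden = [cell.cell_contents for cell in action.__closure__][0]
            print(f"{state}: {regex!r} -> callback wrapping {hidden}")
```

Because these rules are the same ones pygments itself compiles, re-running them with `re.MULTILINE` over the whole file, as `proper_docstring_tokens()` does, recovers multi-line constructs that the per-line `lex()` pass in `get_highlights()` cannot see.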
diff --git a/setup.py b/setup.py
index 51ca4a6..6a77db1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# pip install -r requirements.txt --break-system-packages; pip uninstall salve_ipc -y --break-system-packages; pip install . --break-system-packages --no-build-isolation; pytest .
+# pip install -r requirements.txt --break-system-packages; pip uninstall salve_ipc -y --break-system-packages; pip install . --break-system-packages --no-build-isolation; python3 -m pytest .
 from setuptools import setup
 
 with open("README.md", "r") as file:
@@ -7,7 +7,7 @@
 setup(
     name="salve_ipc",
-    version="0.5.2",
+    version="0.6.0",
     description="Salve is an IPC library that can be used by code editors to easily get autocompletions, replacements, editorconfig suggestions, definitions, and syntax highlighting.",
     author="Moosems",
     author_email="moosems.j@gmail.com",
diff --git a/tests/test_ipc.py b/tests/test_ipc.py
index 5f24632..914d51c 100644
--- a/tests/test_ipc.py
+++ b/tests/test_ipc.py
@@ -55,7 +55,7 @@ def test_IPC():
         "type": "response",
         "cancelled": False,
         "command": AUTOCOMPLETE,
-        "result": ["this"],
+        "result": ["test", "this"],
     }
 
     replacements_output: Response | None = context.get_response(REPLACEMENTS)
@@ -102,52 +102,28 @@ def test_IPC():
             ((8, 10), 3, "Name"),
             ((8, 13), 1, "Punctuation"),
             ((8, 14), 1, "Punctuation"),
-            ((9, 4), 3, "Keyword"),
-            ((9, 8), 8, "Name"),
-            ((9, 16), 1, "Punctuation"),
-            ((9, 17), 4, "Name"),
-            ((9, 21), 1, "Punctuation"),
-            ((9, 22), 1, "Punctuation"),
-            ((10, 8), 4, "Keyword"),
-            ((13, 0), 3, "Name"),
-            ((13, 3), 1, "Punctuation"),
-            ((13, 4), 1, "Punctuation"),
-            ((14, 0), 24, "Comment"),
-            ((14, 2), 22, "Link"),
+            ((9, 4), 3, "String"),
+            ((10, 4), 4, "Name"),
+            ((11, 4), 3, "String"),
+            ((13, 4), 3, "Keyword"),
+            ((13, 8), 8, "Name"),
+            ((13, 16), 1, "Punctuation"),
+            ((13, 17), 4, "Name"),
+            ((13, 21), 1, "Punctuation"),
+            ((13, 22), 1, "Punctuation"),
+            ((14, 8), 4, "Keyword"),
+            ((17, 0), 3, "Name"),
+            ((17, 3), 1, "Punctuation"),
+            ((17, 4), 1, "Punctuation"),
+            ((18, 0), 24, "Comment"),
+            ((9, 4), 3, "String"),
+            ((10, 4), 4, "String"),
+            ((11, 4), 3, "String"),
+            ((18, 2), 22, "Link"),
             ((5, 7), 1, "Hidden_Char"),
         ],
     }
 
-    editorconfig_response: Response | None = context.get_response(EDITORCONFIG)
-    if editorconfig_response is None:
-        raise AssertionError("Editorconfig output is None")
-    editorconfig_response["id"] = 0
-    assert editorconfig_response == {
-        "id": 0,
-        "type": "response",
-        "cancelled": False,
-        "command": EDITORCONFIG,
-        "result": {
-            "end_of_line": "lf",
-            "insert_final_newline": "true",
-            "charset": "utf-8",
-            "indent_style": "space",
-            "indent_size": "4",
-        },
-    }
-
-    definition_response: Response | None = context.get_response(DEFINITION)
-    if definition_response is None:
-        raise AssertionError("Definition output is None")
-    definition_response["id"] = 0
-    assert definition_response == {
-        "id": 0,
-        "type": "response",
-        "cancelled": False,
-        "command": DEFINITION,
-        "result": ((3, 0), 3, "Definition"),
-    }
-
     context.remove_file("test")
     context.kill_IPC()
diff --git a/tests/testing_file1.py b/tests/testing_file1.py
index e5f5024..b40a7f8 100644
--- a/tests/testing_file1.py
+++ b/tests/testing_file1.py
@@ -6,6 +6,10 @@
 
 
 class Foo(Bar):
+    """
+    test
+    """
+
     def __init__(self):
         pass
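Finally, a rough usage sketch of the updated `get_highlights()` against a snippet shaped like the new `tests/testing_file1.py`. The import path is inferred from the file location in this diff, and the exact tokens depend on the installed pygments version, so the commented expectation is illustrative only:

```python
# Rough usage sketch; the commented expectations are illustrative, not pinned.
from salve_ipc.server_functions.highlight import get_highlights

code = (
    "class Foo:\n"
    '    """\n'
    "    test\n"
    '    """\n'
    "\n"
    "    def __init__(self):\n"
    "        pass\n"
)

for token in get_highlights(code, language="python"):
    # Each Token is ((line, column), length, type), matching tests/test_ipc.py
    print(token)

# With the new RegexLexer pass enabled, the three docstring lines should also be
# reported as "String" tokens appended after the regular per-line lexer tokens.
```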