diff --git a/README.md b/README.md
index 5c780a5..c1b183e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-Salve v0.5.2
+Salve v0.6.0
# Installation
@@ -9,7 +9,7 @@ In the Command Line, paste the following: `pip install salve_ipc`
Salve is an IPC library that can be used by code editors to easily get autocompletions, replacements, editorconfig suggestions, definitions, and syntax highlighting.
> **Note**
-> The first time that the system is loaded or a new server needs to be started it will take a fair bit longer
+> The first time the system is loaded, or whenever a new server needs to be started, it will take a fair bit longer. Additionally, any code that uses the IPC must ultimately be run from inside an `if __name__ == "__main__":` block to prevent a multiprocessing error.
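+>
+> A minimal sketch of that guard (the `IPC` class name and setup shown here are assumptions; see the documentation below for the actual API):
+>
+> ```python
+> from salve_ipc import IPC  # assumed entry point
+>
+>
+> def main() -> None:
+>     context = IPC()  # creating the client starts the server process
+>     # ... send requests and read responses here ...
+>     context.kill_IPC()  # shut the server down when finished
+>
+>
+> if __name__ == "__main__":
+>     # The guard keeps multiprocessing from re-running this code on import
+>     main()
+> ```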
## Documentation
diff --git a/salve_ipc/server_functions/highlight.py b/salve_ipc/server_functions/highlight.py
index 938c8e6..946cb6f 100644
--- a/salve_ipc/server_functions/highlight.py
+++ b/salve_ipc/server_functions/highlight.py
@@ -1,9 +1,11 @@
-from re import Match, Pattern, compile
+from re import MULTILINE, Match, Pattern, compile
+from beartype.typing import Callable
from pygments import lex
-from pygments.lexer import Lexer
+from pygments.lexer import Lexer, RegexLexer, default
from pygments.lexers import get_lexer_by_name
-from pygments.token import _TokenType
+from pygments.token import Comment as CommentToken
+from pygments.token import String as StringToken
from .misc import Token, generic_tokens
@@ -140,33 +142,207 @@ def find_hidden_chars(lines: list[str], start_line: int = 1) -> list[Token]:
return tok_list
+# Define some useful variables/types for the functions below
+useful_toks = {
+ StringToken.Doc,
+ StringToken.Heredoc,
+ CommentToken,
+ CommentToken.Multiline,
+}
+
+# Beartype speed optimizations
+_TokenType = type(StringToken) # Resolves to pygments.token._TokenType
+_TokenTupleInternalType = tuple[_TokenType | Callable, ...]
+_TokenTupleReturnType = list[tuple[str, _TokenType]]
+_ListOfStrs = list[str]
+_LexReturnTokens = list[tuple[_TokenType, str]]
+
+
+def get_pygments_comment_regexes(lexer: RegexLexer) -> _TokenTupleReturnType:
+ """
+    Steals the regexes that pygments uses to assign docstring, heredoc, comment, and multiline comment highlights
+    (CSS comments, though multiline, aren't classified as multiline comments)
+ """
+
+ regexes: _TokenTupleReturnType = []
+
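+    # In a RegexLexer, `lexer.tokens` maps each state name to a list of rules. A rule is
+    # normally a (regex, token type[, new state]) tuple, but the second element can also
+    # be a callable (e.g. one produced by pygments' bygroups()) that closes over the real
+    # token types, which is why the closures are inspected below.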
+ for path in lexer.tokens:
+        # This deserves a more precise type annotation, but writing out every
+        # possible entry type is impractical until beartype can handle it for us
+ path_tokens: list = lexer.tokens[path]
+
+ if isinstance(path_tokens[0], str):
+            # A string entry means this path just redirects to another path; every path is checked anyway, so skip it
+ continue
+
+ for token_tuple in path_tokens:
+            # Skip default() placeholders; we only care about actual rule tuples
+ if isinstance(token_tuple, default):
+ continue
+
+ if token_tuple[1] in useful_toks:
+ regexes.append((token_tuple[0], token_tuple[1]))
+ continue
+
+            # At this point the rule's second element SHOULD be a callable
+ if not callable(token_tuple[1]):
+ continue
+
+ pygments_func: Callable = token_tuple[1]
+
+ if pygments_func.__closure__ is None:
+                # Never true in practice, but it appeases the static type checker
+ continue
+
+            # Pygments sometimes hides these token types inside a closure
+            # (e.g. one created by bygroups()), so pull them out of the cells
+            tokens: _TokenTupleInternalType = [
+                cell.cell_contents for cell in token_tuple[1].__closure__
+            ][0]
+
+ for token in tokens:
+ if token in useful_toks:
+                    # Membership in useful_toks means it's a token type, but the static type checker can't know that
+ regexes.append((token_tuple[0], token)) # type: ignore
+ continue
+
+ return list(set(regexes)) # type: ignore
+
+
+def proper_docstring_tokens(lexer: RegexLexer, full_text: str) -> list[Token]:
+ proper_highlight_regexes: _TokenTupleReturnType = (
+ get_pygments_comment_regexes(lexer)
+ )
+
+ new_docstring_tokens: list[Token] = []
+ split_text: _ListOfStrs = full_text.splitlines()
+
+ for regex, token_type in proper_highlight_regexes:
+ current_text = full_text
+ match: Match[str] | None = compile(regex, flags=MULTILINE).search(
+ full_text
+ )
+
+ if match is None:
+ # Onwards to the next regex!
+ continue
+
+ start_pos: tuple[int, int] = (1, 0)
+ simple_token_type: str = get_new_token_type(str(token_type))
+
+ while match:
+ span: tuple[int, int] = match.span()
+ matched_str: str = current_text[span[0] : span[1]]
+
+            # Strip leading whitespace from the match and update the span accordingly
+ matched_len_initial: int = len(matched_str)
+ matched_str = matched_str.lstrip()
+ matched_len_lstripped: int = len(matched_str)
+ span = (
+ (span[0] + matched_len_initial - matched_len_lstripped),
+ span[1],
+ )
+
+            # A few other values used below
+ newline_count: int = matched_str.count("\n")
+ previous_text: str = current_text[: span[0]]
+
+ start_line: int = previous_text.count("\n") + start_pos[0]
+
+ # Deal with the easy case first
+ if not newline_count:
+ # Prepare token variables
+ start_col: int = split_text[start_line].find(matched_str)
+                current_text: str = full_text[span[1] :]
+
+ # Create and add token
+ token: Token = (
+ (start_line, start_col),
+ matched_len_lstripped,
+ simple_token_type,
+ )
+ new_docstring_tokens.append(token)
+
+ start_pos = (start_line, start_col + matched_len_lstripped)
+ current_text = current_text[: span[1]]
+
+ # Continue onward!
+ match = compile(regex, flags=MULTILINE).search(current_text)
+ continue
+
+ # Now for multiple line matches
+ split_match: list[str] = matched_str.splitlines()
+ for i in range(newline_count + 1):
+ match_str: str = split_match[i]
+ initial_len: int = len(match_str)
+ start_col: int = initial_len - len(match_str.lstrip())
+
+ if i == 0:
+ line: str = split_text[start_line - 1]
+
+ true_len: int = len(line)
+ lstripped_len: int = len(line.lstrip())
+ initial_len = lstripped_len
+ if lstripped_len != true_len:
+ # In case the regex doesn't skip whitespace/junk
+ initial_len = true_len
+
+ start_col = line.find(match_str)
+
+ # Create and add token
+ token: Token = (
+ (start_line + i, start_col),
+ initial_len - start_col,
+ simple_token_type,
+ )
+ new_docstring_tokens.append(token)
+
+ start_pos = (start_line + i, start_col + len(match_str))
+
+ # Continue onward!
+ current_text = current_text[span[1] :]
+ match = compile(regex, flags=MULTILINE).search(current_text)
+
+ return new_docstring_tokens
+
+
def get_highlights(
full_text: str,
language: str = "text",
text_range: tuple[int, int] = (1, -1),
) -> list[Token]:
"""Gets pygments tokens from text provided in language proved and converts them to Token's"""
+
+    # Create some variables used throughout the function
lexer: Lexer = get_lexer_by_name(language)
- split_text: list[str] = full_text.splitlines()
+ split_text: _ListOfStrs = full_text.splitlines()
new_tokens: list[Token] = []
+
if text_range[1] == -1:
+        # An end value of -1 means the range should extend to the end of the file
text_range = (text_range[0], len(split_text))
+
start_index: tuple[int, int] = (text_range[0], 0)
- split_text = split_text[text_range[0] - 1 : text_range[1]]
+    # Keep only the lines inside the text range, since this is the list we iterate over
+ split_text: _ListOfStrs = split_text[text_range[0] - 1 : text_range[1]]
for line in split_text:
- og_tokens: list[tuple[_TokenType, str]] = list(lex(line, lexer))
+ og_tokens: _LexReturnTokens = list(lex(line, lexer))
for token in og_tokens:
new_type: str = get_new_token_type(str(token[0]))
token_str: str = token[1]
token_len: int = len(token_str)
- if token_str == "\n": # Lexer adds the newline back
+ if token_str == "\n":
+ # Lexer adds the newline back as its own token
continue
+
if not token_str.strip() and new_type == "Text":
+                # Skip whitespace-only or plain Text tokens since they add no useful highlighting info
start_index = (start_index[0], start_index[1] + token_len)
continue
+ # Create and append the Token that will be returned
new_token = (start_index, token_len, new_type)
new_tokens.append(new_token)
@@ -174,8 +350,15 @@ def get_highlights(
start_index = (start_index[0] + 1, 0)
# Add extra token types
+    # NOTE: these are added at the end so that, as the editor applies tokens one by one,
+    # they override the earlier, potentially less accurate tokens
+
+ if isinstance(lexer, RegexLexer):
+ new_tokens += proper_docstring_tokens(lexer, full_text)
+
new_tokens += get_urls(split_text, text_range[0])
if [char for char in hidden_chars if char in full_text]:
+        # Only compute hidden char tokens when hidden chars are actually present
new_tokens += find_hidden_chars(split_text, text_range[0])
return new_tokens
diff --git a/setup.py b/setup.py
index 51ca4a6..6a77db1 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# pip install -r requirements.txt --break-system-packages; pip uninstall salve_ipc -y --break-system-packages; pip install . --break-system-packages --no-build-isolation; pytest .
+# pip install -r requirements.txt --break-system-packages; pip uninstall salve_ipc -y --break-system-packages; pip install . --break-system-packages --no-build-isolation; python3 -m pytest .
from setuptools import setup
with open("README.md", "r") as file:
@@ -7,7 +7,7 @@
setup(
name="salve_ipc",
- version="0.5.2",
+ version="0.6.0",
description="Salve is an IPC library that can be used by code editors to easily get autocompletions, replacements, editorconfig suggestions, definitions, and syntax highlighting.",
author="Moosems",
author_email="moosems.j@gmail.com",
diff --git a/tests/test_ipc.py b/tests/test_ipc.py
index 5f24632..914d51c 100644
--- a/tests/test_ipc.py
+++ b/tests/test_ipc.py
@@ -55,7 +55,7 @@ def test_IPC():
"type": "response",
"cancelled": False,
"command": AUTOCOMPLETE,
- "result": ["this"],
+ "result": ["test", "this"],
}
replacements_output: Response | None = context.get_response(REPLACEMENTS)
@@ -102,52 +102,28 @@ def test_IPC():
((8, 10), 3, "Name"),
((8, 13), 1, "Punctuation"),
((8, 14), 1, "Punctuation"),
- ((9, 4), 3, "Keyword"),
- ((9, 8), 8, "Name"),
- ((9, 16), 1, "Punctuation"),
- ((9, 17), 4, "Name"),
- ((9, 21), 1, "Punctuation"),
- ((9, 22), 1, "Punctuation"),
- ((10, 8), 4, "Keyword"),
- ((13, 0), 3, "Name"),
- ((13, 3), 1, "Punctuation"),
- ((13, 4), 1, "Punctuation"),
- ((14, 0), 24, "Comment"),
- ((14, 2), 22, "Link"),
+ ((9, 4), 3, "String"),
+ ((10, 4), 4, "Name"),
+ ((11, 4), 3, "String"),
+ ((13, 4), 3, "Keyword"),
+ ((13, 8), 8, "Name"),
+ ((13, 16), 1, "Punctuation"),
+ ((13, 17), 4, "Name"),
+ ((13, 21), 1, "Punctuation"),
+ ((13, 22), 1, "Punctuation"),
+ ((14, 8), 4, "Keyword"),
+ ((17, 0), 3, "Name"),
+ ((17, 3), 1, "Punctuation"),
+ ((17, 4), 1, "Punctuation"),
+ ((18, 0), 24, "Comment"),
+ ((9, 4), 3, "String"),
+ ((10, 4), 4, "String"),
+ ((11, 4), 3, "String"),
+ ((18, 2), 22, "Link"),
((5, 7), 1, "Hidden_Char"),
],
}
- editorconfig_response: Response | None = context.get_response(EDITORCONFIG)
- if editorconfig_response is None:
- raise AssertionError("Editorconfig output is None")
- editorconfig_response["id"] = 0
- assert editorconfig_response == {
- "id": 0,
- "type": "response",
- "cancelled": False,
- "command": EDITORCONFIG,
- "result": {
- "end_of_line": "lf",
- "insert_final_newline": "true",
- "charset": "utf-8",
- "indent_style": "space",
- "indent_size": "4",
- },
- }
-
- definition_response: Response | None = context.get_response(DEFINITION)
- if definition_response is None:
- raise AssertionError("Definition output is None")
- definition_response["id"] = 0
- assert definition_response == {
- "id": 0,
- "type": "response",
- "cancelled": False,
- "command": DEFINITION,
- "result": ((3, 0), 3, "Definition"),
- }
-
context.remove_file("test")
context.kill_IPC()
diff --git a/tests/testing_file1.py b/tests/testing_file1.py
index e5f5024..b40a7f8 100644
--- a/tests/testing_file1.py
+++ b/tests/testing_file1.py
@@ -6,6 +6,10 @@
class Foo(Bar):
+ """
+ test
+ """
+
def __init__(self):
pass