executablebooks · hukkin · Feb 13, 2022 · Feb 13, 2022 · Feb 13, 2022 · Feb 13, 2022
diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py
@@ -3,6 +3,7 @@
 import html
 import re
 from typing import Any
+import warnings
 
 from .entities import entities
 
@@ -111,7 +112,7 @@ def replaceEntityPattern(match: str, name: str) -> str:
     if name in entities:
         return entities[name]
 
-    if ord(name[0]) == 0x23 and DIGITAL_ENTITY_TEST_RE.search(name):
+    if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name):
         code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10)
         if isValidEntityCode(code):
             return fromCodePoint(code)
@@ -194,7 +195,12 @@ def escapeRE(string: str) -> str:
 
 
 def isSpace(code: object) -> bool:
-    return code in {0x09, 0x20}
+    if isinstance(code, int):
+        warnings.warn(
+            "`int`s are deprecated as `isSpace` input", DeprecationWarning, stacklevel=2
+        )
+        code = chr(code)
+    return code in {"\t", " "}
 
 
 MD_WHITESPACE = {

diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py
@@ -17,16 +17,16 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False)
     level = 1
 
     while state.pos < state.posMax:
-        marker = state.srcCharCode[state.pos]
-        if marker == 0x5D:  # /* ] */)
+        marker = state.src[state.pos]
+        if marker == "]":
             level -= 1
             if level == 0:
                 found = True
                 break
 
         prevPos = state.pos
         state.md.inline.skipToken(state)
-        if marker == 0x5B:  # /* [ */)
+        if marker == "[":
             if prevPos == state.pos - 1:
                 # increase level if we find text `[`,
                 # which is not a part of any token

diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py
@@ -93,17 +93,10 @@ def tokenize(
                 line += 1
                 state.line = line
 
-    def parse(
-        self,
-        src: str,
-        md,
-        env,
-        outTokens: list[Token],
-        ords: tuple[int, ...] | None = None,
-    ) -> list[Token] | None:
+    def parse(self, src: str, md, env, outTokens: list[Token]) -> list[Token] | None:
         """Process input string and push block tokens into `outTokens`."""
         if not src:
             return None
-        state = StateBlock(src, md, env, outTokens, ords)
+        state = StateBlock(src, md, env, outTokens)
         self.tokenize(state, state.line, state.lineMax)
         return state.tokens
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
@@ -23,8 +23,7 @@
       to manipulate `Token.attrs`, which have an identical signature to those upstream.
     - Use python version of `charCodeAt`
     - |
-      Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state
-      objects and sharing those whenever possible
+      Use `str` units instead of `int`s to represent Unicode codepoints.
       This provides a significant performance boost
     - |
       In markdown_it/rules_block/reference.py,

diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py
@@ -19,30 +19,35 @@ class Ruler
 
 from collections.abc import Callable, Iterable, MutableMapping
 from dataclasses import dataclass, field
+from functools import lru_cache
 from typing import TYPE_CHECKING
+import warnings
 
 from markdown_it._compat import DATACLASS_KWARGS
 
 if TYPE_CHECKING:
     from markdown_it import MarkdownIt
 
 
-class StateBase:
-    srcCharCode: tuple[int, ...]
+@lru_cache()
+def _str_to_ords(s: str) -> tuple[int, ...]:
+    return tuple(ord(c) for c in s)
+
 
+class StateBase:
     def __init__(self, src: str, md: MarkdownIt, env: MutableMapping):
         self.src = src
         self.env = env
         self.md = md
 
     @property
-    def src(self) -> str:
-        return self._src
-
-    @src.setter
-    def src(self, value: str) -> None:
-        self._src = value
-        self.srcCharCode = tuple(ord(c) for c in self.src)
+    def srcCharCode(self) -> tuple[int, ...]:
+        warnings.warn(
+            "`StateBase.srcCharCode` is deprecated. Use `StateBase.src`.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        return _str_to_ords(self.src)
 
 
 # The first positional arg is always a subtype of `StateBase`. Other

diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py
@@ -23,7 +23,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
         return False
 
     # check the block quote marker
-    if state.srcCharCode[pos] != 0x3E:  # /* > */
+    if state.src[pos] != ">":
         return False
     pos += 1
 
@@ -36,20 +36,20 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
     initial = offset = state.sCount[startLine] + 1
 
     try:
-        second_char_code: int | None = state.srcCharCode[pos]
+        second_char_code: str | None = state.src[pos]
     except IndexError:
         second_char_code = None
 
     # skip one optional space after '>'
-    if second_char_code == 0x20:  # /* space */
+    if second_char_code == " ":
         # ' >   test '
         #     ^ -- position start of line here:
         pos += 1
         initial += 1
         offset += 1
         adjustTab = False
         spaceAfterMarker = True
-    elif second_char_code == 0x09:  # /* tab */
+    elif second_char_code == "\t":
         spaceAfterMarker = True
 
         if (state.bsCount[startLine] + offset) % 4 == 3:
@@ -72,10 +72,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
     state.bMarks[startLine] = pos
 
     while pos < max:
-        ch = state.srcCharCode[pos]
+        ch = state.src[pos]
 
         if isSpace(ch):
-            if ch == 0x09:  # / tab /
+            if ch == "\t":
                 offset += (
                     4
                     - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
@@ -145,7 +145,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
             # Case 1: line is not inside the blockquote, and this line is empty.
             break
 
-        evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented  # /* > */
+        evaluatesTrue = state.src[pos] == ">" and not isOutdented
         pos += 1
         if evaluatesTrue:
             # This line is inside the blockquote.
@@ -154,20 +154,20 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
             initial = offset = state.sCount[nextLine] + 1
 
             try:
-                next_char: int | None = state.srcCharCode[pos]
+                next_char: str | None = state.src[pos]
             except IndexError:
                 next_char = None
 
             # skip one optional space after '>'
-            if next_char == 0x20:  # /* space */
+            if next_char == " ":
                 # ' >   test '
                 #     ^ -- position start of line here:
                 pos += 1
                 initial += 1
                 offset += 1
                 adjustTab = False
                 spaceAfterMarker = True
-            elif next_char == 0x09:  # /* tab */
+            elif next_char == "\t":
                 spaceAfterMarker = True
 
                 if (state.bsCount[nextLine] + offset) % 4 == 3:
@@ -190,10 +190,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
             state.bMarks[nextLine] = pos
 
             while pos < max:
-                ch = state.srcCharCode[pos]
+                ch = state.src[pos]
 
                 if isSpace(ch):
-                    if ch == 0x09:
+                    if ch == "\t":
                         offset += (
                             4
                             - (

diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py
@@ -20,10 +20,9 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
     if pos + 3 > maximum:
         return False
 
-    marker = state.srcCharCode[pos]
+    marker = state.src[pos]
 
-    # /* ~ */  /* ` */
-    if marker != 0x7E and marker != 0x60:
+    if marker != "~" and marker != "`":
         return False
 
     # scan marker length
@@ -38,9 +37,8 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
     markup = state.src[mem:pos]
     params = state.src[pos:maximum]
 
-    # /* ` */
-    if marker == 0x60:
-        if chr(marker) in params:
+    if marker == "`":
+        if marker in params:
             return False
 
     # Since start is found, we can report success here in validation mode
@@ -66,7 +64,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
             #  test
             break
 
-        if state.srcCharCode[pos] != marker:
+        if state.src[pos] != marker:
             continue
 
         if state.sCount[nextLine] - state.blkIndent >= 4:

diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py
@@ -19,25 +19,23 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
     if state.sCount[startLine] - state.blkIndent >= 4:
         return False
 
-    ch: int | None = state.srcCharCode[pos]
+    ch: str | None = state.src[pos]
 
-    # /* # */
-    if ch != 0x23 or pos >= maximum:
+    if ch != "#" or pos >= maximum:
         return False
 
     # count heading level
     level = 1
     pos += 1
     try:
-        ch = state.srcCharCode[pos]
+        ch = state.src[pos]
     except IndexError:
         ch = None
-    # /* # */
-    while ch == 0x23 and pos < maximum and level <= 6:
+    while ch == "#" and pos < maximum and level <= 6:
         level += 1
         pos += 1
         try:
-            ch = state.srcCharCode[pos]
+            ch = state.src[pos]
         except IndexError:
             ch = None
 
@@ -50,8 +48,8 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
     # Let's cut tails like '    ###  ' from the end of string
 
     maximum = state.skipSpacesBack(maximum, pos)
-    tmp = state.skipCharsBack(maximum, 0x23, pos)  # #
-    if tmp > pos and isSpace(state.srcCharCode[tmp - 1]):
+    tmp = state.skipCharsBack(maximum, "#", pos)
+    if tmp > pos and isSpace(state.src[tmp - 1]):
         maximum = tmp
 
     state.line = startLine + 1

diff --git a/markdown_it/rules_block/hr.py b/markdown_it/rules_block/hr.py
@@ -20,18 +20,18 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):
     if state.sCount[startLine] - state.blkIndent >= 4:
         return False
 
-    marker = state.srcCharCode[pos]
+    marker = state.src[pos]
     pos += 1
 
-    # Check hr marker: /* * */ /* - */ /* _ */
-    if marker != 0x2A and marker != 0x2D and marker != 0x5F:
+    # Check hr marker
+    if marker != "*" and marker != "-" and marker != "_":
         return False
 
     # markers can be mixed with spaces, but there should be at least 3 of them
 
     cnt = 1
     while pos < maximum:
-        ch = state.srcCharCode[pos]
+        ch = state.src[pos]
         pos += 1
         if ch != marker and not isSpace(ch):
             return False
@@ -48,6 +48,6 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):
 
     token = state.push("hr", "hr", 0)
     token.map = [startLine, state.line]
-    token.markup = chr(marker) * (cnt + 1)
+    token.markup = marker * (cnt + 1)
 
     return True
diff --git a/markdown_it/rules_block/html_block.py b/markdown_it/rules_block/html_block.py
@@ -45,7 +45,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
     if not state.md.options.get("html", None):
         return False
 
-    if state.srcCharCode[pos] != 0x3C:  # /* < */
+    if state.src[pos] != "<":
         return False
 
     lineText = state.src[pos:maximum]

diff --git a/markdown_it/rules_block/lheading.py b/markdown_it/rules_block/lheading.py
@@ -36,16 +36,14 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
             maximum = state.eMarks[nextLine]
 
             if pos < maximum:
-                marker = state.srcCharCode[pos]
+                marker = state.src[pos]
 
-                # /* - */  /* = */
-                if marker == 0x2D or marker == 0x3D:
+                if marker == "-" or marker == "=":
                     pos = state.skipChars(pos, marker)
                     pos = state.skipSpaces(pos)
 
-                    # /* = */
                     if pos >= maximum:
-                        level = 1 if marker == 0x3D else 2
+                        level = 1 if marker == "=" else 2
                         break
 
         # quirk for blockquotes, this line should already be checked by that rule
@@ -73,7 +71,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
     state.line = nextLine + 1
 
     token = state.push("heading_open", "h" + str(level), 1)
-    token.markup = chr(marker)
+    token.markup = marker
     token.map = [startLine, state.line]
 
     token = state.push("inline", "", 0)
@@ -82,7 +80,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
     token.children = []
 
     token = state.push("heading_close", "h" + str(level), -1)
-    token.markup = chr(marker)
+    token.markup = marker
 
     state.parentType = oldParentType