Skip to content

🗑 DEPRECATE: StateBase.srcCharCode, int as input to skipChars, skipCharsBack, isTerminatorChar, isLetter and isSpace #199

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 13 commits into from
10 changes: 8 additions & 2 deletions markdown_it/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import html
import re
from typing import Any
import warnings

from .entities import entities

Expand Down Expand Up @@ -111,7 +112,7 @@ def replaceEntityPattern(match: str, name: str) -> str:
if name in entities:
return entities[name]

if ord(name[0]) == 0x23 and DIGITAL_ENTITY_TEST_RE.search(name):
if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name):
code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10)
if isValidEntityCode(code):
return fromCodePoint(code)
Expand Down Expand Up @@ -194,7 +195,12 @@ def escapeRE(string: str) -> str:


def isSpace(code: object) -> bool:
return code in {0x09, 0x20}
if isinstance(code, int):
warnings.warn(
"`int`s are deprecated as `isSpace` input", DeprecationWarning, stacklevel=2
)
code = chr(code)
return code in {"\t", " "}


MD_WHITESPACE = {
Expand Down
6 changes: 3 additions & 3 deletions markdown_it/helpers/parse_link_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,16 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False)
level = 1

while state.pos < state.posMax:
marker = state.srcCharCode[state.pos]
if marker == 0x5D: # /* ] */)
marker = state.src[state.pos]
if marker == "]":
level -= 1
if level == 0:
found = True
break

prevPos = state.pos
state.md.inline.skipToken(state)
if marker == 0x5B: # /* [ */)
if marker == "[":
if prevPos == state.pos - 1:
# increase level if we find text `[`,
# which is not a part of any token
Expand Down
11 changes: 2 additions & 9 deletions markdown_it/parser_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,10 @@ def tokenize(
line += 1
state.line = line

def parse(
self,
src: str,
md,
env,
outTokens: list[Token],
ords: tuple[int, ...] | None = None,
) -> list[Token] | None:
def parse(self, src: str, md, env, outTokens: list[Token]) -> list[Token] | None:
"""Process input string and push block tokens into `outTokens`."""
if not src:
return None
state = StateBlock(src, md, env, outTokens, ords)
state = StateBlock(src, md, env, outTokens)
self.tokenize(state, state.line, state.lineMax)
return state.tokens
3 changes: 1 addition & 2 deletions markdown_it/port.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
to manipulate `Token.attrs`, which have an identical signature to those upstream.
- Use python version of `charCodeAt`
- |
Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state
objects and sharing those whenever possible
Use `str` units instead of `int`s to represent Unicode codepoints.
This provides a significant performance boost
- |
In markdown_it/rules_block/reference.py,
Expand Down
23 changes: 14 additions & 9 deletions markdown_it/ruler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,35 @@ class Ruler

from collections.abc import Callable, Iterable, MutableMapping
from dataclasses import dataclass, field
from functools import lru_cache
from typing import TYPE_CHECKING
import warnings

from markdown_it._compat import DATACLASS_KWARGS

if TYPE_CHECKING:
from markdown_it import MarkdownIt


class StateBase:
srcCharCode: tuple[int, ...]
@lru_cache()
def _str_to_ords(s: str) -> tuple[int, ...]:
return tuple(ord(c) for c in s)


class StateBase:
def __init__(self, src: str, md: MarkdownIt, env: MutableMapping):
self.src = src
self.env = env
self.md = md

@property
def src(self) -> str:
return self._src

@src.setter
def src(self, value: str) -> None:
self._src = value
self.srcCharCode = tuple(ord(c) for c in self.src)
def srcCharCode(self) -> tuple[int, ...]:
warnings.warn(
"`StateBase.srcCharCode` is deprecated. Use `StateBase.src`.",
DeprecationWarning,
stacklevel=2,
)
return _str_to_ords(self.src)


# The first positional arg is always a subtype of `StateBase`. Other
Expand Down
24 changes: 12 additions & 12 deletions markdown_it/rules_block/blockquote.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
return False

# check the block quote marker
if state.srcCharCode[pos] != 0x3E: # /* > */
if state.src[pos] != ">":
return False
pos += 1

Expand All @@ -36,20 +36,20 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
initial = offset = state.sCount[startLine] + 1

try:
second_char_code: int | None = state.srcCharCode[pos]
second_char_code: str | None = state.src[pos]
except IndexError:
second_char_code = None

# skip one optional space after '>'
if second_char_code == 0x20: # /* space */
if second_char_code == " ":
# ' > test '
# ^ -- position start of line here:
pos += 1
initial += 1
offset += 1
adjustTab = False
spaceAfterMarker = True
elif second_char_code == 0x09: # /* tab */
elif second_char_code == "\t":
spaceAfterMarker = True

if (state.bsCount[startLine] + offset) % 4 == 3:
Expand All @@ -72,10 +72,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
state.bMarks[startLine] = pos

while pos < max:
ch = state.srcCharCode[pos]
ch = state.src[pos]

if isSpace(ch):
if ch == 0x09: # / tab /
if ch == "\t":
offset += (
4
- (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
Expand Down Expand Up @@ -145,7 +145,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
# Case 1: line is not inside the blockquote, and this line is empty.
break

evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */
evaluatesTrue = state.src[pos] == ">" and not isOutdented
pos += 1
if evaluatesTrue:
# This line is inside the blockquote.
Expand All @@ -154,20 +154,20 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
initial = offset = state.sCount[nextLine] + 1

try:
next_char: int | None = state.srcCharCode[pos]
next_char: str | None = state.src[pos]
except IndexError:
next_char = None

# skip one optional space after '>'
if next_char == 0x20: # /* space */
if next_char == " ":
# ' > test '
# ^ -- position start of line here:
pos += 1
initial += 1
offset += 1
adjustTab = False
spaceAfterMarker = True
elif next_char == 0x09: # /* tab */
elif next_char == "\t":
spaceAfterMarker = True

if (state.bsCount[nextLine] + offset) % 4 == 3:
Expand All @@ -190,10 +190,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
state.bMarks[nextLine] = pos

while pos < max:
ch = state.srcCharCode[pos]
ch = state.src[pos]

if isSpace(ch):
if ch == 0x09:
if ch == "\t":
offset += (
4
- (
Expand Down
12 changes: 5 additions & 7 deletions markdown_it/rules_block/fence.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,9 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
if pos + 3 > maximum:
return False

marker = state.srcCharCode[pos]
marker = state.src[pos]

# /* ~ */ /* ` */
if marker != 0x7E and marker != 0x60:
if marker != "~" and marker != "`":
return False

# scan marker length
Expand All @@ -38,9 +37,8 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
markup = state.src[mem:pos]
params = state.src[pos:maximum]

# /* ` */
if marker == 0x60:
if chr(marker) in params:
if marker == "`":
if marker in params:
return False

# Since start is found, we can report success here in validation mode
Expand All @@ -66,7 +64,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
# test
break

if state.srcCharCode[pos] != marker:
if state.src[pos] != marker:
continue

if state.sCount[nextLine] - state.blkIndent >= 4:
Expand Down
16 changes: 7 additions & 9 deletions markdown_it/rules_block/heading.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,23 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
if state.sCount[startLine] - state.blkIndent >= 4:
return False

ch: int | None = state.srcCharCode[pos]
ch: str | None = state.src[pos]

# /* # */
if ch != 0x23 or pos >= maximum:
if ch != "#" or pos >= maximum:
return False

# count heading level
level = 1
pos += 1
try:
ch = state.srcCharCode[pos]
ch = state.src[pos]
except IndexError:
ch = None
# /* # */
while ch == 0x23 and pos < maximum and level <= 6:
while ch == "#" and pos < maximum and level <= 6:
level += 1
pos += 1
try:
ch = state.srcCharCode[pos]
ch = state.src[pos]
except IndexError:
ch = None

Expand All @@ -50,8 +48,8 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
# Let's cut tails like ' ### ' from the end of string

maximum = state.skipSpacesBack(maximum, pos)
tmp = state.skipCharsBack(maximum, 0x23, pos) # #
if tmp > pos and isSpace(state.srcCharCode[tmp - 1]):
tmp = state.skipCharsBack(maximum, "#", pos)
if tmp > pos and isSpace(state.src[tmp - 1]):
maximum = tmp

state.line = startLine + 1
Expand Down
10 changes: 5 additions & 5 deletions markdown_it/rules_block/hr.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):
if state.sCount[startLine] - state.blkIndent >= 4:
return False

marker = state.srcCharCode[pos]
marker = state.src[pos]
pos += 1

# Check hr marker: /* * */ /* - */ /* _ */
if marker != 0x2A and marker != 0x2D and marker != 0x5F:
# Check hr marker
if marker != "*" and marker != "-" and marker != "_":
return False

# markers can be mixed with spaces, but there should be at least 3 of them

cnt = 1
while pos < maximum:
ch = state.srcCharCode[pos]
ch = state.src[pos]
pos += 1
if ch != marker and not isSpace(ch):
return False
Expand All @@ -48,6 +48,6 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):

token = state.push("hr", "hr", 0)
token.map = [startLine, state.line]
token.markup = chr(marker) * (cnt + 1)
token.markup = marker * (cnt + 1)

return True
2 changes: 1 addition & 1 deletion markdown_it/rules_block/html_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
if not state.md.options.get("html", None):
return False

if state.srcCharCode[pos] != 0x3C: # /* < */
if state.src[pos] != "<":
return False

lineText = state.src[pos:maximum]
Expand Down
12 changes: 5 additions & 7 deletions markdown_it/rules_block/lheading.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,14 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
maximum = state.eMarks[nextLine]

if pos < maximum:
marker = state.srcCharCode[pos]
marker = state.src[pos]

# /* - */ /* = */
if marker == 0x2D or marker == 0x3D:
if marker == "-" or marker == "=":
pos = state.skipChars(pos, marker)
pos = state.skipSpaces(pos)

# /* = */
if pos >= maximum:
level = 1 if marker == 0x3D else 2
level = 1 if marker == "=" else 2
break

# quirk for blockquotes, this line should already be checked by that rule
Expand Down Expand Up @@ -73,7 +71,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
state.line = nextLine + 1

token = state.push("heading_open", "h" + str(level), 1)
token.markup = chr(marker)
token.markup = marker
token.map = [startLine, state.line]

token = state.push("inline", "", 0)
Expand All @@ -82,7 +80,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
token.children = []

token = state.push("heading_close", "h" + str(level), -1)
token.markup = chr(marker)
token.markup = marker

state.parentType = oldParentType

Expand Down
Loading