Skip to content

Change the iteration order for functions containing async for and yield from. #1887

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 99 additions & 6 deletions pytype/blocks/blocks.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""Functions for computing the execution order of bytecode."""

from collections.abc import Iterator
from typing import Any, cast
from typing import Any, Sequence, cast
from pycnite import bytecode as pyc_bytecode
from pycnite import marshal as pyc_marshal
import pycnite.types
Expand Down Expand Up @@ -333,7 +333,21 @@ def _split_bytecode(bytecode: list[opcodes.Opcode]) -> list[Block]:
targets = {op.target for op in bytecode if op.target}
blocks = []
code = []
for op in bytecode:
i = 0
while i < len(bytecode):
op = bytecode[i]
# GET_AITER is used only in the context of `async for`.
# SEND is used only in the context of `async for` and `yield from`.
# These instructions are not used in any other context, so it's safe to
# process them assuming these are the only constructs they appear in.
if isinstance(op, (opcodes.GET_AITER, opcodes.SEND)):
if code:
blocks.append(Block(code))
code = []
block, i = _preprocess_async_for_and_yield(i, bytecode)
blocks.append(block)
continue

code.append(op)
if (
op.no_next()
Expand All @@ -344,9 +358,73 @@ def _split_bytecode(bytecode: list[opcodes.Opcode]) -> list[Block]:
):
blocks.append(Block(code))
code = []
i += 1
return blocks


def _preprocess_async_for_and_yield(
    idx: int,
    bytecode: Sequence[opcodes.Opcode],
) -> tuple[Block, int]:
  """Groups the iteration part of `async for` / `yield from` into one block.

  An `async for` construct starts with a GET_AITER instruction and ends with a
  JUMP_BACKWARD instruction, but only the iteration part is handled here. That
  part starts with GET_AITER (or SEND) and usually consists of the following
  instructions, in this exact order:
    GET_AITER, GET_ANEXT, LOAD_CONST, SEND, YIELD_VALUE, RESUME,
    JUMP_BACKWARD_NO_INTERRUPT, END_SEND.
  END_SEND is only present from 3.12 on.

  This preprocessing is needed because the control flow of `async for` differs
  drastically from regular control flow: the loop terminates through a
  StopAsyncIteration exception rather than an ordinary jump. The instructions
  are therefore grouped into a single basic block so that pytype executes them
  in the order they would run at runtime; executing them in the wrong order
  can result in a stack underrun.

  The jump back to the beginning of the loop iteration (the JUMP_BACKWARD
  instruction(s) targeting GET_ANEXT) needs no special handling: no exception
  is involved, so _split_bytecode and compute_order are expected to treat it
  like any other jump.

  Args:
    idx: The index of the GET_AITER or SEND instruction that starts the block.
    bytecode: A sequence of opcodes.Opcode instances.

  Returns:
    A tuple of (Block, int). The Block holds the instructions from `idx` up to
    and including JUMP_BACKWARD_NO_INTERRUPT plus any directly following
    CLEANUP_THROW and END_SEND. The int is the index of the first instruction
    after that block, i.e. where the caller should resume scanning.

  Raises:
    AssertionError: If no JUMP_BACKWARD_NO_INTERRUPT follows `idx`.
  """
  assert isinstance(bytecode[idx], (opcodes.GET_AITER, opcodes.SEND))

  num_ops = len(bytecode)
  for i in range(idx + 1, num_ops):
    if not isinstance(bytecode[i], opcodes.JUMP_BACKWARD_NO_INTERRUPT):
      continue

    end_block_idx = i + 1
    # CLEANUP_THROW can be present right after JUMP_BACKWARD_NO_INTERRUPT,
    # depending on how the control flow graph is constructed. Usually it comes
    # much later. The bounds checks guard against the jump being the very last
    # instruction.
    if end_block_idx < num_ops and isinstance(
        bytecode[end_block_idx], opcodes.CLEANUP_THROW
    ):
      end_block_idx += 1

    # From 3.12 on, END_SEND is present after JUMP_BACKWARD_NO_INTERRUPT.
    if end_block_idx < num_ops and isinstance(
        bytecode[end_block_idx], opcodes.END_SEND
    ):
      end_block_idx += 1

    return Block(bytecode[idx:end_block_idx]), end_block_idx

  # Should be unreachable. Raise explicitly instead of `assert False` so the
  # failure is not silently dropped when running under `python -O`.
  raise AssertionError(
      "No JUMP_BACKWARD_NO_INTERRUPT found after GET_AITER/SEND"
  )


def _is_async_basic_block(block: Block) -> bool:
  """Tells whether `block` starts with a GET_AITER or SEND instruction.

  Blocks built by _preprocess_async_for_and_yield always begin with one of
  these two opcodes, which is how the iteration part of an async construct is
  recognised.

  Args:
    block: The basic block to inspect; only its first opcode is examined.

  Returns:
    True if the first opcode is GET_AITER or SEND, False otherwise.
  """
  first_op = block.code[0]
  return isinstance(first_op, (opcodes.GET_AITER, opcodes.SEND))


def compute_order(bytecode: list[opcodes.Opcode]) -> list[Block]:
"""Split bytecode into blocks and order the blocks.

Expand All @@ -360,19 +438,34 @@ def compute_order(bytecode: list[opcodes.Opcode]) -> list[Block]:
A list of Block instances.
"""
blocks = _split_bytecode(bytecode)
first_op_to_block = {block.code[0]: block for block in blocks}
# Usually, jumping into a non-first instruction of a block does not happen
# except for async constructs, due to the basic block split logic made in
# _preprocess_async_for_and_yield.
op_to_block = {}
for block in blocks:
if not _is_async_basic_block(block):
op_to_block[block.code[0]] = block
else:
for code in block.code:
op_to_block[code] = block
for i, block in enumerate(blocks):
next_block = blocks[i + 1] if i < len(blocks) - 1 else None
first_op, last_op = block.code[0], block.code[-1]
# Async BBs only have one edge which is to the next BB, because no explicit
# jump instruction is present in these blocks.
if _is_async_basic_block(block):
block.connect_outgoing(next_block)
continue

if next_block and not last_op.no_next():
block.connect_outgoing(next_block)
if first_op.target:
# Handles SETUP_EXCEPT -> except block
block.connect_outgoing(first_op_to_block[first_op.target])
block.connect_outgoing(op_to_block[first_op.target])
if last_op.target:
block.connect_outgoing(first_op_to_block[last_op.target])
block.connect_outgoing(op_to_block[last_op.target])
if last_op.block_target:
block.connect_outgoing(first_op_to_block[last_op.block_target])
block.connect_outgoing(op_to_block[last_op.block_target])
return cfg_utils.order_nodes(blocks)


Expand Down
24 changes: 0 additions & 24 deletions pytype/pyc/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1306,30 +1306,6 @@ def _should_elide_opcode(
and isinstance(op_items[i + 1][1], END_ASYNC_FOR)
)

# In 3.12 all generators are compiled into infinite loops, too. In addition,
# YIELD_VALUE inserts exception handling instructions:
# CLEANUP_THROW
# JUMP_BACKWARD
# These can appear on their own or they can be inserted between JUMP_BACKWARD
# and END_ASYNC_FOR, possibly many times. We keep eliding the `async for` jump
# and also elide the exception handling cleanup codes because they're not
# relevant for pytype and complicate the block graph.
if python_version == (3, 12):
return (
isinstance(op, CLEANUP_THROW)
or (
isinstance(op, JUMP_BACKWARD)
and i >= 1
and isinstance(op_items[i - 1][1], CLEANUP_THROW)
)
or (
isinstance(op, JUMP_BACKWARD)
and isinstance(
_get_opcode_following_cleanup_throw_jump_pairs(op_items, i + 1),
END_ASYNC_FOR,
)
)
)
return False


Expand Down
Loading