Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .cspell-repo-terms.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,6 @@ Nanvix
reqwest
requireapproval
uncallable
endswith
rfind
lstrip
19 changes: 18 additions & 1 deletion scripts/check_dependency_confusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
"opentelemetry-sdk", "fhir.resources", "hl7apy", "zenpy", "freshdesk",
"google-adk", "safety", "jupyter", "vitest", "tsup", "typescript",
"requests",
"twine",
# Dashboard / visualization (used in examples)
"streamlit", "plotly", "pandas", "networkx", "matplotlib", "pyvis",
# Async / caching (used in examples)
Expand Down Expand Up @@ -211,8 +212,24 @@ def check_file(filepath: str) -> list[str]:
except (OSError, UnicodeDecodeError):
return findings

is_shell = filepath.endswith((".sh", ".bash"))

for match in PIP_INSTALL_RE.finditer(content):
line_num = content[:match.start()].count("\n") + 1
# For shell scripts, filter out matches that are inside a comment
# or an echo/printf invocation. Only the current shell command
# segment (split on ;, &&, ||, |) is examined so that
# `echo done; pip install foo` is still flagged.
if is_shell:
line_start = content.rfind("\n", 0, match.start()) + 1
before_pip = content[line_start:match.start()]
# Take the last command segment on this line.
segment = re.split(r';|&&|\|\||(?<!\|)\|(?!\|)', before_pip)[-1]
if "#" in segment:
continue
stripped = segment.lstrip()
if re.match(r'(?:sudo\s+)?(?:echo|printf)\b', stripped):
continue
packages = extract_package_names(match.group(1))
for pkg in packages:
if pkg.lower() not in {p.lower() for p in REGISTERED_PACKAGES}:
Expand Down Expand Up @@ -412,7 +429,7 @@ def main() -> int:
)
files = [
f for f in result.stdout.strip().split("\n")
if f.endswith((".md", ".py", ".ts", ".txt", ".yaml", ".yml", ".ipynb", ".svg"))
if f.endswith((".md", ".py", ".ts", ".txt", ".yaml", ".yml", ".ipynb", ".svg", ".sh", ".bash"))
]

all_findings = []
Expand Down
144 changes: 144 additions & 0 deletions tests/ci/test_check_dependency_confusion.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Regression tests for scripts/check_dependency_confusion.py.

Covers the fix for issue #2207: shell-script (.sh/.bash) scanning in
pre-commit mode and false-positive avoidance for echoed/commented
``pip install`` lines.
"""

from __future__ import annotations

import importlib.util
from pathlib import Path

import pytest


SCRIPT_PATH = (
Path(__file__).resolve().parents[2]
/ "scripts"
/ "check_dependency_confusion.py"
)
SPEC = importlib.util.spec_from_file_location("check_dep_conf", SCRIPT_PATH)
assert SPEC is not None
check_dep_conf = importlib.util.module_from_spec(SPEC)
assert SPEC.loader is not None
SPEC.loader.exec_module(check_dep_conf)


def _write(tmp_path: Path, name: str, content: str) -> str:
p = tmp_path / name
p.write_text(content, encoding="utf-8")
return str(p)


def test_sh_real_pip_install_unregistered_is_flagged(tmp_path: Path) -> None:
path = _write(
tmp_path,
"build.sh",
"#!/usr/bin/env bash\nset -e\npip install totally-not-a-real-package-xyz\n",
)
findings = check_dep_conf.check_file(path)
assert any("totally-not-a-real-package-xyz" in f for f in findings), findings


def test_sh_echoed_pip_install_is_not_flagged(tmp_path: Path) -> None:
path = _write(
tmp_path,
"build.sh",
'#!/usr/bin/env bash\necho "pip install totally-not-a-real-package-xyz"\n',
)
assert check_dep_conf.check_file(path) == []


def test_sh_printf_pip_install_is_not_flagged(tmp_path: Path) -> None:
path = _write(
tmp_path,
"build.sh",
'#!/usr/bin/env bash\nprintf "run: pip install totally-not-a-real-package-xyz\\n"\n',
)
assert check_dep_conf.check_file(path) == []


def test_sh_commented_pip_install_is_not_flagged(tmp_path: Path) -> None:
path = _write(
tmp_path,
"build.sh",
"#!/usr/bin/env bash\n# pip install totally-not-a-real-package-xyz\n",
)
assert check_dep_conf.check_file(path) == []


def test_sh_inline_comment_pip_install_is_not_flagged(tmp_path: Path) -> None:
path = _write(
tmp_path,
"build.sh",
"#!/usr/bin/env bash\ntrue # pip install totally-not-a-real-package-xyz\n",
)
assert check_dep_conf.check_file(path) == []


def test_sh_known_package_is_not_flagged(tmp_path: Path) -> None:
path = _write(
tmp_path,
"build.sh",
"#!/usr/bin/env bash\npip install pydantic\n",
)
assert check_dep_conf.check_file(path) == []


def test_sh_real_install_after_echo_separator_is_flagged(tmp_path: Path) -> None:
# Regression: `echo ...; pip install <unregistered>` is a real install.
path = _write(
tmp_path,
"build.sh",
'#!/usr/bin/env bash\necho preparing; pip install totally-not-a-real-package-xyz\n',
)
findings = check_dep_conf.check_file(path)
assert any("totally-not-a-real-package-xyz" in f for f in findings), findings


def test_md_pip_install_after_hash_still_flagged(tmp_path: Path) -> None:
# Suppression must not affect non-shell files: a markdown heading like
# `# Install: pip install <unregistered>` should still be flagged.
path = _write(
tmp_path,
"guide.md",
"# Install: pip install totally-not-a-real-package-xyz\n",
)
findings = check_dep_conf.check_file(path)
assert any("totally-not-a-real-package-xyz" in f for f in findings), findings


def test_txt_existing_behavior_unchanged_flags_unregistered(tmp_path: Path) -> None:
path = _write(
tmp_path,
"install.txt",
"Run: pip install totally-not-a-real-package-xyz\n",
)
findings = check_dep_conf.check_file(path)
assert any("totally-not-a-real-package-xyz" in f for f in findings), findings


def test_md_existing_behavior_unchanged_known_package_ok(tmp_path: Path) -> None:
path = _write(
tmp_path,
"README.md",
"# Install\n\n```bash\npip install pydantic\n```\n",
)
assert check_dep_conf.check_file(path) == []


def test_pre_commit_extension_filter_includes_shell() -> None:
# Sanity check on the extensions we want to be picked up by pre-commit.
src = SCRIPT_PATH.read_text(encoding="utf-8")
assert '".sh"' in src
assert '".bash"' in src


@pytest.mark.parametrize("ext", [".py", ".md", ".txt", ".ipynb"])
def test_pre_commit_extension_filter_keeps_existing(ext: str) -> None:
src = SCRIPT_PATH.read_text(encoding="utf-8")
assert f'"{ext}"' in src
Loading