diff --git a/Makefile b/Makefile index 12151d70..2f365caf 100644 --- a/Makefile +++ b/Makefile @@ -120,6 +120,14 @@ docs-view: ## View documentation docstrings: ## Check docstrings pydocstyle edsl +style-report: ## Check docstrings and generate a report + python scripts/style_report.py --source edsl --output style_report + open style_report/index.html + +typing-report: + python scripts/typing_report.py --source edsl --output typing_report + open typing_report/index.html + format: ## Run code autoformatters (black). pre-commit install pre-commit run black-jupyter --all-files --all diff --git a/edsl/scenarios/Scenario.py b/edsl/scenarios/Scenario.py index fbeed32b..6b091d27 100644 --- a/edsl/scenarios/Scenario.py +++ b/edsl/scenarios/Scenario.py @@ -19,6 +19,8 @@ class DisplayJSON: + """Display a dictionary as JSON.""" + def __init__(self, input_dict: dict): self.text = json.dumps(input_dict, indent=4) @@ -27,6 +29,8 @@ def __repr__(self): class DisplayYAML: + """Display a dictionary as YAML.""" + def __init__(self, input_dict: dict): import yaml @@ -41,7 +45,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin): __documentation__ = "https://docs.expectedparrot.com/en/latest/scenarios.html" - def __init__(self, data: Optional[dict] = None, name: str = None): + def __init__(self, data: Optional[dict] = None, name: Optional[str] = None): """Initialize a new Scenario. :param data: A dictionary of keys/values for parameterizing questions. @@ -66,7 +70,6 @@ def replicate(self, n: int) -> "ScenarioList": :param n: The number of times to replicate the scenario. Example: - >>> s = Scenario({"food": "wood chips"}) >>> s.replicate(2) ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood chips'})]) @@ -135,7 +138,7 @@ def __add__(self, other_scenario: Scenario) -> Scenario: def rename( self, - old_name_or_replacement_dict: Union[str, dict], + old_name_or_replacement_dict: Union[str, dict[str, str]], new_name: Optional[str] = None, ) -> Scenario: """Rename the keys of a scenario. @@ -267,7 +270,7 @@ def select(self, list_of_keys: Collection[str]) -> "Scenario": new_scenario[key] = self[key] return new_scenario - def drop(self, list_of_keys: List[str]) -> "Scenario": + def drop(self, list_of_keys: Collection[str]) -> "Scenario": """Drop a subset of keys from a scenario. :param list_of_keys: The keys to drop. @@ -455,7 +458,11 @@ def from_dict(cls, d: dict) -> "Scenario": return cls(d) def _table(self) -> tuple[dict, list]: - """Prepare generic table data.""" + """Prepare generic table data. + >>> s = Scenario({"food": "wood chips"}) + >>> s._table() + ([{'Attribute': 'data', 'Value': "{'food': 'wood chips'}"}, {'Attribute': 'name', 'Value': 'None'}], ['Attribute', 'Value']) + """ table_data = [] for attr_name, attr_value in self.__dict__.items(): table_data.append({"Attribute": attr_name, "Value": repr(attr_value)}) @@ -463,21 +470,18 @@ def _table(self) -> tuple[dict, list]: return table_data, column_names @classmethod - def example(cls, randomize: bool = False, has_image=False) -> Scenario: + def example(cls, randomize: bool = False) -> Scenario: """ Returns an example Scenario instance. :param randomize: If True, adds a random string to the value of the example key. """ - if not has_image: - addition = "" if not randomize else str(uuid4()) - return cls( - { - "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}", - } - ) - else: - return cls.from_image(cls.example_image()) + addition = "" if not randomize else str(uuid4()) + return cls( + { + "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}", + } + ) def code(self) -> List[str]: """Return the code for the scenario.""" diff --git a/scripts/style_report.py b/scripts/style_report.py new file mode 100644 index 00000000..a0c2ea71 --- /dev/null +++ b/scripts/style_report.py @@ -0,0 +1,214 @@ +import os +import glob +from pydocstyle import check +from datetime import datetime + + +def create_html_report(filename, violations): + """Create an HTML report for a single file's pydocstyle violations.""" + html_content = f""" + + + + PyDocStyle Report - {filename} + + + +
+

PyDocStyle Report

+

File: {filename}

+

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

+

Back to Index

+
+ """ + + if not violations: + html_content += "

✅ No documentation style violations found!

" + else: + html_content += f"

Found {len(violations)} violation(s):

" + for violation in violations: + html_content += f""" +
+

{violation.code} at line {violation.line}

+

{violation.message}

+
{violation.source}
+
+ """ + + html_content += """ + + + """ + return html_content + + +def create_index_html(file_reports, output_dir): + """Create an index.html with a summary table of all reports.""" + template = """ + + + + PyDocStyle Reports Index + + + +

PyDocStyle Reports Index

+
+

Generated: {}

+

Total files analyzed: {}

+

Total violations found: {}

+
+ + + + + + + + + """ + html_content = template.format( + datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + len(file_reports), + sum(count for _, count in file_reports.values()), + ) + + # Sort files by violation count (descending) and then by path + sorted_files = sorted(file_reports.items(), key=lambda x: (-x[1][1], x[0])) + + for file_path, (report_path, violation_count) in sorted_files: + # Make the report path relative to the output directory + relative_report_path = os.path.relpath(report_path, output_dir) + + error_class = "no-errors" if violation_count == 0 else "error-count" + html_content += f""" + + + + + """ + + html_content += """ + +
File PathViolations
{file_path}{violation_count}
+ + + """ + + index_path = os.path.join(output_dir, "index.html") + with open(index_path, "w", encoding="utf-8") as f: + f.write(html_content) + + return index_path + + +def process_files(source_dir=".", output_dir="pydocstyle_reports"): + """Process all Python files and generate HTML reports.""" + # Create output directory if it doesn't exist + os.makedirs(output_dir, exist_ok=True) + + # Dictionary to store file reports info: {file_path: (report_path, violation_count)} + file_reports = {} + + # Find all Python files + python_files = glob.glob(os.path.join(source_dir, "**/*.py"), recursive=True) + + for py_file in python_files: + # Get violations for the file + violations = list(check([py_file])) + + # Create HTML report + html_content = create_html_report(py_file, violations) + + # Generate output filename + relative_path = os.path.relpath(py_file, source_dir) + output_file = os.path.join(output_dir, f"{relative_path}.html") + + # Create necessary subdirectories + os.makedirs(os.path.dirname(output_file), exist_ok=True) + + # Write the report + with open(output_file, "w", encoding="utf-8") as f: + f.write(html_content) + + # Store report info + file_reports[relative_path] = (output_file, len(violations)) + + print(f"Generated report for {py_file} -> {output_file}") + + # Create index.html + index_path = create_index_html(file_reports, output_dir) + print(f"\nGenerated index at {index_path}") + print(f"Total files analyzed: {len(file_reports)}") + print(f"Total violations found: {sum(count for _, count in file_reports.values())}") + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Generate HTML reports for PyDocStyle violations" + ) + parser.add_argument( + "--source", default=".", help="Source directory containing Python files" + ) + parser.add_argument( + "--output", + default="pydocstyle_reports", + help="Output directory for HTML reports", + ) + + args = parser.parse_args() + process_files(args.source, args.output) diff --git a/scripts/typing_report.py b/scripts/typing_report.py new file mode 100644 index 00000000..ac586c94 --- /dev/null +++ b/scripts/typing_report.py @@ -0,0 +1,347 @@ +import os +import glob +import json +from datetime import datetime +import subprocess +from dataclasses import dataclass +from typing import List, Dict, Tuple, Optional + + +@dataclass +class MypyError: + """Representation of a mypy error.""" + + file: str + line: int + column: Optional[int] + severity: str + message: str + error_type: Optional[str] + + +def check_mypy_installation() -> bool: + """Check if mypy is installed and supports JSON output.""" + try: + # Check mypy version + version_result = subprocess.run( + ["mypy", "--version"], capture_output=True, text=True + ) + if version_result.returncode != 0: + print("Error: mypy is not properly installed.") + print("Please install mypy with: pip install mypy") + return False + + print(f"Found mypy version: {version_result.stdout.strip()}") + return True + except FileNotFoundError: + print("Error: mypy is not installed.") + print("Please install mypy with: pip install mypy") + return False + + +def run_mypy(file_path: str) -> List[MypyError]: + """Run mypy on a single file and return the errors.""" + # First check if mypy is properly installed + if not check_mypy_installation(): + return [] + try: + print(f"Running mypy on {file_path}") + # Run mypy with --json output format + # Run mypy with show-error-codes to get more detailed output + result = subprocess.run( + [ + "mypy", + "--show-error-codes", + "--no-error-summary", + "--no-color-output", + file_path, + ], + capture_output=True, + text=True, + ) + + # Parse the text output instead of JSON + errors = [] + if result.stdout: + for line in result.stdout.splitlines(): + if ": error:" in line or ": note:" in line: + try: + # Parse lines like: file.py:10: error: Message [error-code] + file_info, message = line.split(": ", 1) + file_path, line_no = file_info.rsplit(":", 1) + severity, message = message.split(": ", 1) + + # Extract error code if present + error_type = None + if "[" in message and message.endswith("]"): + message, error_type = message.rsplit(" [", 1) + error_type = error_type[:-1] # Remove closing bracket + + errors.append( + MypyError( + file=file_path, + line=int(line_no), + column=None, # Column information not available in text output + severity=severity, + message=message, + error_type=error_type, + ) + ) + except (ValueError, IndexError) as e: + print(f"Error parsing line: {line}") + print(f"Error details: {e}") + continue + + return errors + + errors = [] + for error in output: + errors.append( + MypyError( + file=error["path"], + line=error["line"], + column=error.get("column"), + severity=error["severity"], + message=error["message"], + error_type=error.get("type"), + ) + ) + + return errors + except (subprocess.SubprocessError, json.JSONDecodeError) as e: + print(f"Error analyzing {file_path}: {e}") + print(f"stderr: {result.stderr}") + print(f"stdout: {result.stdout}") + return [] + + +def create_html_report(filename: str, errors: List[MypyError]) -> str: + """Create an HTML report for a single file's mypy errors.""" + html_content = f""" + + + + Mypy Report - {filename} + + + +
+

Mypy Type Check Report

+

File: {filename}

+

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

+

Back to Index

+
+ """ + + if not errors: + html_content += "

✅ No type checking errors found!

" + else: + html_content += f"

Found {len(errors)} error(s):

" + for error in errors: + severity_class = "error" if error.severity == "error" else "warning" + location = f"line {error.line}" + if error.column is not None: + location += f", column {error.column}" + + html_content += f""" +
+

{error.error_type or error.severity.upper()}

+

{error.message}

+

{location}

+
+ """ + + html_content += """ + + + """ + return html_content + + +def create_index_html(file_reports: Dict[str, Tuple[str, int]], output_dir: str) -> str: + """Create an index.html with a summary table of all reports.""" + template = """ + + + + Mypy Reports Index + + + +

Mypy Type Check Reports

+
+

Generated: {}

+

Total files analyzed: {}

+

Total type errors found: {}

+
+ + + + + + + + + """ + + html_content = template.format( + datetime.now().strftime("%Y-%m-%d %H:%M:%S"), + len(file_reports), + sum(count for _, count in file_reports.values()), + ) + + # Sort files by error count (descending) and then by path + sorted_files = sorted(file_reports.items(), key=lambda x: (-x[1][1], x[0])) + + for file_path, (report_path, error_count) in sorted_files: + # Make the report path relative to the output directory + relative_report_path = os.path.relpath(report_path, output_dir) + + error_class = "no-errors" if error_count == 0 else "error-count" + html_content += f""" + + + + + """ + + html_content += """ + +
File PathType Errors
{file_path}{error_count}
+ + + """ + + index_path = os.path.join(output_dir, "index.html") + with open(index_path, "w", encoding="utf-8") as f: + f.write(html_content) + + return index_path + + +def process_files(source_dir: str = ".", output_dir: str = "mypy_reports") -> None: + """Process all Python files and generate HTML reports.""" + # Check mypy installation first + if not check_mypy_installation(): + print("Aborting due to mypy installation issues.") + return + # Create output directory if it doesn't exist + os.makedirs(output_dir, exist_ok=True) + + # Dictionary to store file reports info: {file_path: (report_path, error_count)} + file_reports = {} + + # Find all Python files + python_files = glob.glob(os.path.join(source_dir, "**/*.py"), recursive=True) + + for py_file in python_files: + # Run mypy and get errors + errors = run_mypy(py_file) + + # Create HTML report + html_content = create_html_report(py_file, errors) + + # Generate output filename + relative_path = os.path.relpath(py_file, source_dir) + output_file = os.path.join(output_dir, f"{relative_path}.html") + + # Create necessary subdirectories + os.makedirs(os.path.dirname(output_file), exist_ok=True) + + # Write the report + with open(output_file, "w", encoding="utf-8") as f: + f.write(html_content) + + # Store report info + file_reports[relative_path] = (output_file, len(errors)) + + print(f"Generated report for {py_file} -> {output_file}") + + # Create index.html + index_path = create_index_html(file_reports, output_dir) + print(f"\nGenerated index at {index_path}") + print(f"Total files analyzed: {len(file_reports)}") + print( + f"Total type errors found: {sum(count for _, count in file_reports.values())}" + ) + + +if __name__ == "__main__": + import argparse + + parser = argparse.ArgumentParser( + description="Generate HTML reports for Mypy type checking results" + ) + parser.add_argument( + "--source", default=".", help="Source directory containing Python files" + ) + parser.add_argument( + "--output", + default="mypy_reports", + help="Output directory for HTML reports", + ) + + args = parser.parse_args() + process_files(args.source, args.output)