Merge pull request expectedparrot#1413 from expectedparrot/static_analysis_tools

Tools for automatically generating typing/docstring reports
arulmabr · Dec 22, 2024 · 9fe0814 · 9fe0814
2 parents 60762ce + 4403d09
commit 9fe0814
Show file tree

Hide file tree

Showing 4 changed files with 588 additions and 15 deletions.
diff --git a/Makefile b/Makefile
@@ -120,6 +120,14 @@ docs-view: ## View documentation
 docstrings: ## Check docstrings
 	pydocstyle edsl
 
+# Runs scripts/style_report.py over the edsl package, writing HTML into style_report/, then opens the generated index page.
+style-report: ## Check docstrings and generate a report
+	python scripts/style_report.py --source edsl --output style_report
+	open style_report/index.html
+
+# Runs scripts/typing_report.py over the edsl package, writing HTML into typing_report/, then opens the generated index page.
+typing-report: ## Check type annotations and generate a report
+	python scripts/typing_report.py --source edsl --output typing_report
+	open typing_report/index.html
+
 format: ## Run code autoformatters (black).
 	pre-commit install
 	pre-commit run black-jupyter --all-files --all

diff --git a/edsl/scenarios/Scenario.py b/edsl/scenarios/Scenario.py
@@ -19,6 +19,8 @@
 
 
 class DisplayJSON:
+    """Display a dictionary as JSON."""
+
     def __init__(self, input_dict: dict):
         self.text = json.dumps(input_dict, indent=4)
 
@@ -27,6 +29,8 @@ def __repr__(self):
 
 
 class DisplayYAML:
+    """Display a dictionary as YAML."""
+
     def __init__(self, input_dict: dict):
         import yaml
 
@@ -41,7 +45,7 @@ class Scenario(Base, UserDict, ScenarioHtmlMixin):
 
     __documentation__ = "https://docs.expectedparrot.com/en/latest/scenarios.html"
 
-    def __init__(self, data: Optional[dict] = None, name: str = None):
+    def __init__(self, data: Optional[dict] = None, name: Optional[str] = None):
         """Initialize a new Scenario.
 
         :param data: A dictionary of keys/values for parameterizing questions.
@@ -66,7 +70,6 @@ def replicate(self, n: int) -> "ScenarioList":
         :param n: The number of times to replicate the scenario.
 
         Example:
-
         >>> s = Scenario({"food": "wood chips"})
         >>> s.replicate(2)
         ScenarioList([Scenario({'food': 'wood chips'}), Scenario({'food': 'wood chips'})])
@@ -135,7 +138,7 @@ def __add__(self, other_scenario: Scenario) -> Scenario:
 
     def rename(
         self,
-        old_name_or_replacement_dict: Union[str, dict],
+        old_name_or_replacement_dict: Union[str, dict[str, str]],
         new_name: Optional[str] = None,
     ) -> Scenario:
         """Rename the keys of a scenario.
@@ -267,7 +270,7 @@ def select(self, list_of_keys: Collection[str]) -> "Scenario":
             new_scenario[key] = self[key]
         return new_scenario
 
-    def drop(self, list_of_keys: List[str]) -> "Scenario":
+    def drop(self, list_of_keys: Collection[str]) -> "Scenario":
         """Drop a subset of keys from a scenario.
 
         :param list_of_keys: The keys to drop.
@@ -455,29 +458,30 @@ def from_dict(cls, d: dict) -> "Scenario":
         return cls(d)
 
     def _table(self) -> tuple[dict, list]:
-        """Prepare generic table data."""
+        """Prepare generic table data.
+        >>> s = Scenario({"food": "wood chips"})
+        >>> s._table()
+        ([{'Attribute': 'data', 'Value': "{'food': 'wood chips'}"}, {'Attribute': 'name', 'Value': 'None'}], ['Attribute', 'Value'])
+        """
         table_data = []
         for attr_name, attr_value in self.__dict__.items():
             table_data.append({"Attribute": attr_name, "Value": repr(attr_value)})
         column_names = ["Attribute", "Value"]
         return table_data, column_names
 
     @classmethod
-    def example(cls, randomize: bool = False, has_image=False) -> Scenario:
+    def example(cls, randomize: bool = False) -> Scenario:
         """
         Returns an example Scenario instance.
 
         :param randomize: If True, adds a random string to the value of the example key.
         """
-        if not has_image:
-            addition = "" if not randomize else str(uuid4())
-            return cls(
-                {
-                    "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
-                }
-            )
-        else:
-            return cls.from_image(cls.example_image())
+        addition = "" if not randomize else str(uuid4())
+        return cls(
+            {
+                "persona": f"A reseacher studying whether LLMs can be used to generate surveys.{addition}",
+            }
+        )
 
     def code(self) -> List[str]:
         """Return the code for the scenario."""

diff --git a/scripts/style_report.py b/scripts/style_report.py
@@ -0,0 +1,214 @@
+import os
+import glob
+from pydocstyle import check
+from datetime import datetime
+
+
+def create_html_report(filename, violations):
+    """Create an HTML report for a single file's pydocstyle violations.
+
+    :param filename: Path of the checked file; shown in the page title and header.
+    :param violations: Sized collection of violation objects. Each is expected
+        to expose ``code``, ``line``, ``message`` and ``source`` attributes;
+        missing attributes fall back to generic text instead of raising.
+    :return: The complete HTML document as a single string.
+
+    Every interpolated value is HTML-escaped, so file names, messages or source
+    text containing ``<``, ``>`` or ``&`` cannot corrupt the page markup.
+    """
+    import html
+
+    def esc(value):
+        """Return *value* rendered as HTML-safe text."""
+        return html.escape(str(value))
+
+    safe_filename = esc(filename)
+    generated = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+    # Collect fragments and join once at the end instead of repeated ``+=``.
+    parts = [
+        f"""
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>PyDocStyle Report - {safe_filename}</title>
+        <style>
+            body {{ font-family: Arial, sans-serif; margin: 20px; }}
+            .violation {{ 
+                background-color: #f8f8f8;
+                border-left: 4px solid #e74c3c;
+                margin: 10px 0;
+                padding: 10px;
+            }}
+            .violation-code {{ 
+                font-weight: bold;
+                color: #c0392b;
+            }}
+            .file-info {{
+                background-color: #eee;
+                padding: 10px;
+                margin-bottom: 20px;
+            }}
+            .timestamp {{
+                color: #666;
+                font-size: 0.9em;
+            }}
+        </style>
+    </head>
+    <body>
+        <div class="file-info">
+            <h2>PyDocStyle Report</h2>
+            <p>File: {safe_filename}</p>
+            <p class="timestamp">Generated: {generated}</p>
+            <p><a href="index.html">Back to Index</a></p>
+        </div>
+    """
+    ]
+
+    if not violations:
+        parts.append("<p>✅ No documentation style violations found!</p>")
+    else:
+        parts.append(f"<h3>Found {len(violations)} violation(s):</h3>")
+        for violation in violations:
+            # NOTE(review): pydocstyle's check() reportedly can also yield
+            # exception objects for files it cannot parse; fall back to generic
+            # text rather than raising AttributeError -- confirm against the
+            # pydocstyle documentation.
+            code = esc(getattr(violation, "code", type(violation).__name__))
+            line = esc(getattr(violation, "line", "?"))
+            message = esc(getattr(violation, "message", violation))
+            # NOTE(review): on pydocstyle's Error class, ``source`` may be a
+            # boolean display flag and ``lines`` the actual source text --
+            # verify which attribute is intended here.
+            source = esc(getattr(violation, "source", ""))
+            parts.append(
+                f"""
+            <div class="violation">
+                <p><span class="violation-code">{code}</span> at line {line}</p>
+                <p>{message}</p>
+                <pre><code>{source}</code></pre>
+            </div>
+            """
+            )
+
+    parts.append(
+        """
+    </body>
+    </html>
+    """
+    )
+    return "".join(parts)
+
+
+def create_index_html(file_reports, output_dir):
+    """Create an index.html with a summary table of all reports.
+
+    :param file_reports: Mapping of ``file_path -> (report_path, violation_count)``.
+    :param output_dir: Directory the index is written to; report links are made
+        relative to it.
+    :return: Path of the written ``index.html``.
+
+    Rows are ordered by violation count (descending), then by file path. File
+    names are HTML-escaped and link targets are emitted as URL-quoted,
+    forward-slash paths so they work regardless of the OS path separator.
+    """
+    import html
+    from urllib.parse import quote
+
+    template = """
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <title>PyDocStyle Reports Index</title>
+        <style>
+            body {{ font-family: Arial, sans-serif; margin: 20px; }}
+            table {{
+                width: 100%;
+                border-collapse: collapse;
+                margin-top: 20px;
+            }}
+            th, td {{
+                padding: 12px;
+                text-align: left;
+                border-bottom: 1px solid #ddd;
+            }}
+            th {{
+                background-color: #f5f5f5;
+            }}
+            tr:hover {{
+                background-color: #f8f8f8;
+            }}
+            .error-count {{
+                font-weight: bold;
+                color: #e74c3c;
+            }}
+            .no-errors {{
+                color: #27ae60;
+            }}
+            .summary {{
+                background-color: #eee;
+                padding: 15px;
+                margin-bottom: 20px;
+                border-radius: 4px;
+            }}
+        </style>
+    </head>
+    <body>
+        <h1>PyDocStyle Reports Index</h1>
+        <div class="summary">
+            <p>Generated: {}</p>
+            <p>Total files analyzed: {}</p>
+            <p>Total violations found: {}</p>
+        </div>
+        <table>
+            <thead>
+                <tr>
+                    <th>File Path</th>
+                    <th>Violations</th>
+                </tr>
+            </thead>
+            <tbody>
+    """
+    parts = [
+        template.format(
+            datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            len(file_reports),
+            sum(count for _, count in file_reports.values()),
+        )
+    ]
+
+    # Sort files by violation count (descending) and then by path
+    sorted_files = sorted(file_reports.items(), key=lambda x: (-x[1][1], x[0]))
+
+    for file_path, (report_path, violation_count) in sorted_files:
+        # Make the report path relative to the output directory, then turn it
+        # into a URL: forward slashes only, special characters percent-encoded.
+        relative_report_path = os.path.relpath(report_path, output_dir)
+        href = quote(relative_report_path.replace(os.sep, "/"))
+
+        error_class = "no-errors" if violation_count == 0 else "error-count"
+        parts.append(
+            f"""
+            <tr>
+                <td><a href="{href}">{html.escape(str(file_path))}</a></td>
+                <td class="{error_class}">{violation_count}</td>
+            </tr>
+        """
+        )
+
+    parts.append(
+        """
+            </tbody>
+        </table>
+    </body>
+    </html>
+    """
+    )
+
+    index_path = os.path.join(output_dir, "index.html")
+    with open(index_path, "w", encoding="utf-8") as f:
+        f.write("".join(parts))
+
+    return index_path
+
+
+def process_files(source_dir=".", output_dir="pydocstyle_reports"):
+    """Run pydocstyle over every ``*.py`` file under *source_dir* and write HTML reports.
+
+    One ``<relative path>.html`` page is written per source file beneath
+    *output_dir* (mirroring the source tree), followed by an ``index.html``
+    summary. Progress and totals are printed to stdout.
+
+    :param source_dir: Root directory searched recursively for Python files.
+    :param output_dir: Directory receiving the reports; created if missing.
+    """
+    os.makedirs(output_dir, exist_ok=True)
+
+    # relative source path -> (report path, number of violations)
+    reports = {}
+
+    search_pattern = os.path.join(source_dir, "**/*.py")
+    for source_path in glob.glob(search_pattern, recursive=True):
+        found = list(check([source_path]))
+        page = create_html_report(source_path, found)
+
+        # Mirror the source layout inside the output directory.
+        rel_source = os.path.relpath(source_path, source_dir)
+        report_path = os.path.join(output_dir, f"{rel_source}.html")
+        os.makedirs(os.path.dirname(report_path), exist_ok=True)
+
+        with open(report_path, "w", encoding="utf-8") as handle:
+            handle.write(page)
+
+        reports[rel_source] = (report_path, len(found))
+        print(f"Generated report for {source_path} -> {report_path}")
+
+    index_path = create_index_html(reports, output_dir)
+    print(f"\nGenerated index at {index_path}")
+    print(f"Total files analyzed: {len(reports)}")
+    total_violations = sum(count for _, count in reports.values())
+    print(f"Total violations found: {total_violations}")
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse --source / --output and build the reports.
+    import argparse
+
+    cli = argparse.ArgumentParser(
+        description="Generate HTML reports for PyDocStyle violations"
+    )
+    cli.add_argument(
+        "--source",
+        default=".",
+        help="Source directory containing Python files",
+    )
+    cli.add_argument(
+        "--output",
+        default="pydocstyle_reports",
+        help="Output directory for HTML reports",
+    )
+
+    options = cli.parse_args()
+    process_files(source_dir=options.source, output_dir=options.output)