From ca63301fe93db220702fd9c5207f9b51f4d6807e Mon Sep 17 00:00:00 2001 From: Steven Hinojosa Date: Sun, 13 Jul 2025 10:51:25 -0700 Subject: [PATCH 1/2] feat: add package activity dashboard for tracking package maintenance - Add new pyos-packages section with package activity dashboard - Track last commit dates and identify inactive packages (6+ months) - Add sortable tables for all packages and inactive packages - Update existing package dashboard with activity tracking features - Reorganize navigation to group package-related metrics - Remove pyosmetrics dependency to fix Python 3.9 compatibility Closes #100 --- _quarto.yml | 6 +- peer-review/pyos-package-dashboard.qmd | 37 ++++-- pyos-packages/package-activity.qmd | 157 +++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 10 deletions(-) create mode 100644 pyos-packages/package-activity.qmd diff --git a/_quarto.yml b/_quarto.yml index 584c9e1..bea2b55 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -33,11 +33,13 @@ website: - text: Peer review trends href: peer-review/review-trends.qmd - - text: "pyOpenSci Package Metrics" + - text: "pyOpenSci Packages" menu: + - text: Package Activity Dashboard + href: pyos-packages/package-activity.qmd - text: Accepted Package Metrics href: peer-review/accepted-packages.qmd - - text: Package Dashboard + - text: Package Dashboard href: peer-review/pyos-package-dashboard.qmd diff --git a/peer-review/pyos-package-dashboard.qmd b/peer-review/pyos-package-dashboard.qmd index 1305271..5250419 100644 --- a/peer-review/pyos-package-dashboard.qmd +++ b/peer-review/pyos-package-dashboard.qmd @@ -14,24 +14,18 @@ execute: import ast import warnings from pathlib import Path +from datetime import datetime, timedelta from itables import show import altair as alt import pandas as pd import plotly.express as px -# This is a local module that stores the plot theme -from pyosmetrics.plot_theme import load_poppins_font, register_and_enable_poppins_theme - pd.options.mode.chained_assignment = None pd.options.future.infer_string = True warnings.filterwarnings("ignore") -# Load the & register Poppins theme -load_poppins_font() -register_and_enable_poppins_theme() - package_data_path = Path.cwd().parents[0] / "_data" / "package_data.csv" package_df = pd.read_csv(package_data_path) @@ -39,12 +33,17 @@ package_df = pd.read_csv(package_data_path) package_df['gh_meta'] = package_df['gh_meta'].apply( lambda x: ast.literal_eval(x) if isinstance(x, str) else x ) +package_df['last_commit_date'] = package_df['gh_meta'].apply( + lambda x: x.get('last_commit') if isinstance(x, dict) else None +) +package_df['last_commit_date'] = pd.to_datetime(package_df['last_commit_date']) + # Extract "forks_count" value from the 'gh_meta' column package_df['forks_count'] = package_df['gh_meta'].apply( lambda x: x.get('forks_count' ) if isinstance(x, dict) else None ) - +# Extract "contrib_count" value from the 'gh_meta' column package_df['contrib_count'] = package_df['gh_meta'].apply( lambda x: x.get('contrib_count') if isinstance(x, dict) else None ) @@ -54,6 +53,28 @@ average_forks = int(package_df['forks_count'].mean()) ``` +## Row {height=25%} + +```{python} +#| title: "All Packages (Sortable by Last Commit Date)" + +sorted_df = package_df.sort_values("last_commit_date", ascending=False)[ + ["package_name", "package_description", "last_commit_date"] +] + +show(sorted_df, sortable=True, paging=True, maxBytes=0) +``` + +```{python} +#| title: "Inactive Packages (No commits in last 6 months)" + +six_months_ago = datetime.now() - timedelta(days=180) + +inactive_df = sorted_df[sorted_df["last_commit_date"] < six_months_ago] + +show(inactive_df, sortable=True, paging=True, maxBytes=0) +``` + ## Row {height=5%} ```{python} diff --git a/pyos-packages/package-activity.qmd b/pyos-packages/package-activity.qmd new file mode 100644 index 0000000..52400b9 --- /dev/null +++ b/pyos-packages/package-activity.qmd @@ -0,0 +1,157 @@ +--- +title: "pyOpenSci Package Activity Dashboard" +format: + dashboard: + scrolling: true +execute: + echo: false +--- + +## Row {height=0%} + +```{python} +#| echo: false +import ast +import warnings +from pathlib import Path +from datetime import datetime, timedelta + +from itables import show +import pandas as pd + +pd.options.mode.chained_assignment = None +pd.options.future.infer_string = True + +warnings.filterwarnings("ignore") + +# Load package data +package_data_path = Path.cwd().parents[0] / "_data" / "package_data.csv" +package_df = pd.read_csv(package_data_path) + +# Parse the "gh_meta" column back into dictionaries +package_df['gh_meta'] = package_df['gh_meta'].apply( + lambda x: ast.literal_eval(x) if isinstance(x, str) else x +) + +# Extract relevant fields from gh_meta +package_df['last_commit'] = package_df['gh_meta'].apply( + lambda x: x.get('last_commit') if isinstance(x, dict) else None +) + +package_df['stargazers_count'] = package_df['gh_meta'].apply( + lambda x: x.get('stargazers_count') if isinstance(x, dict) else None +) + +package_df['forks_count'] = package_df['gh_meta'].apply( + lambda x: x.get('forks_count') if isinstance(x, dict) else None +) + +package_df['open_issues_count'] = package_df['gh_meta'].apply( + lambda x: x.get('open_issues_count') if isinstance(x, dict) else None +) + +# Convert last_commit to datetime +package_df['last_commit_date'] = pd.to_datetime(package_df['last_commit'], errors='coerce') + +# Calculate days since last commit +today = datetime.now() +package_df['days_since_last_commit'] = (today - package_df['last_commit_date']).dt.days + +# Create a clean dataframe for display +display_df = package_df[['package_name', 'package_description', 'last_commit_date', + 'days_since_last_commit', 'stargazers_count', 'forks_count', + 'open_issues_count', 'repository_link']].copy() + +# Sort by last commit date (most recent first) +display_df = display_df.sort_values('last_commit_date', ascending=False) + +# Create inactive packages dataframe (6+ months = 180+ days) +six_months_ago = today - timedelta(days=180) +inactive_df = display_df[display_df['last_commit_date'] < six_months_ago].copy() + +# Format dates for display +display_df['last_commit_date'] = display_df['last_commit_date'].dt.strftime('%Y-%m-%d') +inactive_df['last_commit_date'] = inactive_df['last_commit_date'].dt.strftime('%Y-%m-%d') + +# Get current date for display +current_date = datetime.today().date() +today_str = current_date.strftime("%d %B %Y") +``` + +*Last updated: **`{python} today_str`*** + +## Row {height=5%} + +```{python} +#| content: valuebox +#| title: "Total Packages" + +total_packages = len(package_df) + +dict( + icon = "box2-heart", + color = "primary", + value = total_packages +) +``` + +```{python} +#| content: valuebox +#| title: "Active Packages (< 6 months)" + +active_packages = len(display_df) - len(inactive_df) + +dict( + icon = "activity", + color = "success", + value = active_packages +) +``` + +```{python} +#| content: valuebox +#| title: "Inactive Packages (6+ months)" + +inactive_count = len(inactive_df) + +dict( + icon = "pause-circle", + color = "warning", + value = inactive_count +) +``` + +## Row {height=45%} + +```{python} +#| title: "All Packages Sorted by Last Commit Date" + +# Rename columns for better display +display_columns = { + 'package_name': 'Package Name', + 'package_description': 'Description', + 'last_commit_date': 'Last Commit', + 'days_since_last_commit': 'Days Since Last Commit', + 'stargazers_count': 'Stars', + 'forks_count': 'Forks', + 'open_issues_count': 'Open Issues', + 'repository_link': 'Repository' +} + +display_table = display_df.rename(columns=display_columns) + +# Show the table +show(display_table) +``` + +## Row {height=45%} + +```{python} +#| title: "Packages Inactive for 6+ Months" + +if len(inactive_df) > 0: + inactive_table = inactive_df.rename(columns=display_columns) + show(inactive_table) +else: + print("🎉 Great news! All packages have been updated within the last 6 months.") +``` From 4feb4cf9fc71496aa474f2a6c6d5f8fdd29ac206 Mon Sep 17 00:00:00 2001 From: Steven Hinojosa Date: Sun, 13 Jul 2025 14:03:51 -0700 Subject: [PATCH 2/2] refactor: separate inactive packages and general package dashboards - Rename package-activity.qmd to inactive-packages.qmd for clarity - Inactive Packages Dashboard: Show ONLY packages with 6+ months inactivity - Package Dashboard: Remove activity filtering, treat all packages equally - Update navigation and titles to reflect focused purposes - Each dashboard now serves a distinct, clear purpose for different use cases --- _quarto.yml | 4 +-- peer-review/pyos-package-dashboard.qmd | 27 ++-------------- ...age-activity.qmd => inactive-packages.qmd} | 32 +++++++------------ 3 files changed, 15 insertions(+), 48 deletions(-) rename pyos-packages/{package-activity.qmd => inactive-packages.qmd} (86%) diff --git a/_quarto.yml b/_quarto.yml index bea2b55..13f6640 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -35,8 +35,8 @@ website: - text: "pyOpenSci Packages" menu: - - text: Package Activity Dashboard - href: pyos-packages/package-activity.qmd + - text: Inactive Packages + href: pyos-packages/inactive-packages.qmd - text: Accepted Package Metrics href: peer-review/accepted-packages.qmd - text: Package Dashboard diff --git a/peer-review/pyos-package-dashboard.qmd b/peer-review/pyos-package-dashboard.qmd index 5250419..95df4f8 100644 --- a/peer-review/pyos-package-dashboard.qmd +++ b/peer-review/pyos-package-dashboard.qmd @@ -14,7 +14,7 @@ execute: import ast import warnings from pathlib import Path -from datetime import datetime, timedelta + from itables import show import altair as alt @@ -33,10 +33,7 @@ package_df = pd.read_csv(package_data_path) package_df['gh_meta'] = package_df['gh_meta'].apply( lambda x: ast.literal_eval(x) if isinstance(x, str) else x ) -package_df['last_commit_date'] = package_df['gh_meta'].apply( - lambda x: x.get('last_commit') if isinstance(x, dict) else None -) -package_df['last_commit_date'] = pd.to_datetime(package_df['last_commit_date']) + # Extract "forks_count" value from the 'gh_meta' column package_df['forks_count'] = package_df['gh_meta'].apply( @@ -53,27 +50,7 @@ average_forks = int(package_df['forks_count'].mean()) ``` -## Row {height=25%} -```{python} -#| title: "All Packages (Sortable by Last Commit Date)" - -sorted_df = package_df.sort_values("last_commit_date", ascending=False)[ - ["package_name", "package_description", "last_commit_date"] -] - -show(sorted_df, sortable=True, paging=True, maxBytes=0) -``` - -```{python} -#| title: "Inactive Packages (No commits in last 6 months)" - -six_months_ago = datetime.now() - timedelta(days=180) - -inactive_df = sorted_df[sorted_df["last_commit_date"] < six_months_ago] - -show(inactive_df, sortable=True, paging=True, maxBytes=0) -``` ## Row {height=5%} diff --git a/pyos-packages/package-activity.qmd b/pyos-packages/inactive-packages.qmd similarity index 86% rename from pyos-packages/package-activity.qmd rename to pyos-packages/inactive-packages.qmd index 52400b9..3526766 100644 --- a/pyos-packages/package-activity.qmd +++ b/pyos-packages/inactive-packages.qmd @@ -1,5 +1,5 @@ --- -title: "pyOpenSci Package Activity Dashboard" +title: "pyOpenSci Inactive Packages Dashboard" format: dashboard: scrolling: true @@ -97,20 +97,21 @@ dict( ```{python} #| content: valuebox -#| title: "Active Packages (< 6 months)" +#| title: "Last Updated" -active_packages = len(display_df) - len(inactive_df) +from datetime import datetime +last_updated = datetime.now().strftime("%B %d, %Y") dict( - icon = "activity", - color = "success", - value = active_packages + icon = "calendar-check", + color = "info", + value = last_updated ) ``` ```{python} #| content: valuebox -#| title: "Inactive Packages (6+ months)" +#| title: "⚠️ Inactive Packages" inactive_count = len(inactive_df) @@ -121,15 +122,15 @@ dict( ) ``` -## Row {height=45%} +## Row {height=90%} ```{python} -#| title: "All Packages Sorted by Last Commit Date" +#| title: "⚠️ Inactive Packages (6+ Months Without Commits)" # Rename columns for better display display_columns = { 'package_name': 'Package Name', - 'package_description': 'Description', + 'package_description': 'Description', 'last_commit_date': 'Last Commit', 'days_since_last_commit': 'Days Since Last Commit', 'stargazers_count': 'Stars', @@ -138,17 +139,6 @@ display_columns = { 'repository_link': 'Repository' } -display_table = display_df.rename(columns=display_columns) - -# Show the table -show(display_table) -``` - -## Row {height=45%} - -```{python} -#| title: "Packages Inactive for 6+ Months" - if len(inactive_df) > 0: inactive_table = inactive_df.rename(columns=display_columns) show(inactive_table)