From e0fedeb419ba3f8aee0f488e046acb26e26ada66 Mon Sep 17 00:00:00 2001
From: Elijah Ahianyo <elijahahianyo@gmail.com>
Date: Mon, 25 Mar 2024 08:33:28 +0000
Subject: [PATCH] [REF-1682][REF-1683][REF-1684][REF-2283]Benchmark reflex
 package size and .web folder (#2880)

* remove codspeed.yml

* test upload job

* minor edits to get upload job working

* perhaps this works

* upload needs reflex-install-size

* retrigger pipeline

* test only on ubuntu

* change to save to db directly

* oops

* size benchmarks

* .web for counter

* its timeout-minutes

* use integration.sh to run and kill process

* install psycopg2

* move .web runs to integration_tests.yml to save runners

* fix measurement-type for reflex-web

* add database url to env

* psycopg2

* test run ids

* commit sha gets the job done

* refactor

* add more matrices

* move reflex package size to integration_test.yml

* fix venv path

* test fix

* test fix

* use hyphen

* testing reflex build size

* ls for temp debug

* fix typo in command

* possible fix

* possible fix for windows

* remove dead code

* remove dead code

* remove unwanted comments

* refactor

* rebase on main

* pr_title

* remove pr_title from args

* debug

* should work now

* precommit fix

* print out package size for

* add shell

* test

* trying again

* dont use cached poetry to have accurate measurement of deps

* remove reflex deps calculation step from integration job

* fix script path

* precommit fix

* no real difference on different python versions so use 3.11.5

* remove ls keyword
---
 .github/workflows/benchmarks.yml              |  77 ++++++-
 .github/workflows/integration_tests.yml       |  23 +-
 scripts/benchmarks/benchmark_reflex_size.py   | 206 ++++++++++++++++++
 scripts/{ => benchmarks}/benchmarks.sh        |   0
 .../lighthouse_score_upload.py                |   0
 .../simple_app_benchmark_upload.py            |   0
 6 files changed, 298 insertions(+), 8 deletions(-)
 create mode 100644 scripts/benchmarks/benchmark_reflex_size.py
 rename scripts/{ => benchmarks}/benchmarks.sh (100%)
 rename scripts/{ => benchmarks}/lighthouse_score_upload.py (100%)
 rename scripts/{ => benchmarks}/simple_app_benchmark_upload.py (100%)

diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 62360a9966e..573cf338aaa 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -22,6 +22,8 @@ env:
   TELEMETRY_ENABLED: false
   NODE_OPTIONS: '--max_old_space_size=4096'
   DATABASE_URL: ${{ secrets.DATABASE_URL }}
+  PR_TITLE: ${{ github.event.pull_request.title }}
+
 
 jobs:
   reflex-web:
@@ -63,16 +65,15 @@ jobs:
         run: |
           # Check that npm is home
           npm -v
-          poetry run bash scripts/benchmarks.sh ./reflex-web prod
+          poetry run bash scripts/benchmarks/benchmarks.sh ./reflex-web prod
         env:
           LHCI_GITHUB_APP_TOKEN: $
       - name: Run Benchmarks
         # Only run if the database creds are available in this context.
         if: ${{ env.DATABASE_URL }}
-        run: poetry run python scripts/lighthouse_score_upload.py "$GITHUB_SHA" ./integration/benchmarks/.lighthouseci
+        run: poetry run python scripts/benchmarks/lighthouse_score_upload.py "$GITHUB_SHA" ./integration/benchmarks/.lighthouseci
         env:
           GITHUB_SHA: ${{ github.sha }}
-          PR_TITLE: ${{ github.event.pull_request.title }}
 
   simple-apps-benchmarks:
     env:
@@ -119,11 +120,75 @@ jobs:
       - name: Upload benchmark results
         # Only run if the database creds are available in this context.
         if: ${{ env.DATABASE_URL }}
-        env:
-          PR_TITLE: ${{ github.event.pull_request.title }}
         run:
-          poetry run python scripts/simple_app_benchmark_upload.py --os "${{ matrix.os }}"
+          poetry run python scripts/benchmarks/simple_app_benchmark_upload.py --os "${{ matrix.os }}"
           --python-version "${{ matrix.python-version }}" --commit-sha "${{ github.sha }}"
           --benchmark-json "${{ env.OUTPUT_FILE }}"
           --db-url "${{ env.DATABASE_URL }}" --branch-name "${{ github.head_ref || github.ref_name }}"
           --event-type "${{ github.event_name }}" --actor "${{ github.actor }}" --pr-id "${{ github.event.pull_request.id }}"
+
+  reflex-build-size:
+    timeout-minutes: 30
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/setup_build_env
+        with:
+          python-version: '3.11.5'
+          run-poetry-install: true
+          create-venv-at-path: .venv
+      - name: Install additional dependencies for DB access
+        run: poetry run pip install psycopg2-binary
+      - name: Build reflex
+        run: poetry build
+      - name: Upload benchmark results
+        # Only run if the database creds are available in this context.
+        if: ${{ env.DATABASE_URL }}
+        env:
+          # The branch name is attacker-controlled on PRs, so hand it to the
+          # shell as data instead of expanding it inside the script text.
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+        run: poetry run python scripts/benchmarks/benchmark_reflex_size.py --os ubuntu-latest
+          --python-version 3.11.5 --commit-sha "${{ github.sha }}" --pr-id "${{ github.event.pull_request.id }}"
+          --db-url "$DATABASE_URL" --branch-name "$BRANCH_NAME"
+          --measurement-type "reflex-build" --path ./dist
+
+  reflex-plus-dependency-size:
+    timeout-minutes: 30
+    strategy:
+      # Prioritize getting more information out of the workflow (even if something fails)
+      fail-fast: false
+      matrix:
+        # Show OS combos first in GUI
+        os: [ ubuntu-latest, windows-latest, macos-latest ]
+        # NOTE(review): no setup-python step in this job, so this value only labels the row.
+        python-version: [ '3.11.5' ]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - name: Install Poetry
+        uses: snok/install-poetry@v1
+        with:
+          version: 1.3.1
+          virtualenvs-create: true
+          virtualenvs-in-project: true
+          virtualenvs-path: .venv
+      - name: Run poetry install
+        shell: bash
+        run: |
+          python -m venv .venv
+          source .venv/*/activate
+          poetry install --without dev --no-interaction --no-root
+      - name: Install additional dependencies for DB access
+        run: poetry run pip install psycopg2-binary
+      - name: calculate and upload size
+        if: ${{ env.DATABASE_URL }}
+        shell: bash
+        env:
+          # The branch name is attacker-controlled on PRs, so hand it to the
+          # shell as data; bash is forced so "$VAR" also expands on Windows.
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
+        run: poetry run python scripts/benchmarks/benchmark_reflex_size.py --os "${{ matrix.os }}"
+          --python-version "${{ matrix.python-version }}" --commit-sha "${{ github.sha }}"
+          --pr-id "${{ github.event.pull_request.id }}" --db-url "$DATABASE_URL"
+          --branch-name "$BRANCH_NAME" --measurement-type "reflex-package" --path ./.venv
diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml
index e37fe11b1e4..172e8b2bc81 100644
--- a/.github/workflows/integration_tests.yml
+++ b/.github/workflows/integration_tests.yml
@@ -26,6 +26,9 @@ env:
   PYTHONIOENCODING: 'utf8'
   TELEMETRY_ENABLED: false
   NODE_OPTIONS: '--max_old_space_size=4096'
+  DATABASE_URL: ${{ secrets.DATABASE_URL }}
+  PR_TITLE: ${{ github.event.pull_request.title }}
+
 
 jobs:
   example-counter:
@@ -60,17 +63,17 @@ jobs:
           python-version: ${{ matrix.python-version }}
           run-poetry-install: true
           create-venv-at-path: .venv
-
       - name: Clone Reflex Examples Repo
         uses: actions/checkout@v4
         with:
           repository: reflex-dev/reflex-examples
           path: reflex-examples
-
       - name: Install requirements for counter example
         working-directory: ./reflex-examples/counter
         run: |
           poetry run pip install -r requirements.txt
+      - name: Install additional dependencies for DB access
+        run: poetry run pip install psycopg2-binary
       - name: Check export --backend-only before init for counter example
         working-directory: ./reflex-examples/counter
         run: |
@@ -91,6 +94,13 @@ jobs:
           # Check that npm is home
           npm -v
           poetry run bash scripts/integration.sh ./reflex-examples/counter dev
+      - name: Measure and upload .web size
+        if: ${{ env.DATABASE_URL }}
+        run: poetry run python scripts/benchmarks/benchmark_reflex_size.py --os "${{ matrix.os }}"
+          --python-version "${{ matrix.python-version }}" --commit-sha "${{ github.sha }}"
+          --pr-id "${{ github.event.pull_request.id }}" --db-url "${{ env.DATABASE_URL }}" 
+          --branch-name "${{ github.head_ref || github.ref_name }}"
+          --measurement-type "counter-app-dot-web" --path ./reflex-examples/counter/.web
 
   reflex-web:
     strategy:
@@ -121,6 +131,8 @@ jobs:
       - name: Install Requirements for reflex-web
         working-directory: ./reflex-web
         run: poetry run pip install -r requirements.txt
+      - name: Install additional dependencies for DB access
+        run: poetry run pip install psycopg2-binary
       - name: Init Website for reflex-web
         working-directory: ./reflex-web
         run: poetry run reflex init
@@ -129,3 +141,10 @@ jobs:
           # Check that npm is home
           npm -v
           poetry run bash scripts/integration.sh ./reflex-web prod
+      - name: Measure and upload .web size
+        if: ${{ env.DATABASE_URL }}
+        run: poetry run python scripts/benchmarks/benchmark_reflex_size.py --os "${{ matrix.os }}"
+          --python-version "${{ matrix.python-version }}" --commit-sha "${{ github.sha }}"
+          --pr-id "${{ github.event.pull_request.id }}" 
+          --db-url "${{ env.DATABASE_URL }}" --branch-name "${{ github.head_ref || github.ref_name }}"
+          --measurement-type "reflex-web-dot-web" --path ./reflex-web/.web
diff --git a/scripts/benchmarks/benchmark_reflex_size.py b/scripts/benchmarks/benchmark_reflex_size.py
new file mode 100644
index 00000000000..1bb5e535d7b
--- /dev/null
+++ b/scripts/benchmarks/benchmark_reflex_size.py
@@ -0,0 +1,206 @@
+"""Checks the size of a specific directory and uploads result."""
+import argparse
+import os
+import subprocess
+from datetime import datetime
+
+import psycopg2
+
+
+def get_directory_size(directory):
+    """Get the size of a directory in bytes.
+
+    Args:
+        directory: The directory to walk recursively.
+
+    Returns:
+        The summed size in bytes of its files; dangling symlinks are skipped.
+    """
+    total_size = 0
+    for dirpath, _, filenames in os.walk(directory):
+        for fp in (os.path.join(dirpath, f) for f in filenames):
+            if os.path.exists(fp):  # dangling symlinks make getsize raise
+                total_size += os.path.getsize(fp)
+    return total_size
+
+
+def get_python_version(venv_path, os_name):
+    """Get the python version of python in a virtual env.
+
+    Args:
+        venv_path: Path to virtual environment.
+        os_name: Name of os; "windows" in it selects Scripts/python.exe.
+
+    Returns:
+        The major.minor python version, or None if it cannot be determined.
+    """
+    python_executable = (
+        os.path.join(venv_path, "bin", "python")
+        if "windows" not in os_name
+        else os.path.join(venv_path, "Scripts", "python.exe")
+    )
+    try:
+        output = subprocess.check_output(
+            [python_executable, "--version"], stderr=subprocess.STDOUT
+        )
+        python_version = output.decode("utf-8").strip().split()[1]
+        return ".".join(python_version.split(".")[:-1])
+    except (subprocess.CalledProcessError, OSError):  # OSError: no interpreter
+        return None
+
+
+def get_package_size(venv_path, os_name):
+    """Measure the site-packages directory of a virtual environment.
+
+    Args:
+        venv_path: The path to the venv.
+        os_name: Name of os; a value containing "windows" selects the
+            Windows venv layout.
+
+    Returns:
+        The total size in bytes of all files under site-packages.
+
+    Raises:
+        ValueError: when the python version cannot be determined or the
+            site-packages directory does not exist.
+    """
+    version = get_python_version(venv_path, os_name)
+    if version is None:
+        raise ValueError("Error: Failed to determine Python version.")
+
+    # Windows venvs use Lib/site-packages; elsewhere the directory sits
+    # under lib/python<version>.
+    if "windows" in os_name:
+        relative_parts = ["Lib", "site-packages"]
+    else:
+        relative_parts = ["lib", f"python{version}", "site-packages"]
+
+    site_packages = os.path.join(venv_path, *relative_parts)
+    if os.path.exists(site_packages):
+        return get_directory_size(site_packages)
+
+    raise ValueError(
+        "Error: Virtual environment does not exist or is not activated."
+    )
+
+
+def insert_benchmarking_data(
+    db_connection_url: str,
+    os_type_version: str,
+    python_version: str,
+    measurement_type: str,
+    commit_sha: str,
+    pr_title: str,
+    branch_name: str,
+    pr_id: str,
+    path: str,
+):
+    """Measure the size at path and insert one row into size_benchmarks.
+
+    Args:
+        db_connection_url: The URL to connect to the database.
+        os_type_version: The OS label to insert; also passed to get_package_size.
+        python_version: The Python version to insert.
+        measurement_type: Metric label; "reflex-package" measures site-packages.
+        commit_sha: The commit SHA to insert.
+        pr_title: The PR title to insert.
+        branch_name: The name of the branch.
+        pr_id: The id of the PR.
+        path: The directory to measure (the venv root for "reflex-package").
+    """
+    if measurement_type == "reflex-package":
+        size = get_package_size(path, os_type_version)
+    else:
+        size = get_directory_size(path)
+
+    # Timestamp stored in created_at (naive local time of the machine running this)
+    current_timestamp = datetime.now()
+
+    # Connect to the database and insert the row with a parameterized query
+    with psycopg2.connect(db_connection_url) as conn, conn.cursor() as cursor:
+        insert_query = """
+            INSERT INTO size_benchmarks (os, python_version, commit_sha, created_at, pr_title, branch_name, pr_id, measurement_type, size)
+            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s);
+            """
+        cursor.execute(
+            insert_query,
+            (
+                os_type_version,
+                python_version,
+                commit_sha,
+                current_timestamp,
+                pr_title,
+                branch_name,
+                pr_id,
+                measurement_type,
+                round(
+                    size / (1024 * 1024), 3
+                ),  # bytes converted to MiB (1024 * 1024), rounded to 3 places.
+            ),
+        )
+        # Commit the transaction explicitly before leaving the with block
+        conn.commit()
+
+
+def main():
+    """Parse the CLI arguments, then measure the size at --path and upload it."""
+    parser = argparse.ArgumentParser(description="Run benchmarks and process results.")
+    parser.add_argument(
+        "--os", help="The OS type and version to insert into the database."
+    )
+    parser.add_argument(
+        "--python-version", help="The Python version to insert into the database."
+    )
+    parser.add_argument(
+        "--commit-sha", help="The commit SHA to insert into the database."
+    )
+    parser.add_argument(
+        "--db-url",
+        help="The URL to connect to the database.",
+        required=True,
+    )
+    parser.add_argument(
+        "--pr-title",
+        help="The PR title to insert into the database.",
+    )
+    parser.add_argument(
+        "--branch-name",
+        help="The current branch",
+        required=True,
+    )
+    parser.add_argument(
+        "--pr-id",
+        help="The pr id",
+        required=True,
+    )
+    parser.add_argument(
+        "--measurement-type",
+        help="The type of metric to be checked.",
+        required=True,
+    )
+    parser.add_argument(
+        "--path",
+        help="the current path to check size.",
+        required=True,
+    )
+    args = parser.parse_args()
+
+    # Prefer --pr-title, else the PR_TITLE env var; empty string when neither is set (e.g. push events).
+    pr_title = args.pr_title or os.getenv("PR_TITLE", "")
+
+    # Measure the requested path and insert the result into the database
+    insert_benchmarking_data(
+        db_connection_url=args.db_url,
+        os_type_version=args.os,
+        python_version=args.python_version,
+        measurement_type=args.measurement_type,
+        commit_sha=args.commit_sha,
+        pr_title=pr_title,
+        branch_name=args.branch_name,
+        pr_id=args.pr_id,
+        path=args.path,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/benchmarks.sh b/scripts/benchmarks/benchmarks.sh
similarity index 100%
rename from scripts/benchmarks.sh
rename to scripts/benchmarks/benchmarks.sh
diff --git a/scripts/lighthouse_score_upload.py b/scripts/benchmarks/lighthouse_score_upload.py
similarity index 100%
rename from scripts/lighthouse_score_upload.py
rename to scripts/benchmarks/lighthouse_score_upload.py
diff --git a/scripts/simple_app_benchmark_upload.py b/scripts/benchmarks/simple_app_benchmark_upload.py
similarity index 100%
rename from scripts/simple_app_benchmark_upload.py
rename to scripts/benchmarks/simple_app_benchmark_upload.py