#!/usr/bin/env python3
"""
This script is intended to be used as part of a GitHub Actions workflow in order to decide if the
integration tests should:

a) be triggered at all
b) if they should be triggered, should they be triggered for a subset of dialects or all dialects?

The tests can be triggered manually by using the following directive in either the PR description
or a PR comment:

    /integration-tests

To limit them to a certain dialect or dialects, you can specify:

    /integration-tests dialects=bigquery,duckdb

If you specify nothing, a `git diff` will be performed between your PR branch and the base branch.
If any files modified contain one of the SUPPORTED_DIALECTS in the filename, that dialect will be
added to the list of dialects to test. If no files match, the integration tests will be skipped.

Note that integration tests in the remote workflow are only implemented for a subset of dialects.
If new ones are added, update the SUPPORTED_DIALECTS constant below.

Each dialect is tested against itself (roundtrip) and duckdb (transpilation).
Dialects not in this list are ignored; if *only* unsupported dialects are requested, the tests
are skipped.

The decision is written to the file named by GITHUB_OUTPUT as `skip=true`, or as `skip=false`
optionally followed by `dialects=<comma separated list>`.
"""

import typing as t
import os
import sys
import json
import subprocess
from pathlib import Path

TRIGGER = "/integration-test"
SUPPORTED_DIALECTS = ["duckdb", "bigquery", "snowflake"]


def _parse_requested_dialects(trigger: str) -> t.List[str]:
    """Return every dialect named on a trigger line, lowercased and NOT yet filtered."""
    requested: t.List[str] = []

    # split() without an argument tolerates tabs and repeated spaces between arguments;
    # dropping the first token covers both /integration-test and /integration-tests
    for part in trigger.split()[1:]:
        key, sep, value = part.partition("=")
        if sep and key.lower().startswith("dialect"):
            requested.extend(d.strip().lower() for d in value.split(","))

    # "dialects=" or "dialects=a,,b" produce empty names; they carry no information
    return [d for d in requested if d]


def get_dialects_from_manual_trigger(trigger: str) -> t.Set[str]:
    """
    Takes a trigger string and parses out the supported dialects

    /integration-test -> set()
    /integration-test dialects=bigquery -> {"bigquery"}
    /integration-test dialects=bigquery,duckdb -> {"bigquery", "duckdb"}
    /integration-test dialects=exasol,duckdb -> {"duckdb"}

    Raises ValueError if the string does not start with TRIGGER.
    """
    if not trigger.startswith(TRIGGER):
        raise ValueError(f"Invalid trigger: {trigger}")

    print(f"Parsing trigger args: {trigger.split()[1:]}")

    return {d for d in _parse_requested_dialects(trigger) if d in SUPPORTED_DIALECTS}


def get_dialects_from_file_names(file_names: t.Iterable[str]) -> t.Set[str]:
    """
    Return the SUPPORTED_DIALECTS that occur as a (case-insensitive) substring of any file name.
    """
    matching: t.Set[str] = set()

    for file_name in file_names:
        normalized = file_name.strip().lower()
        matching.update(d for d in SUPPORTED_DIALECTS if d in normalized)

    return matching


def get_dialects_from_git(base_ref: str, current_ref: str) -> t.Set[str]:
    """
    Takes two git refs and runs `git diff --name-only <base_ref> <current_ref>`

    If any of the returned file names contain a dialect from SUPPORTED_DIALECTS as
    a substring, that dialect is included in the returned set

    Raises ValueError if git exits with a non-zero status.
    """
    print(f"Checking for files changed between '{base_ref}' and '{current_ref}'")

    # stderr is captured separately so that git warnings are never parsed as file names
    result = subprocess.run(
        ["git", "diff", "--name-only", base_ref, current_ref],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    output = result.stdout.decode("utf8", errors="replace")
    errors = result.stderr.decode("utf8", errors="replace")

    if result.returncode != 0:
        raise ValueError(
            f"Git process failed with exit code {result.returncode}:\n{output}{errors}"
        )

    if errors:
        print(f"Git warnings:\n{errors}", file=sys.stderr)

    print(f"Git output:\n{output}")

    return get_dialects_from_file_names(output.splitlines())


def find_trigger_line(event: t.Dict[str, t.Any]) -> t.Optional[str]:
    """
    Return the first line (stripped, lowercased) of the PR comment or PR description that starts
    with TRIGGER, or None if there is no such line.
    """
    # for issue_comment events, the body is located at github.event.comment.body
    # since issues and PR's are the same thing in the GH backend, we also have to check if the
    # issue type is "pull_request"
    issue = event.get("issue") or {}
    comment = event.get("comment") or {}
    comment_body = comment.get("body") if issue.get("pull_request") else None

    # for pull_request events, the body is located at github.event.pull_request.body
    pr_description = (event.get("pull_request") or {}).get("body")

    # join with a newline so the last line of one text cannot swallow the first line of the other
    text_blob = "\n".join(text for text in (comment_body, pr_description) if text)

    for line in text_blob.splitlines():
        candidate = line.strip().lower()
        if candidate.startswith(TRIGGER):
            return candidate

    return None


def get_base_ref(event: t.Dict[str, t.Any]) -> t.Optional[str]:
    """Return the commit to diff against, or None if the event payload does not carry one."""
    pull_request_base_ref = ((event.get("pull_request") or {}).get("base") or {}).get("sha")
    return pull_request_base_ref or event.get("before")


def resolve_test_params(
    event: t.Dict[str, t.Any], current_ref: str
) -> t.Tuple[bool, t.Set[str]]:
    """
    Decide whether the integration tests should run and for which dialects.

    Returns (should_run, dialects). An empty dialect set together with should_run=True means
    "all supported dialects".

    Raises ValueError if no trigger is present and a non-comment event carries no base ref.
    """
    trigger_line = find_trigger_line(event)

    if trigger_line is not None:
        # if the user has explicitly requested /integration-tests then use that
        print(f"Handling trigger line: {trigger_line}")
        requested = _parse_requested_dialects(trigger_line)
        dialects = get_dialects_from_manual_trigger(trigger_line)

        if requested and not dialects:
            # an empty set would otherwise be read as "all dialects", which is the opposite of
            # what was asked for
            print(f"None of the requested dialects are supported: {', '.join(requested)}")
            return False, set()

        return True, dialects

    # otherwise, do a git diff and inspect the changed files
    print("Explicit trigger line not detected; performing git diff")
    base_ref = get_base_ref(event)

    if not base_ref:
        if "comment" in event:
            # comment payloads carry no base ref to diff against; a comment without the
            # trigger is simply not a request to run the tests
            print("Comment event without a trigger line; nothing to compare")
            return False, set()
        raise ValueError("Unable to determine base ref")

    print(f"Comparing '{current_ref}' against '{base_ref}'")

    # only trigger if a file relating to a supported dialect has changed
    dialects = get_dialects_from_git(base_ref=base_ref, current_ref=current_ref)
    return bool(dialects), dialects


def format_output_lines(should_run: bool, dialects: t.Iterable[str]) -> t.List[str]:
    """Build the `key=value` lines to append to GITHUB_OUTPUT (dialects sorted for stability)."""
    if not should_run:
        return ["skip=true"]

    lines = ["skip=false"]
    ordered = sorted(dialects)
    if ordered:
        lines.append(f"dialects={','.join(ordered)}")
    return lines


def main() -> None:
    """Read the GitHub event, decide what to test and write the decision to GITHUB_OUTPUT."""
    github_event_path = os.environ.get("GITHUB_EVENT_PATH")
    github_sha = os.environ.get("GITHUB_SHA")
    github_output = os.environ.get("GITHUB_OUTPUT")

    if (
        not os.environ.get("GITHUB_ACTIONS")
        or not github_event_path
        or not github_sha
        or not github_output
    ):
        print("This script needs to run within GitHub Actions")
        sys.exit(1)

    with Path(github_event_path).open("r", encoding="utf8") as f:
        event: t.Dict[str, t.Any] = json.load(f)

    print("Handling event: \n" + json.dumps(event, indent=2))

    should_run, dialects = resolve_test_params(event, current_ref=github_sha)

    if should_run:
        dialects_str = (
            f"the following dialects: {', '.join(sorted(dialects))}"
            if dialects
            else "all supported dialects"
        )
        print(f"Conclusion: should run tests for {dialects_str}")
    else:
        print("Conclusion: No tests to run")

    # write output variables
    with Path(github_output).open("a", encoding="utf8") as f:
        f.writelines(f"{line}\n" for line in format_output_lines(should_run, dialects))


if __name__ == "__main__":
    main()
// Body of the `actions/github-script` step that publishes the integration test summary.
// `github` (authenticated Octokit client) and `context` are injected by actions/github-script,
// which also runs this body inside an async function, so `await` is available.

// summary.json is downloaded from the remote workflow in the previous step
const summary = require("./summary.json");

// Hidden marker used to find this comment again on later runs.
// It MUST be non-empty: "".includes-style matching with an empty string is true for every
// comment, which would make us overwrite the first comment on the PR, whoever wrote it.
const commentIdentifier = "<!-- integration-test-summary -->";
const body = `${commentIdentifier}\n${summary.msg}`;

const issueParams = {
  issue_number: context.issue.number,
  owner: context.repo.owner,
  repo: context.repo.repo,
};

// listComments returns a single page only; paginate so the marker is also found on PRs
// with long discussions instead of posting a duplicate summary
const comments = await github.paginate(github.rest.issues.listComments, {
  ...issueParams,
  per_page: 100,
});

// comment.body can be missing/empty, so guard before searching it
const existingComment = comments.find((comment) =>
  (comment.body || "").includes(commentIdentifier)
);

if (existingComment) {
  // Update existing comment
  await github.rest.issues.updateComment({
    comment_id: existingComment.id,
    owner: context.repo.owner,
    repo: context.repo.repo,
    body: body,
  });
} else {
  // Create new comment
  await github.rest.issues.createComment({
    ...issueParams,
    body: body,
  });
}