178 changes: 178 additions & 0 deletions .github/scripts/get_integration_test_params.py
@@ -0,0 +1,178 @@
#!/usr/bin/env python3
"""
This script is intended to run as part of a GitHub Actions workflow to decide:

a) whether the integration tests should be triggered at all
b) if they should be triggered, whether to run them for a subset of dialects or for all of them

The tests can be triggered manually by using the following directive in either the PR description or a PR comment:

/integration-tests

To limit them to a certain dialect or dialects, you can specify:

/integration-tests dialects=bigquery,duckdb

If you specify nothing, a `git diff` is performed between your PR branch and the base branch.
If any modified filename contains one of the SUPPORTED_DIALECTS as a substring, that dialect is added to the
list of dialects to test. If no files match, the integration tests are skipped.

Note that integration tests in the remote workflow are only implemented for a subset of dialects.
If new ones are added, update the SUPPORTED_DIALECTS constant below.

Each dialect is tested against itself (roundtrip) and against duckdb (transpilation).
Dialects outside this list are ignored; if none of the supplied dialects are supported, the tests run for all supported dialects.
"""

import typing as t
import os
import sys
import json
import subprocess
from pathlib import Path

TRIGGER = "/integration-test"
SUPPORTED_DIALECTS = ["duckdb", "bigquery", "snowflake"]


def get_dialects_from_manual_trigger(trigger: str) -> t.Set[str]:
"""
Takes a trigger string and parses out the supported dialects

    /integration-test -> set()
    /integration-test dialects=bigquery -> {"bigquery"}
    /integration-test dialects=bigquery,duckdb -> {"bigquery", "duckdb"}
    /integration-test dialects=exasol,duckdb -> {"duckdb"}
"""

if not trigger.startswith(TRIGGER):
raise ValueError(f"Invalid trigger: {trigger}")

    # drop the trigger word itself (covers both /integration-test and /integration-tests)
trigger_parts = trigger.split(" ")[1:]

print(f"Parsing trigger args: {trigger_parts}")

dialects: t.List[str] = []
for part in trigger_parts:
# try to parse key=value pairs
maybe_kv = part.split("=", maxsplit=1)
if len(maybe_kv) >= 2:
k, v = maybe_kv[0], maybe_kv[1]
if k.lower().startswith("dialect"):
dialects.extend([d.lower().strip() for d in v.split(",")])

return {d for d in dialects if d in SUPPORTED_DIALECTS}


def get_dialects_from_git(base_ref: str, current_ref: str) -> t.Set[str]:
"""
Takes two git refs and runs `git diff --name-only <base_ref> <current_ref>`

If any of the returned file names contain a dialect from SUPPORTED_DIALECTS as
a substring, that dialect is included in the returned set
"""
print(f"Checking for files changed between '{base_ref}' and '{current_ref}'")

result = subprocess.run(
["git", "diff", "--name-only", base_ref, current_ref],
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
)
output = result.stdout.decode("utf8")

if result.returncode != 0:
raise ValueError(f"Git process failed with exit code {result.returncode}:\n{output}")

print(f"Git output:\n{output}")

matching_dialects = []

for l in output.splitlines():
l = l.strip().lower()

matching_dialects.extend([d for d in SUPPORTED_DIALECTS if d in l])

return set(matching_dialects)


if __name__ == "__main__":
github_event_path = os.environ.get("GITHUB_EVENT_PATH")
github_sha = os.environ.get("GITHUB_SHA")
github_output = os.environ.get("GITHUB_OUTPUT")

if (
not os.environ.get("GITHUB_ACTIONS")
or not github_event_path
or not github_sha
or not github_output
):
print(f"This script needs to run within GitHub Actions")
sys.exit(1)

github_event_path = Path(github_event_path)
github_output = Path(github_output)

with github_event_path.open("r") as f:
event: t.Dict[str, t.Any] = json.load(f)

print(f"Handling event: \n" + json.dumps(event, indent=2))

    # for issue_comment events, the body is located at github.event.comment.body
    # since issues and PRs are the same thing in the GitHub backend, we also have to check that the issue is actually a pull request
comment_body = (
event.get("comment", {}).get("body") if event.get("issue", {}).get("pull_request") else None
)

# for pull_request events, the body is located at github.event.pull_request.body
pr_description = event.get("pull_request", {}).get("body")

dialects = []
should_run = False

text_blob = f"{comment_body or ''}{pr_description or ''}"
text_blob_lines = [l.strip().lower() for l in text_blob.splitlines()]
if trigger_line := [l for l in text_blob_lines if l.startswith(TRIGGER)]:
# if the user has explicitly requested /integration-tests then use that
print(f"Handling trigger line: {trigger_line[0]}")
dialects = get_dialects_from_manual_trigger(trigger_line[0])
should_run = True
else:
# otherwise, do a git diff and inspect the changed files
print(f"Explicit trigger line not detected; performing git diff")
pull_request_base_ref = event.get("pull_request", {}).get("base", {}).get("sha")
issue_comment_base_ref = event.get("before")

base_ref = pull_request_base_ref or issue_comment_base_ref
if not base_ref:
raise ValueError("Unable to determine base ref")

current_ref = github_sha
print(f"Comparing '{current_ref}' against '{base_ref}'")
        # only trigger if a file relating to a supported dialect has changed
        dialects = get_dialects_from_git(base_ref=base_ref, current_ref=current_ref)
if dialects:
should_run = True

if should_run:
dialects_str = (
f"the following dialects: {', '.join(dialects)}"
if dialects
else f"all supported dialects"
)
print(f"Conclusion: should run tests for {dialects_str}")
else:
print(f"Conclusion: No tests to run")

# write output variables
lines = []
if should_run:
lines.append("skip=false")
if dialects:
lines.append(f"dialects={','.join(dialects)}")
else:
lines.append("skip=true")

with github_output.open("a") as f:
f.writelines(f"{l}\n" for l in lines)
74 changes: 61 additions & 13 deletions .github/workflows/run-integration-tests.yml
@@ -10,15 +10,13 @@ on:
types: [opened, synchronize, reopened]

jobs:
should-run:
name: Check if integration tests should run
runs-on: ubuntu-latest
outputs:
skip: ${{ steps.test-parameters.outputs.skip }}
dialects: ${{ steps.test-parameters.outputs.dialects }}

steps:
- name: Print debugging info
run: |
@@ -43,6 +41,29 @@ jobs:

Ref Name: ${{ github.ref_name }}
EOF

- name: Checkout Code
uses: actions/checkout@v5
with:
# we need to checkout all refs so we can run `git diff`
fetch-depth: 0

- name: Set up Python
uses: actions/setup-python@v6
with:
python-version: '3.13'

- name: Check if integration tests should be run
id: test-parameters
run: |
python .github/scripts/get_integration_test_params.py

run-integration-tests:
name: Run Integration Tests
runs-on: ubuntu-latest
needs: should-run
if: needs.should-run.outputs.skip == 'false'
steps:
- name: Acquire credentials
id: app-token
uses: actions/create-github-app-token@v2
@@ -70,7 +91,8 @@ jobs:
-f sqlglot_ref=${{ github.sha }} \
-f sqlglot_pr_number=${{ github.event.number || github.event.issue.number }} \
-f sqlglot_branch_name=${{ github.head_ref || github.ref_name }} \
-f correlation_id="$CORRELATION_ID" \
-f dialects="${{ needs.should-run.outputs.dialects }}"

echo "Triggered workflow using correlation id: $CORRELATION_ID"

@@ -126,11 +148,37 @@ jobs:
with:
script: |
// summary.json is downloaded from the remote workflow in the previous step
const summary = require("./summary.json");

// Add a unique identifier to find this comment later
const commentIdentifier = "<!-- integration-test-summary -->";
const body = `${commentIdentifier}\n${summary.msg}`;

// Find existing comment
const { data: comments } = await github.rest.issues.listComments({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
});

const existingComment = comments.find(comment =>
comment.body.includes(commentIdentifier)
);

if (existingComment) {
// Update existing comment
await github.rest.issues.updateComment({
comment_id: existingComment.id,
owner: context.repo.owner,
repo: context.repo.repo,
body: body
});
} else {
// Create new comment
await github.rest.issues.createComment({
issue_number: context.issue.number,
owner: context.repo.owner,
repo: context.repo.repo,
body: body
});
}
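For completeness, here is a rough local dry run of the new should-run check (again, not part of the PR; the event payload, GITHUB_SHA value, and working directory are assumptions): it fakes the environment variables the script requires and then prints the skip/dialects lines that the workflow's test-parameters step exposes as job outputs.

import json
import os
import subprocess
import tempfile

# Assumes the current working directory is the repository root so the script path resolves;
# the event payload is a minimal, made-up pull_request body containing the trigger directive.
with tempfile.TemporaryDirectory() as tmp:
    event_path = os.path.join(tmp, "event.json")
    output_path = os.path.join(tmp, "github_output.txt")

    with open(event_path, "w") as f:
        json.dump({"pull_request": {"body": "/integration-tests dialects=duckdb"}}, f)

    env = dict(
        os.environ,
        GITHUB_ACTIONS="true",
        GITHUB_EVENT_PATH=event_path,
        GITHUB_SHA="HEAD",
        GITHUB_OUTPUT=output_path,
    )
    subprocess.run(
        ["python", ".github/scripts/get_integration_test_params.py"],
        env=env,
        check=True,
    )

    # Expect "skip=false" and "dialects=duckdb" on separate lines.
    print(open(output_path).read())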