Skip to content

Commit

Permalink
Improve speed of tests by not creating connections at parse time (apache#45690)
Browse files Browse the repository at this point in the history

The DAG serialization tests load all of the example and system test DAGs, and
there were two places that these tests opened connections at parse time,
resulting in loads of extra test time.

- The SystemTestContextBuilder was trying to fetch things from SSM. This was
  addressed by adding a functools.cache on the function.
- The Bedrock example dag was setting/caching the underlying conn object
  globally. This was addressed by making the Airflow connection a global,
  rather than the Bedrock conn. This fix is not _great_, but it does massively
  help

Before:

> 111 passed, 1 warning in 439.37s (0:07:19)

After:

> 111 passed, 1 warning in 71.76s (0:01:11)
  • Loading branch information
ashb authored and HariGS-DB committed Jan 16, 2025
1 parent 6024075 commit 8d6f2c4
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 17 deletions.
11 changes: 9 additions & 2 deletions providers/tests/amazon/aws/system/utils/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import os
import sys
from io import StringIO
from unittest.mock import ANY, patch
from unittest.mock import patch

import pytest
from moto import mock_aws
Expand Down Expand Up @@ -79,8 +79,15 @@ def test_fetch_variable_success(
) -> None:
mock_getenv.return_value = env_value or ssm_value

result = utils.fetch_variable(ANY, default_value) if default_value else utils.fetch_variable(ANY_STR)
utils._fetch_from_ssm.cache_clear()

result = (
utils.fetch_variable("some_key", default_value)
if default_value
else utils.fetch_variable(ANY_STR)
)

utils._fetch_from_ssm.cache_clear()
assert result == expected_result

def test_fetch_variable_no_value_found_raises_exception(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,15 @@ def create_opensearch_policies(bedrock_role_arn: str, collection_name: str, poli

def _create_security_policy(name, policy_type, policy):
try:
aoss_client.create_security_policy(name=name, policy=json.dumps(policy), type=policy_type)
aoss_client.conn.create_security_policy(name=name, policy=json.dumps(policy), type=policy_type)
except ClientError as e:
if e.response["Error"]["Code"] == "ConflictException":
log.info("OpenSearch security policy %s already exists.", name)
raise

def _create_access_policy(name, policy_type, policy):
try:
aoss_client.create_access_policy(name=name, policy=json.dumps(policy), type=policy_type)
aoss_client.conn.create_access_policy(name=name, policy=json.dumps(policy), type=policy_type)
except ClientError as e:
if e.response["Error"]["Code"] == "ConflictException":
log.info("OpenSearch data access policy %s already exists.", name)
Expand Down Expand Up @@ -205,9 +205,9 @@ def create_collection(collection_name: str):
:param collection_name: The name of the Collection to create.
"""
log.info("\nCreating collection: %s.", collection_name)
return aoss_client.create_collection(name=collection_name, type="VECTORSEARCH")["createCollectionDetail"][
"id"
]
return aoss_client.conn.create_collection(name=collection_name, type="VECTORSEARCH")[
"createCollectionDetail"
]["id"]


@task
Expand Down Expand Up @@ -321,7 +321,7 @@ def get_collection_arn(collection_id: str):
"""
return next(
colxn["arn"]
for colxn in aoss_client.list_collections()["collectionSummaries"]
for colxn in aoss_client.conn.list_collections()["collectionSummaries"]
if colxn["id"] == collection_id
)

Expand All @@ -340,7 +340,9 @@ def delete_data_source(knowledge_base_id: str, data_source_id: str):
:param data_source_id: The unique identifier of the data source to delete.
"""
log.info("Deleting data source %s from Knowledge Base %s.", data_source_id, knowledge_base_id)
bedrock_agent_client.delete_data_source(dataSourceId=data_source_id, knowledgeBaseId=knowledge_base_id)
bedrock_agent_client.conn.delete_data_source(
dataSourceId=data_source_id, knowledgeBaseId=knowledge_base_id
)


# [END howto_operator_bedrock_delete_data_source]
Expand All @@ -359,7 +361,7 @@ def delete_knowledge_base(knowledge_base_id: str):
:param knowledge_base_id: The unique identifier of the knowledge base to delete.
"""
log.info("Deleting Knowledge Base %s.", knowledge_base_id)
bedrock_agent_client.delete_knowledge_base(knowledgeBaseId=knowledge_base_id)
bedrock_agent_client.conn.delete_knowledge_base(knowledgeBaseId=knowledge_base_id)


# [END howto_operator_bedrock_delete_knowledge_base]
Expand Down Expand Up @@ -397,7 +399,7 @@ def delete_collection(collection_id: str):
:param collection_id: ID of the collection to be indexed.
"""
log.info("Deleting collection %s.", collection_id)
aoss_client.delete_collection(id=collection_id)
aoss_client.conn.delete_collection(id=collection_id)


@task(trigger_rule=TriggerRule.ALL_DONE)
Expand All @@ -408,26 +410,26 @@ def delete_opensearch_policies(collection_name: str):
:param collection_name: All policies in the given collection name will be deleted.
"""

access_policies = aoss_client.list_access_policies(
access_policies = aoss_client.conn.list_access_policies(
type="data", resource=[f"collection/{collection_name}"]
)["accessPolicySummaries"]
log.info("Found access policies for %s: %s", collection_name, access_policies)
if not access_policies:
raise Exception("No access policies found?")
for policy in access_policies:
log.info("Deleting access policy for %s: %s", collection_name, policy["name"])
aoss_client.delete_access_policy(name=policy["name"], type="data")
aoss_client.conn.delete_access_policy(name=policy["name"], type="data")

for policy_type in ["encryption", "network"]:
policies = aoss_client.list_security_policies(
policies = aoss_client.conn.list_security_policies(
type=policy_type, resource=[f"collection/{collection_name}"]
)["securityPolicySummaries"]
if not policies:
raise Exception("No security policies found?")
log.info("Found %s security policies for %s: %s", policy_type, collection_name, policies)
for policy in policies:
log.info("Deleting %s security policy for %s: %s", policy_type, collection_name, policy["name"])
aoss_client.delete_security_policy(name=policy["name"], type=policy_type)
aoss_client.conn.delete_security_policy(name=policy["name"], type=policy_type)


with DAG(
Expand All @@ -440,8 +442,8 @@ def delete_opensearch_policies(collection_name: str):
test_context = sys_test_context_task()
env_id = test_context["ENV_ID"]

aoss_client = OpenSearchServerlessHook(aws_conn_id=None).conn
bedrock_agent_client = BedrockAgentHook(aws_conn_id=None).conn
aoss_client = OpenSearchServerlessHook(aws_conn_id=None)
bedrock_agent_client = BedrockAgentHook(aws_conn_id=None)

region_name = boto3.session.Session().region_name

Expand Down
2 changes: 2 additions & 0 deletions providers/tests/system/amazon/aws/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.
from __future__ import annotations

import functools
import inspect
import json
import logging
Expand Down Expand Up @@ -92,6 +93,7 @@ def _validate_env_id(env_id: str) -> str:
return env_id.lower()


@functools.cache
def _fetch_from_ssm(key: str, test_name: str | None = None) -> str:
"""
Test values are stored in the SSM Value as a JSON-encoded dict of key/value pairs.
Expand Down

0 comments on commit 8d6f2c4

Please sign in to comment.