Skip to content
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions datahub-web-react/src/app/ingest/source/builder/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import sacLogo from '@images/saclogo.svg';
import sigmaLogo from '@images/sigmalogo.png';
import snaplogic from '@images/snaplogic.png';
import snowflakeLogo from '@images/snowflakelogo.png';
import streamlitLogo from '@images/streamlitlogo.png';
import supersetLogo from '@images/supersetlogo.png';
import tableauLogo from '@images/tableaulogo.png';
import trinoLogo from '@images/trinologo.png';
Expand Down Expand Up @@ -152,6 +153,8 @@ export const VERTEX_AI = 'vertexai';
export const VERTEXAI_URN = `urn:li:dataPlatform:${VERTEX_AI}`;
export const SNAPLOGIC = 'snaplogic';
export const SNAPLOGIC_URN = `urn:li:dataPlatform:${SNAPLOGIC}`;
export const STREAMLIT = 'streamlit';
export const STREAMLIT_URN = `urn:li:dataPlatform:${STREAMLIT}`;

export const PLATFORM_URN_TO_LOGO = {
[ATHENA_URN]: athenaLogo,
Expand Down Expand Up @@ -200,6 +203,7 @@ export const PLATFORM_URN_TO_LOGO = {
[NEO4J_URN]: neo4j,
[VERTEXAI_URN]: vertexAI,
[SNAPLOGIC_URN]: snaplogic,
[STREAMLIT_URN]: streamlitLogo,
};

export const SOURCE_TO_PLATFORM_URN = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import redshiftLogo from '@images/redshiftlogo.png';
import sacLogo from '@images/saclogo.svg';
import sigmaLogo from '@images/sigmalogo.png';
import snowflakeLogo from '@images/snowflakelogo.png';
import streamlitLogo from '@images/streamlitlogo.png';
import supersetLogo from '@images/supersetlogo.png';
import tableauLogo from '@images/tableaulogo.png';
import trinoLogo from '@images/trinologo.png';
Expand Down Expand Up @@ -108,6 +109,8 @@ export const SNOWFLAKE_BETA = 'snowflake-beta';
export const SNOWFLAKE_USAGE = 'snowflake-usage';
export const SNOWFLAKE_URN = `urn:li:dataPlatform:${SNOWFLAKE}`;
export const STARBURST_TRINO_USAGE = 'starburst-trino-usage';
export const STREAMLIT = 'streamlit';
export const STREAMLIT_URN = `urn:li:dataPlatform:${STREAMLIT}`;
export const SUPERSET = 'superset';
export const SUPERSET_URN = `urn:li:dataPlatform:${SUPERSET}`;
export const TABLEAU = 'tableau';
Expand Down Expand Up @@ -176,6 +179,7 @@ export const PLATFORM_URN_TO_LOGO = {
[PRESET_URN]: presetLogo,
[REDSHIFT_URN]: redshiftLogo,
[SNOWFLAKE_URN]: snowflakeLogo,
[STREAMLIT_URN]: streamlitLogo,
[TABLEAU_URN]: tableauLogo,
[TRINO_URN]: trinoLogo,
[SUPERSET_URN]: supersetLogo,
Expand Down
Binary file added datahub-web-react/src/images/streamlitlogo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions metadata-ingestion/docs/sources/snowflake/snowflake_pre.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ grant role datahub_role to user datahub_user;

// Optional - required if extracting lineage, usage or tags (without lineage)
grant imported privileges on database snowflake to role datahub_role;

// Optional - required if extracting Streamlit Apps
grant usage on all streamlits in database "<your-database>" to role datahub_role;
grant usage on future streamlits in database "<your-database>" to role datahub_role;
```

The details of each granted privilege can be viewed in the [Snowflake docs](https://docs.snowflake.com/en/user-guide/security-access-control-privileges.html). A summary of each privilege and why it is required for this connector:
Expand All @@ -62,6 +66,7 @@ grant usage on schema "<your-database>"."<your-schema>" to role datahub_role;
```

- `select` on `streams` is required for stream definitions to be available. This does not allow selecting the data (not required) unless the underlying dataset has select access as well.
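- `usage` on `streamlits` is required for the Streamlit apps in a database to be listed by `SHOW STREAMLITS` and ingested. It is only needed when `include_streamlits` is enabled.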

```sql
grant usage on schema "<your-database>"."<your-schema>" to role datahub_role;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@
"s3",
"sagemaker",
"snowflake",
"streamlit",
]
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ class BIAssetSubTypes(StrEnum):
HEX_PROJECT = "Project"
HEX_COMPONENT = "Component"

# Streamlit
STREAMLIT = "Streamlit"


class MLAssetSubTypes(StrEnum):
MLFLOW_TRAINING_RUN = "ML Training Run"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,14 @@ class SnowflakeObjectDomain(StrEnum):
STREAM = "stream"
PROCEDURE = "procedure"
DYNAMIC_TABLE = "dynamic table"
STREAMLIT = "streamlit"


GENERIC_PERMISSION_ERROR_KEY = "permission-error"
LINEAGE_PERMISSION_ERROR = "lineage-permission-error"

STREAMLIT_PLATFORM = "streamlit"


# Snowflake connection arguments
# https://docs.snowflake.com/en/user-guide/python-connector-api.html#connect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,14 @@ class SnowflakeFilterConfig(SQLFilterConfig):
" use the regex 'Customer.public.customer.*'",
)

streamlit_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description="Regex patterns for Streamlit app to filter in ingestion. "
"Specify regex to match the entire Streamlit app name in database.schema.streamlit format. "
"e.g. to match all Streamlit apps starting with dashboard in Analytics database and public schema,"
" use the regex 'Analytics.public.dashboard.*'",
)

match_fully_qualified_names: bool = Field(
default=False,
description="Whether `schema_pattern` is matched against fully qualified schema name `<catalog>.<schema>`.",
Expand Down Expand Up @@ -317,6 +325,11 @@ class SnowflakeV2Config(
description="If enabled, procedures will be ingested as pipelines/tasks.",
)

include_streamlits: bool = Field(
default=False,
description="If enabled, Streamlit apps will be ingested as dashboards.",
)

structured_property_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description=(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,10 @@ def procedures_for_database(db_name: str) -> str:
FROM {db_clause}information_schema.procedures
order by procedure_schema, procedure_name"""

@staticmethod
def streamlit_apps_for_database(db_name: str) -> str:
    """Build the statement that lists the Streamlit apps of one database.

    Args:
        db_name: Name of the database to inspect. It is placed verbatim
            between double quotes; no escaping is applied here.

    Returns:
        The ``SHOW STREAMLITS IN DATABASE`` statement as a string.
    """
    quoted_db = f'"{db_name}"'
    return f"SHOW STREAMLITS IN DATABASE {quoted_db}"

@staticmethod
def get_all_tags():
return """
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class SnowflakeV2Report(
tags_scanned: int = 0
streams_scanned: int = 0
procedures_scanned: int = 0
streamlit_apps_scanned: int = 0

include_usage_stats: bool = False
include_operational_stats: bool = False
Expand Down Expand Up @@ -165,6 +166,8 @@ def report_entity_scanned(self, name: str, ent_type: str = "table") -> None:
self.streams_scanned += 1
elif ent_type == "procedure":
self.procedures_scanned += 1
elif ent_type == "streamlit":
self.streamlit_apps_scanned += 1
else:
raise KeyError(f"Unknown entity {ent_type}.")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,39 @@ def get_subtype(self) -> DatasetSubTypes:
return DatasetSubTypes.SNOWFLAKE_STREAM


@dataclass
class SnowflakeStreamlitApp:
    """
    Metadata record for a single Streamlit application hosted in Snowflake.

    Streamlit apps are interactive, Python-based data applications deployed
    inside Snowflake. DataHub ingests them as dashboard entities so they show
    up next to conventional dashboards and reports.
    """

    # Snowflake object name of the Streamlit app.
    name: str
    # Timestamp at which the app was created.
    created: datetime
    # Owner role of the app.
    owner: str
    # Database that contains the app.
    database_name: str
    # Schema that contains the app.
    schema_name: str
    # Human-readable title, as displayed in the Snowflake UI.
    title: str
    # Optional free-text description; None when no comment is set.
    comment: Optional[str]
    # Unique identifier used in the app's Snowflake URL.
    url_id: str
    # Kind of owner (typically "ROLE").
    owner_role_type: str


class _SnowflakeTagCache:
def __init__(self) -> None:
# self._database_tags[<database_name>] = list of tags applied to database
Expand Down Expand Up @@ -946,6 +979,35 @@ def get_procedures_for_database(
)
return procedures

@serialized_lru_cache(maxsize=1)
def get_streamlit_apps_for_database(
    self, db_name: str
) -> Dict[str, List[SnowflakeStreamlitApp]]:
    """Fetch the Streamlit apps of one database, grouped by schema.

    Runs the statement produced by
    ``SnowflakeQuery.streamlit_apps_for_database`` on ``self.connection`` and
    turns every returned row into a ``SnowflakeStreamlitApp``.

    Args:
        db_name: Database whose Streamlit apps should be listed.

    Returns:
        A dict keyed by schema name; each value holds that schema's apps in
        the order the rows were returned. Schemas without any app have no
        entry.
    """
    apps_by_schema: Dict[str, List[SnowflakeStreamlitApp]] = {}
    statement = SnowflakeQuery.streamlit_apps_for_database(db_name)

    for row in self.connection.query(statement):
        record = SnowflakeStreamlitApp(
            name=row["name"],
            created=row["created_on"],
            owner=row["owner"],
            database_name=row["database_name"],
            schema_name=row["schema_name"],
            title=row["title"],
            # The comment is looked up leniently: a row without that key
            # yields None instead of raising KeyError.
            comment=row.get("comment"),
            url_id=row["url_id"],
            owner_role_type=row["owner_role_type"],
        )
        apps_by_schema.setdefault(row["schema_name"], []).append(record)

    return apps_by_schema

@serialized_lru_cache(maxsize=1)
def get_dynamic_table_graph_info(self, db_name: str) -> Dict[str, Dict[str, Any]]:
"""Get dynamic table dependency information from information schema."""
Expand Down
Loading