Skip to content

Commit cf73ae4

Browse files
committed
refactor(looker): optimize view-to-explore mapping and enhance logging
1 parent 9092320 commit cf73ae4

File tree

2 files changed

+27
-13
lines changed

2 files changed

+27
-13
lines changed

metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -794,7 +794,6 @@ def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
794794
if self.source_config.emit_reachable_views_only:
795795
explore_reachable_views.add(view_name.include)
796796

797-
# Build view-to-explores mapping efficiently
798797
view_to_explores[view_name.include].add(explore.name)
799798
explore_to_views[explore.name].add(view_name.include)
800799
except Exception as e:
@@ -810,6 +809,16 @@ def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
810809
model.connection, set()
811810
)
812811

812+
view_to_explore_map = {}
813+
if view_to_explores and explore_to_views:
814+
view_to_explore_map = self._optimize_views_by_common_explore(
815+
view_to_explores, explore_to_views
816+
)
817+
else:
818+
logger.warning(
819+
f"Either view_to_explores: {view_to_explores} or explore_to_views: {explore_to_views} is empty"
820+
)
821+
813822
project_name = self.get_project_name(model_name)
814823

815824
looker_view_id_cache: LookerViewIdCache = LookerViewIdCache(
@@ -894,9 +903,7 @@ def get_internal_workunits(self) -> Iterable[Union[MetadataWorkUnit, Entity]]:
894903
config=self.source_config,
895904
ctx=self.ctx,
896905
looker_client=self.looker_client,
897-
view_to_explore_map=self._optimize_views_by_common_explore(
898-
view_to_explores, explore_to_views
899-
),
906+
view_to_explore_map=view_to_explore_map,
900907
)
901908
except Exception as e:
902909
self.reporter.report_warning(
@@ -1074,10 +1081,7 @@ def _optimize_views_by_common_explore(
10741081
view_to_explore: Dict[str, str] = {}
10751082

10761083
# For each view, find the explore with maximum size that contains it
1077-
for view_name in view_to_explores:
1078-
# Get all explores that contain this view from pre-built mapping
1079-
candidate_explores = view_to_explores[view_name]
1080-
1084+
for view_name, candidate_explores in view_to_explores.items():
10811085
if candidate_explores:
10821086
# Find explore with maximum size using max() with key function
10831087
# This assigns the view to the explore with the most views that contains it
@@ -1102,6 +1106,7 @@ def _optimize_views_by_common_explore(
11021106
f"View-explore optimization: No explores to optimize for {total_views} views"
11031107
)
11041108

1109+
logger.debug(f"Final View-to-explore mapping: {view_to_explore}")
11051110
return view_to_explore
11061111

11071112
def get_report(self):

metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,9 @@ def _get_fields_from_looker_api(self, explore_name: str) -> List[str]:
611611
)
612612

613613
if explore and explore.fields:
614+
logger.debug(
615+
f"Looker API response for explore fields: {explore.fields}"
616+
)
614617
# Creating a map to de-dup dimension group fields - adding all of them adds to the query length, and we don't need all of them for CLL
615618
dimension_group_fields_mapping: Dict[str, str] = {}
616619
# Get dimensions from API
@@ -654,10 +657,13 @@ def _get_fields_from_looker_api(self, explore_name: str) -> List[str]:
654657
f"No fields found in explore '{explore_name}' from Looker API, falling back to view context"
655658
)
656659

657-
except Exception as e:
660+
except Exception:
658661
logger.warning(
659-
f"Failed to get explore details from Looker API for explore '{explore_name}': {e}. Falling back to view context."
662+
f"Failed to get explore details from Looker API for explore '{explore_name}'. Current view: {self.view_context.name()} and view_fields: {view_fields}. Falling back to view csontext.",
663+
exc_info=True,
660664
)
665+
# Resetting view_fields to trigger fallback to view context
666+
view_fields = []
661667

662668
return view_fields
663669

@@ -682,7 +688,7 @@ def _get_fields_from_view_context(self) -> List[str]:
682688

683689
for dim_group in self.view_context.dimension_groups():
684690
dim_group_type_str = dim_group.get(VIEW_FIELD_TYPE_ATTRIBUTE)
685-
691+
686692
logger.debug(
687693
f"Processing dimension group from view context: {dim_group.get(NAME, 'unknown')}, type: {dim_group_type_str}"
688694
)
@@ -710,8 +716,11 @@ def _get_fields_from_view_context(self) -> List[str]:
710716
self._get_duration_dim_group_field_name(dim_group)
711717
)
712718
)
713-
except Exception as e:
714-
logger.error(f"View-name: {self.view_context.name()}: {e}")
719+
except Exception:
720+
logger.error(
721+
f"Failed to process dimension group for View-name: {self.view_context.name()}",
722+
exc_info=True,
723+
)
715724
# Continue processing other fields instead of failing completely
716725
continue
717726

0 commit comments

Comments
 (0)