@@ -495,62 +495,62 @@ def _ingest_events(
def _generate_operational_workunits(
    self, usage_state: BigQueryUsageState, table_refs: Collection[str]
) -> Iterable[MetadataWorkUnit]:
-    self.report.set_ingestion_stage("*", USAGE_EXTRACTION_OPERATIONAL_STATS)
-    for audit_event in usage_state.standalone_events():
-        try:
-            operational_wu = self._create_operation_workunit(
-                audit_event, table_refs
-            )
-            if operational_wu:
-                yield operational_wu
-                self.report.num_operational_stats_workunits_emitted += 1
-        except Exception as e:
-            self.report.warning(
-                message="Unable to generate operation workunit",
-                context=f"{audit_event}",
-                exc=e,
-            )
+    with self.report.new_stage(f"*: {USAGE_EXTRACTION_OPERATIONAL_STATS}"):
+        for audit_event in usage_state.standalone_events():
+            try:
+                operational_wu = self._create_operation_workunit(
+                    audit_event, table_refs
+                )
+                if operational_wu:
+                    yield operational_wu
+                    self.report.num_operational_stats_workunits_emitted += 1
+            except Exception as e:
+                self.report.warning(
+                    message="Unable to generate operation workunit",
+                    context=f"{audit_event}",
+                    exc=e,
+                )

def _generate_usage_workunits(
    self, usage_state: BigQueryUsageState
) -> Iterable[MetadataWorkUnit]:
-    self.report.set_ingestion_stage("*", USAGE_EXTRACTION_USAGE_AGGREGATION)
-    top_n = (
-        self.config.usage.top_n_queries
-        if self.config.usage.include_top_n_queries
-        else 0
-    )
-    for entry in usage_state.usage_statistics(top_n=top_n):
-        try:
-            query_freq = [
-                (
-                    self.uuid_to_query.get(
-                        query_hash, usage_state.queries[query_hash]
-                    ),
-                    count,
-                )
-                for query_hash, count in entry.query_freq
-            ]
-            yield make_usage_workunit(
-                bucket_start_time=datetime.fromisoformat(entry.timestamp),
-                resource=BigQueryTableRef.from_string_name(entry.resource),
-                query_count=entry.query_count,
-                query_freq=query_freq,
-                user_freq=entry.user_freq,
-                column_freq=entry.column_freq,
-                bucket_duration=self.config.bucket_duration,
-                resource_urn_builder=self.identifiers.gen_dataset_urn_from_raw_ref,
-                top_n_queries=self.config.usage.top_n_queries,
-                format_sql_queries=self.config.usage.format_sql_queries,
-                queries_character_limit=self.config.usage.queries_character_limit,
-            )
-            self.report.num_usage_workunits_emitted += 1
-        except Exception as e:
-            self.report.warning(
-                message="Unable to generate usage statistics workunit",
-                context=f"{entry.timestamp}, {entry.resource}",
-                exc=e,
-            )
+    with self.report.new_stage(f"*: {USAGE_EXTRACTION_USAGE_AGGREGATION}"):
+        top_n = (
+            self.config.usage.top_n_queries
+            if self.config.usage.include_top_n_queries
+            else 0
+        )
+        for entry in usage_state.usage_statistics(top_n=top_n):
+            try:
+                query_freq = [
+                    (
+                        self.uuid_to_query.get(
+                            query_hash, usage_state.queries[query_hash]
+                        ),
+                        count,
+                    )
+                    for query_hash, count in entry.query_freq
+                ]
+                yield make_usage_workunit(
+                    bucket_start_time=datetime.fromisoformat(entry.timestamp),
+                    resource=BigQueryTableRef.from_string_name(entry.resource),
+                    query_count=entry.query_count,
+                    query_freq=query_freq,
+                    user_freq=entry.user_freq,
+                    column_freq=entry.column_freq,
+                    bucket_duration=self.config.bucket_duration,
+                    resource_urn_builder=self.identifiers.gen_dataset_urn_from_raw_ref,
+                    top_n_queries=self.config.usage.top_n_queries,
+                    format_sql_queries=self.config.usage.format_sql_queries,
+                    queries_character_limit=self.config.usage.queries_character_limit,
+                )
+                self.report.num_usage_workunits_emitted += 1
+            except Exception as e:
+                self.report.warning(
+                    message="Unable to generate usage statistics workunit",
+                    context=f"{entry.timestamp}, {entry.resource}",
+                    exc=e,
+                )

def _get_usage_events(self, projects: Iterable[str]) -> Iterable[AuditEvent]:
    if self.config.use_exported_bigquery_audit_metadata:
@@ -559,10 +559,10 @@ def _get_usage_events(self, projects: Iterable[str]) -> Iterable[AuditEvent]:
    for project_id in projects:
        with PerfTimer() as timer:
            try:
-                self.report.set_ingestion_stage(
-                    project_id, USAGE_EXTRACTION_INGESTION
-                )
-                yield from self._get_parsed_bigquery_log_events(project_id)
+                with self.report.new_stage(
+                    f"{project_id}: {USAGE_EXTRACTION_INGESTION}"
+                ):
+                    yield from self._get_parsed_bigquery_log_events(project_id)
            except Exception as e:
                self.report.usage_failed_extraction.append(project_id)
                self.report.warning(
@@ -572,8 +572,8 @@ def _get_usage_events(self, projects: Iterable[str]) -> Iterable[AuditEvent]:
                )
                self.report_status(f"usage-extraction-{project_id}", False)

-        self.report.usage_extraction_sec[project_id] = round(
-            timer.elapsed_seconds(), 2
+        self.report.usage_extraction_sec[project_id] = timer.elapsed_seconds(
+            digits=2
        )

def _store_usage_event(
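
The stage-tracking change above swaps paired set_ingestion_stage() calls for a new_stage() context manager, so each stage is opened and closed in one lexical scope and its bookkeeping runs even if the surrounding generator raises or is closed early. A minimal sketch of the pattern, with a hypothetical StageReport class standing in for the real report object (the actual new_stage() implementation may differ):

import contextlib
import time
from typing import Dict, Iterator


class StageReport:
    # Hypothetical stand-in for the real ingestion report class; it only
    # illustrates how a scoped context manager can replace paired
    # set_ingestion_stage() calls.
    def __init__(self) -> None:
        self.stage_durations: Dict[str, float] = {}

    @contextlib.contextmanager
    def new_stage(self, stage: str) -> Iterator[None]:
        start = time.perf_counter()
        try:
            yield
        finally:
            # finally runs even if the body raises or an enclosing generator
            # is closed early, so the stage duration is always recorded.
            self.stage_durations[stage] = round(time.perf_counter() - start, 2)


# Usage mirroring the diff: the stage name combines a scope ("*" or a
# project id) with a stage label in a single f-string.
report = StageReport()
with report.new_stage("*: Usage Extraction Operational Stats"):
    pass  # ... generate and yield workunits here ...
print(report.stage_durations)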
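The final hunk folds rounding into the timer itself, replacing round(timer.elapsed_seconds(), 2) with timer.elapsed_seconds(digits=2). A sketch of what such a timer could look like; the PerfTimer name matches the diff, but this body is an assumed simplification, not the library's implementation:

import time
from typing import Optional


class PerfTimer:
    # Assumed simplification of the PerfTimer used in the diff: a context
    # manager that records wall-clock time and can round on read.
    def __enter__(self) -> "PerfTimer":
        self._start = time.perf_counter()
        self._end: Optional[float] = None
        return self

    def __exit__(self, *exc: object) -> None:
        self._end = time.perf_counter()

    def elapsed_seconds(self, digits: Optional[int] = None) -> float:
        # Works both while the block is still running and after exit.
        end = self._end if self._end is not None else time.perf_counter()
        elapsed = end - self._start
        return round(elapsed, digits) if digits is not None else elapsed


with PerfTimer() as timer:
    time.sleep(0.05)
print(timer.elapsed_seconds(digits=2))  # e.g. 0.05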