@@ -34,6 +34,7 @@ def __init__(self, namespaces, port_mapping, sfp_obj_dict, main_thread_stop_even
3434 threading .Thread .__init__ (self )
3535 self .name = "DomInfoUpdateTask"
3636 self .exc = None
37+ self .dom_th_info_cache = {}
3738 self .task_stopping_event = threading .Event ()
3839 self .main_thread_stop_event = main_thread_stop_event
3940 self .helper_logger = helper_logger
@@ -265,21 +266,25 @@ def task_worker(self):
265266 self .log_notice ("Start DOM monitoring loop" )
266267 firmware_info_cache = {}
267268 dom_info_cache = {}
269+ dom_th_info_cache = {}
268270 transceiver_status_cache = {}
269271 vdm_real_value_cache = {}
270272 vdm_flag_cache = {}
271273 pm_info_cache = {}
274+ temperature_status = {}
272275 sel , asic_context = port_event_helper .subscribe_port_config_change (self .namespaces )
273276
274277 # Start loop to update dom info in DB periodically
275278 while not self .task_stopping_event .wait (self .DOM_INFO_UPDATE_PERIOD_SECS ):
276279 # Clear the caches at the beginning of the loop so that every iteration starts with empty caches
277280 firmware_info_cache .clear ()
278281 dom_info_cache .clear ()
282+ dom_th_info_cache .clear ()
279283 transceiver_status_cache .clear ()
280284 vdm_real_value_cache .clear ()
281285 vdm_flag_cache .clear ()
282286 pm_info_cache .clear ()
287+ temperature_status .clear ()
283288
284289 # Handle port change event from main thread
285290 port_event_helper .handle_port_config_change (sel , asic_context , self .task_stopping_event , self .port_mapping , self .helper_logger , self .on_port_config_change )
@@ -310,6 +315,7 @@ def task_worker(self):
310315
311316 try :
312317 self .post_port_sfp_firmware_info_to_db (logical_port_name , self .port_mapping , self .xcvr_table_helper .get_firmware_info_tbl (asic_index ), self .task_stopping_event , firmware_info_cache = firmware_info_cache )
318+ xcvrd .post_port_dom_threshold_info_to_db (logical_port_name , self .port_mapping , self .xcvr_table_helper .get_dom_threshold_tbl (asic_index ), self .task_stopping_event , dom_th_info_cache = self .dom_th_info_cache )
313319 except (KeyError , TypeError ) as e :
314320 #continue to process next port since exception could be raised due to port reset, transceiver removal
315321 self .log_warning ("Got exception {} while processing firmware info for port {}, ignored" .format (repr (e ), logical_port_name ))
@@ -326,6 +332,12 @@ def task_worker(self):
326332 self .xcvr_table_helper .get_status_tbl (asic_index ),
327333 self .task_stopping_event ,
328334 transceiver_status_cache = transceiver_status_cache )
335+
336+ self .update_transceiver_temperature_status (logical_port_name ,
337+ self .port_mapping ,
338+ dom_info_cache ,
339+ self .dom_th_info_cache ,
340+ temperature_status )
329341 except (KeyError , TypeError ) as e :
330342 #continue to process next port since exception could be raised due to port reset, transceiver removal
331343 self .log_warning ("Got exception {} while processing transceiver status hw for port {}, ignored" .format (repr (e ), logical_port_name ))
@@ -360,6 +372,66 @@ def task_worker(self):
360372
361373 self .log_notice ("Stop DOM monitoring loop" )
362374
def update_transceiver_temperature_status(self, logical_port_name, port_mapping, dom_info_cache, dom_th_info_cache, temperature_status):
    """Log the temperature alarm/warning state of a logical port's transceivers.

    For every physical port behind ``logical_port_name`` whose transceiver is
    reported present, the cached temperature reading is compared against the
    cached alarm/warning thresholds and the outcome is written to syslog:

    * when the state differs from the one remembered in ``temperature_status``
      a "status change" notice is logged and the remembered state is updated;
    * when the state is unchanged but not normal, the current state is logged
      again.

    Args:
        logical_port_name: Logical port whose physical ports are inspected.
        port_mapping: Mapping object handed to
            ``xcvrd.get_physical_port_name_dict`` to resolve physical ports.
        dom_info_cache: Dict keyed by physical port; each entry is expected to
            expose a ``"temperature"`` key.
        dom_th_info_cache: Dict keyed by physical port; each entry is expected
            to expose ``"temphighalarm"``, ``"templowalarm"``,
            ``"temphighwarning"`` and ``"templowwarning"`` keys.
        temperature_status: Dict keyed by physical port holding the last
            computed state; updated in place. How long a state is remembered
            is decided by the caller that owns this dict.

    Returns:
        None.
    """
    TEMP_NORMAL = 0
    TEMP_HIGH_ALARM = 1
    TEMP_LOW_ALARM = 2
    TEMP_HIGH_WARNING = 3
    TEMP_LOW_WARNING = 4

    TEMP_ERROR_TO_DESCRIPTION_DICT = {
        TEMP_NORMAL: "normal",
        TEMP_HIGH_ALARM: "temperature high alarm",
        TEMP_LOW_ALARM: "temperature low alarm",
        TEMP_HIGH_WARNING: "temperature high warning",
        TEMP_LOW_WARNING: "temperature low warning"
    }

    def _to_float(value):
        # Missing keys (None), 'N/A' and any other non-numeric reading are
        # all treated as "value unavailable" instead of raising.
        try:
            return float(value)
        except (TypeError, ValueError):
            return None

    physical_ports = xcvrd.get_physical_port_name_dict(logical_port_name, port_mapping)
    for physical_port, physical_port_name in physical_ports.items():
        if self.task_stopping_event.is_set():
            break

        if not xcvrd._wrapper_get_presence(physical_port):
            continue

        # A port seen for the first time is considered to start from "normal".
        ori_temp_status = temperature_status.get(physical_port)
        if ori_temp_status is None:
            ori_temp_status = TEMP_NORMAL
            temperature_status[physical_port] = ori_temp_status
        new_temp_status = TEMP_NORMAL

        dom_info_dict = dom_info_cache.get(physical_port)
        dom_th_info_dict = dom_th_info_cache.get(physical_port)
        if dom_info_dict is not None and dom_th_info_dict is not None:
            # Convert every value exactly once.
            temperature = _to_float(dom_info_dict.get("temperature"))
            temphighalarm = _to_float(dom_th_info_dict.get("temphighalarm"))
            templowalarm = _to_float(dom_th_info_dict.get("templowalarm"))
            temphighwarning = _to_float(dom_th_info_dict.get("temphighwarning"))
            templowwarning = _to_float(dom_th_info_dict.get("templowwarning"))

            readings = (temperature, temphighalarm, templowalarm, temphighwarning, templowwarning)
            # The state is only evaluated when the reading and all four
            # thresholds are available; otherwise it stays "normal".
            if all(reading is not None for reading in readings):
                if temperature > temphighalarm:
                    new_temp_status = TEMP_HIGH_ALARM
                elif temperature > temphighwarning:
                    new_temp_status = TEMP_HIGH_WARNING
                elif temperature < templowalarm:
                    new_temp_status = TEMP_LOW_ALARM
                elif temperature < templowwarning:
                    new_temp_status = TEMP_LOW_WARNING

        # Add syslog for temperature
        if ori_temp_status != new_temp_status:
            temperature_status[physical_port] = new_temp_status
            # Log through the logger stored on the instance; the bare name
            # `helper_logger` is only a constructor argument.
            self.helper_logger.log_notice("{}: temperature status change from {} to {}".format(
                physical_port_name,
                TEMP_ERROR_TO_DESCRIPTION_DICT[ori_temp_status],
                TEMP_ERROR_TO_DESCRIPTION_DICT[new_temp_status]))
        elif new_temp_status != TEMP_NORMAL:
            self.helper_logger.log_notice("{}: {}".format(physical_port_name, TEMP_ERROR_TO_DESCRIPTION_DICT[new_temp_status]))
433+
434+
363435 def run (self ):
364436 if self .task_stopping_event .is_set ():
365437 return
0 commit comments