Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix xcvrd to support 400G ZR optic #293

Merged
merged 9 commits into from
Oct 20, 2022
12 changes: 12 additions & 0 deletions sonic-xcvrd/tests/test_xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,18 @@ def test_CmisManagerTask_task_worker(self, mock_chassis):
mock_xcvr_api.get_tx_config_power = MagicMock(return_value=0)
mock_xcvr_api.get_laser_config_freq = MagicMock(return_value=0)
mock_xcvr_api.get_module_type_abbreviation = MagicMock(return_value='QSFP-DD')
mock_xcvr_api.get_datapath_init_duration = MagicMock(return_value=60000.0)
mock_xcvr_api.get_datapath_deinit_duration = MagicMock(return_value=600000.0)
mock_xcvr_api.get_dpinit_pending = MagicMock(return_value={
'DPInitPending1': False,
'DPInitPending2': False,
'DPInitPending3': False,
'DPInitPending4': False,
'DPInitPending5': False,
'DPInitPending6': False,
'DPInitPending7': False,
'DPInitPending8': False
})
mock_xcvr_api.get_application_advertisement = MagicMock(return_value={
1: {
'host_electrical_interface_id': '400GAUI-8 C2M (Annex 120E)',
Expand Down
76 changes: 63 additions & 13 deletions sonic-xcvrd/xcvrd/xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,22 +986,38 @@ def on_port_update_event(self, port_change_event):
return

if port_change_event.event_type == port_change_event.PORT_SET:
need_update = False
if pport >= 0:
self.port_dict[lport]['index'] = pport
if self.port_dict[lport].get('index') != pport:
self.port_dict[lport]['index'] = pport
need_update = True
if 'speed' in port_change_event.port_dict and port_change_event.port_dict['speed'] != 'N/A':
self.port_dict[lport]['speed'] = port_change_event.port_dict['speed']
if self.port_dict[lport].get('speed') != port_change_event.port_dict['speed']:
self.port_dict[lport]['speed'] = port_change_event.port_dict['speed']
need_update = True
if 'lanes' in port_change_event.port_dict:
self.port_dict[lport]['lanes'] = port_change_event.port_dict['lanes']
if self.port_dict[lport].get('lanes') != port_change_event.port_dict['lanes']:
self.port_dict[lport]['lanes'] = port_change_event.port_dict['lanes']
need_update = True
if 'host_tx_ready' in port_change_event.port_dict:
self.port_dict[lport]['host_tx_ready'] = port_change_event.port_dict['host_tx_ready']
if self.port_dict[lport].get('host_tx_ready') != port_change_event.port_dict['host_tx_ready']:
self.port_dict[lport]['host_tx_ready'] = port_change_event.port_dict['host_tx_ready']
need_update = True
if 'admin_status' in port_change_event.port_dict:
self.port_dict[lport]['admin_status'] = port_change_event.port_dict['admin_status']
if self.port_dict[lport].get('admin_status') != port_change_event.port_dict['admin_status']:
self.port_dict[lport]['admin_status'] = port_change_event.port_dict['admin_status']
need_update = True
if 'laser_freq' in port_change_event.port_dict:
self.port_dict[lport]['laser_freq'] = int(port_change_event.port_dict['laser_freq'])
if self.port_dict[lport].get('laser_freq') != int(port_change_event.port_dict['laser_freq']):
self.port_dict[lport]['laser_freq'] = int(port_change_event.port_dict['laser_freq'])
need_update = True
if 'tx_power' in port_change_event.port_dict:
self.port_dict[lport]['tx_power'] = float(port_change_event.port_dict['tx_power'])
if self.port_dict[lport].get('tx_power') != float(port_change_event.port_dict['tx_power']):
self.port_dict[lport]['tx_power'] = float(port_change_event.port_dict['tx_power'])
need_update = True

self.force_cmis_reinit(lport, 0)
if need_update:
self.force_cmis_reinit(lport, 0)
else:
self.port_dict[lport]['cmis_state'] = self.CMIS_STATE_REMOVED

Expand Down Expand Up @@ -1073,6 +1089,12 @@ def get_cmis_application_desired(self, api, channel, speed):

return (appl_code & 0xf)

def get_cmis_dp_init_duration(self, api):
    """
    Get the datapath init duration advertised by the module, in seconds

    Args:
        api:
            XcvrApi object

    Returns:
        Number, api.get_datapath_init_duration() divided by 1000. Callers
        pass the result to datetime.timedelta(seconds=...), so the raw
        value is presumably in milliseconds (confirm against the XcvrApi
        documentation). When the API reports no value (None),
        self.CMIS_DEF_EXPIRED is returned instead.
    """
    duration = api.get_datapath_init_duration()
    if duration is None:
        # No advertised duration: fall back to the default expiration
        # time instead of raising TypeError on None / 1000.
        return self.CMIS_DEF_EXPIRED
    return duration / 1000

def get_cmis_dp_deinit_duration(self, api):
    """
    Get the datapath deinit duration advertised by the module, in seconds

    Args:
        api:
            XcvrApi object

    Returns:
        Number, api.get_datapath_deinit_duration() divided by 1000. Callers
        pass the result to datetime.timedelta(seconds=...), so the raw
        value is presumably in milliseconds (confirm against the XcvrApi
        documentation). When the API reports no value (None),
        self.CMIS_DEF_EXPIRED is returned instead.
    """
    duration = api.get_datapath_deinit_duration()
    if duration is None:
        # No advertised duration: fall back to the default expiration
        # time instead of raising TypeError on None / 1000.
        return self.CMIS_DEF_EXPIRED
    return duration / 1000
abohanyang marked this conversation as resolved.
Show resolved Hide resolved

def is_cmis_application_update_required(self, api, channel, speed):
"""
Check if the CMIS application update is required
Expand Down Expand Up @@ -1176,6 +1198,32 @@ def check_config_error(self, api, channel, states):

return done

def check_datapath_init_pending(self, api, channel):
    """
    Check if the CMIS datapath init is pending

    Args:
        api:
            XcvrApi object
        channel:
            Integer, a bitmask of the lanes on the host side
            e.g. 0x5 for lane 0 and lane 2.

    Returns:
        Boolean, true if any lanes are pending datapath init, otherwise false
    """
    dpinit_pending_dict = api.get_dpinit_pending()
    # Bit N of the mask selects the 1-based key "DPInitPending<N+1>";
    # lanes outside the mask are never looked up.
    return any(
        dpinit_pending_dict["DPInitPending{}".format(lane + 1)]
        for lane in range(self.CMIS_NUM_CHANNELS)
        if (1 << lane) & channel
    )

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should check whether DPInitPending is set for ALL host lanes and return True only in that case. Return False if any of the lanes has DPInitPending=0.

image

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On further thought and checking, please look further into this.
Should this be interpreted as follows: as long as one of the lanes is found in the DPInitPending state, the outcome is pending (i.e. True)? The caller of this function, on getting True, would then perform force_cmis_reinit() instead of setting the state to CMIS_STATE_DP_INIT (and proceeding with the normal workflow).

def check_datapath_state(self, api, channel, states):
"""
Check if the CMIS datapath states are in the specified state
Expand Down Expand Up @@ -1458,7 +1506,7 @@ def task_worker(self):
# TODO: Make sure this doesn't impact other datapaths
api.set_lpmode(False)
self.port_dict[lport]['cmis_state'] = self.CMIS_STATE_AP_CONF
self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds=self.CMIS_DEF_EXPIRED)
self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds=self.get_cmis_dp_deinit_duration(api))
elif state == self.CMIS_STATE_AP_CONF:
# TODO: Use fine grained time when the CMIS memory map is available
if not self.check_module_state(api, ['ModuleReady']):
Expand Down Expand Up @@ -1494,8 +1542,11 @@ def task_worker(self):
self.force_cmis_reinit(lport, retries + 1)
continue

# TODO: Use fine grained time when the CMIS memory map is available
self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds=self.CMIS_DEF_EXPIRED)
if self.check_datapath_init_pending(api, host_lanes):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If any of the lanes has DPInitPending=0, i.e. if this function call returns False, then we should retry the re-init.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have updated the code based on our discussion.

self.log_notice("{}: datapath init pending".format(lport))
self.force_cmis_reinit(lport, retries + 1)
continue

self.port_dict[lport]['cmis_state'] = self.CMIS_STATE_DP_INIT
elif state == self.CMIS_STATE_DP_INIT:
if not self.check_config_error(api, host_lanes, ['ConfigSuccess']):
Expand All @@ -1515,8 +1566,7 @@ def task_worker(self):

# D.1.3 Software Configuration and Initialization
api.set_datapath_init(host_lanes)
# TODO: Use fine grained timeout when the CMIS memory map is available
self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds=self.CMIS_DEF_EXPIRED)
self.port_dict[lport]['cmis_expired'] = now + datetime.timedelta(seconds=self.get_cmis_dp_init_duration(api))
self.port_dict[lport]['cmis_state'] = self.CMIS_STATE_DP_TXON
elif state == self.CMIS_STATE_DP_TXON:
if not self.check_datapath_state(api, host_lanes, ['DataPathInitialized']):
Expand Down