[Mellanox|FFB]: Add support for Mellanox fast-fast boot (#2294)

* [mlnx|ffb] Add support for mellanox fast-fast boot

Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>

* [mlnx|ffb]: Add support of "config end" event for mlnx fast-fast boot

Signed-off-by: Volodymyr Samotiy <volodymyrs@mellanox.com>

* [Mellanox|FFB]: Fix review comments

* Change naming convention from "fast-fast" to "fastfast"

Signed-off-by: Volodymyr Samotiy <volodymyrs@mellanox.com>
This commit is contained in:
Volodymyr Samotiy 2018-12-04 20:11:24 +02:00 committed by lguohan
parent 1d655dbf2b
commit 75b41233d2
16 changed files with 394 additions and 14 deletions

View File

@ -7,18 +7,21 @@ function getMountPoint()
function getBootType()
{
local TYPE
case "$(cat /proc/cmdline)" in
*SONIC_BOOT_TYPE=fast*)
TYPE='fast'
;;
*SONIC_BOOT_TYPE=warm*)
local BOOT_TYPE
case "$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)" in
warm*)
TYPE='warm'
;;
fastfast)
TYPE='fastfast'
;;
fast*)
TYPE='fast'
;;
*)
TYPE='cold'
esac
echo $TYPE
echo "${TYPE}"
}
function preStartAction()
@ -26,7 +29,7 @@ function preStartAction()
{%- if docker_container_name == "database" %}
WARM_DIR=/host/warmboot
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then
# Load redis content from /host/warm-reboot/dump.rdb
# Load redis content from /host/warmboot/dump.rdb
docker cp $WARM_DIR/dump.rdb database:/var/lib/redis/dump.rdb
else
# Create an empty file and overwrite any RDB if already there
@ -46,7 +49,6 @@ function postStartAction()
until [[ $(/usr/bin/docker exec database redis-cli -s $REDIS_SOCK ping | grep -c PONG) -gt 0 ]]; do
sleep 1;
done
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then
rm -f $WARM_DIR/dump.rdb
else
@ -59,7 +61,7 @@ function postStartAction()
fi
{%- elif docker_container_name == "swss" %}
docker exec swss rm -f /ready # remove cruft
if [[ "$BOOT_TYPE" == "fast" && -d /host/fast-reboot ]]; then
if [[ "$BOOT_TYPE" == "fast" || "$BOOT_TYPE" == "fastfast" ]] && [[ -d /host/fast-reboot ]]; then
test -e /host/fast-reboot/fdb.json && docker cp /host/fast-reboot/fdb.json swss:/
test -e /host/fast-reboot/arp.json && docker cp /host/fast-reboot/arp.json swss:/
test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss:/

View File

@ -317,7 +317,9 @@ sudo cp {{src}} $FILESYSTEM_ROOT/{{dst}}
{% if sonic_asic_platform == "mellanox" %}
sudo mkdir -p $FILESYSTEM_ROOT/etc/mlnx/
sudo cp target/files/$MLNX_FW_FILE $FILESYSTEM_ROOT/etc/mlnx/fw-SPC.mfa
sudo cp target/files/$MLNX_FFB_SCRIPT $FILESYSTEM_ROOT/usr/bin/mlnx-ffb.sh
j2 platform/mellanox/mlnx-fw-upgrade.j2 | sudo tee $FILESYSTEM_ROOT/usr/bin/mlnx-fw-upgrade.sh
j2 platform/mellanox/sdk-version.j2 | sudo tee $FILESYSTEM_ROOT/etc/mlnx/sdk-version
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/mlnx-fw-upgrade.sh
{% endif %}

View File

@ -90,7 +90,11 @@ start() {
# Don't flush DB during warm boot
if [[ x"$WARM_BOOT" != x"true" ]]; then
# Don't flush APP_DB during MLNX fastfast boot
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
if [[ x"$BOOT_TYPE" != x"fastfast" ]] && [[ ! -f /var/warmboot/issu_started ]]; then
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
fi
/usr/bin/docker exec database redis-cli -n 2 FLUSHDB
/usr/bin/docker exec database redis-cli -n 5 FLUSHDB
clean_up_tables 6 "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*'"

View File

@ -2,7 +2,7 @@
DOCKER_SYNCD_MLNX_RPC = docker-syncd-mlnx-rpc.gz
$(DOCKER_SYNCD_MLNX_RPC)_PATH = $(PLATFORM_PATH)/docker-syncd-mlnx-rpc
$(DOCKER_SYNCD_MLNX_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) $(MLNX_SFPD)
$(DOCKER_SYNCD_MLNX_RPC)_DEPENDS += $(SYNCD_RPC) $(LIBTHRIFT) $(MLNX_SFPD) $(MLNX_ISSU)
$(DOCKER_SYNCD_MLNX_RPC)_LOAD_DOCKERS += $(DOCKER_SYNCD_MLNX)
SONIC_DOCKER_IMAGES += $(DOCKER_SYNCD_MLNX_RPC)
ifeq ($(ENABLE_SYNCD_RPC),y)
@ -13,3 +13,4 @@ $(DOCKER_SYNCD_MLNX_RPC)_CONTAINER_NAME = syncd
$(DOCKER_SYNCD_MLNX_RPC)_RUN_OPT += --net=host --privileged -t
$(DOCKER_SYNCD_MLNX_RPC)_RUN_OPT += -v /host/machine.conf:/etc/machine.conf
$(DOCKER_SYNCD_MLNX_RPC)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_SYNCD_MLNX_RPC)_RUN_OPT += -v /host/warmboot:/var/warmboot

View File

@ -2,7 +2,7 @@
DOCKER_SYNCD_MLNX = docker-syncd-mlnx.gz
$(DOCKER_SYNCD_MLNX)_PATH = $(PLATFORM_PATH)/docker-syncd-mlnx
$(DOCKER_SYNCD_MLNX)_DEPENDS += $(SYNCD) $(PYTHON_SDK_API) $(MLNX_SFPD) $(CRIU)
$(DOCKER_SYNCD_MLNX)_DEPENDS += $(SYNCD) $(PYTHON_SDK_API) $(MLNX_SFPD) $(CRIU) $(MLNX_ISSU)
$(DOCKER_SYNCD_MLNX)_LOAD_DOCKERS += $(DOCKER_CONFIG_ENGINE)
SONIC_DOCKER_IMAGES += $(DOCKER_SYNCD_MLNX)
ifneq ($(ENABLE_SYNCD_RPC),y)
@ -13,4 +13,5 @@ $(DOCKER_SYNCD_MLNX)_CONTAINER_NAME = syncd
$(DOCKER_SYNCD_MLNX)_RUN_OPT += --net=host --privileged -t
$(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /host/machine.conf:/etc/machine.conf
$(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_SYNCD_MLNX)_RUN_OPT += -v /host/warmboot:/var/warmboot
$(DOCKER_SYNCD_MLNX)_RUN_OPT += --tmpfs /run/criu

View File

@ -7,3 +7,9 @@ supervisorctl start rsyslogd
supervisorctl start syncd
supervisorctl start mlnx-sfpd
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
if [[ x"$BOOT_TYPE" == x"fastfast" ]] && [[ -f /var/warmboot/issu_started ]]; then
rm -f /var/warmboot/issu_started
/usr/bin/ffb &>/dev/null &
fi

7
platform/mellanox/mlnx-ffb.mk Executable file
View File

@ -0,0 +1,7 @@
# mellanox fast fast boot script
# File name of the helper script that is copied into the image.
MLNX_FFB_SCRIPT = mlnx-ffb.sh
# Directory the script is taken from.
$(MLNX_FFB_SCRIPT)_PATH = platform/mellanox/
# Register the script with the list of plain files the build copies.
SONIC_COPY_FILES += $(MLNX_FFB_SCRIPT)
# Exported so that shell steps run by the build can read $MLNX_FFB_SCRIPT
# (NOTE(review): presumably for the image build template that copies
# target/files/$MLNX_FFB_SCRIPT to /usr/bin/mlnx-ffb.sh - confirm).
export MLNX_FFB_SCRIPT

87
platform/mellanox/mlnx-ffb.sh Executable file
View File

@ -0,0 +1,87 @@
#!/bin/bash
# Helper functions for Mellanox fastfast boot (FFB).
# Status codes used as return values by the check_* functions below.
FFB_SUCCESS=0
FFB_FAILURE=1
# Report whether ISSU is enabled on this device.
# Globals:  FFB_SUCCESS, FFB_FAILURE (read)
#           ISSU_CHECK_CMD, CHECK_RESULT (written)
# Returns:  FFB_SUCCESS when the output of "show platform mlnx issu" contains
#           the substring "enabled", FFB_FAILURE otherwise
check_issu_enabled()
{
    ISSU_CHECK_CMD="show platform mlnx issu"
    CHECK_RESULT="${FFB_FAILURE}"

    # Only the command output is inspected; its exit status is not used.
    case "$(${ISSU_CHECK_CMD})" in
        *enabled*)
            CHECK_RESULT="${FFB_SUCCESS}"
            ;;
    esac

    return "${CHECK_RESULT}"
}
# Check if ISSU upgrade from current SDK to next image SDK is supported.
#
# Mounts the squashfs of the next SONiC image, reads the SDK version recorded
# in its /etc/mlnx/sdk-version and runs "issu --check <version>" inside the
# syncd container. The temporary mount point is always unmounted and removed
# before returning.
#
# Globals:  FFB_SUCCESS, FFB_FAILURE (read)
#           CHECK_RESULT, NEXT_SONIC_IMAGE, CURRENT_SONIC_IMAGE, FS_PATH,
#           FS_MOUNTPOINT, SDK_VERSION_FILE_PATH, NEXT_SDK_VERSION,
#           ISSU_CHECK_CMD (written)
# Outputs:  diagnostics on stderr when the image cannot be inspected
# Returns:  FFB_SUCCESS when the next image is the current image or the
#           issu check command succeeds, FFB_FAILURE otherwise
check_sdk_upgrade()
{
    CHECK_RESULT="${FFB_FAILURE}"

    NEXT_SONIC_IMAGE="$(sonic_installer list | grep "Next: " | cut -f2 -d' ')"
    CURRENT_SONIC_IMAGE="$(sonic_installer list | grep "Current: " | cut -f2 -d' ')"

    FS_PATH="/host/image-${NEXT_SONIC_IMAGE#SONiC-OS-}/fs.squashfs"
    FS_MOUNTPOINT="/tmp/image-${NEXT_SONIC_IMAGE#SONiC-OS-}-fs"

    # Rebooting into the same image: the SDK does not change, nothing to check.
    if [[ "${CURRENT_SONIC_IMAGE}" == "${NEXT_SONIC_IMAGE}" ]]; then
        return "${FFB_SUCCESS}"
    fi

    # Single-pass loop: "break" jumps straight to the cleanup below.
    while :; do
        mkdir -p "${FS_MOUNTPOINT}"
        mount -t squashfs "${FS_PATH}" "${FS_MOUNTPOINT}" || {
            >&2 echo "Failed to mount next SONiC image"
            break
        }

        SDK_VERSION_FILE_PATH="${FS_MOUNTPOINT}/etc/mlnx/sdk-version"
        if [[ ! -f "${SDK_VERSION_FILE_PATH}" ]]; then
            >&2 echo "No SDK version file ${SDK_VERSION_FILE_PATH}"
            break
        fi

        NEXT_SDK_VERSION="$(cat "${SDK_VERSION_FILE_PATH}")"
        if [[ -z "${NEXT_SDK_VERSION}" ]]; then
            >&2 echo "Empty SDK version file ${SDK_VERSION_FILE_PATH}"
            break
        fi

        ISSU_CHECK_CMD="docker exec -t syncd issu --check ${NEXT_SDK_VERSION}"
        # Run the command that was just built. Expanding a misspelled (empty)
        # variable here would leave only the redirection, which always
        # succeeds and would report every upgrade as supported.
        if ${ISSU_CHECK_CMD} > /dev/null; then
            CHECK_RESULT="${FFB_SUCCESS}"
        fi

        break
    done

    # Best-effort cleanup; failures here must not change the check result.
    umount -rf "${FS_MOUNTPOINT}" 2> /dev/null || true
    rm -rf "${FS_MOUNTPOINT}" 2> /dev/null || true

    return "${CHECK_RESULT}"
}
# Perform ISSU start
# Runs "issu --start" inside the syncd container (output discarded) and
# returns that command's exit status.
# Globals: ISSU_START_CMD, EXIT_CODE (written)
issu_start()
{
ISSU_START_CMD="docker exec -t syncd issu --start"
${ISSU_START_CMD} > /dev/null
# Capture the status immediately, before any other command overwrites $?
EXIT_CODE=$?
# The marker file is created unconditionally, whatever the exit status was
touch /host/warmboot/issu_started
return $EXIT_CODE
}
# Perform ISSU end
# Runs "issu --end" inside the syncd container (output discarded) and
# returns that command's exit status.
# Globals: ISSU_END_CMD, EXIT_CODE (written)
issu_end()
{
ISSU_END_CMD="docker exec -t syncd issu --end"
${ISSU_END_CMD} > /dev/null
# Capture the status immediately, before any other command overwrites $?
EXIT_CODE=$?
return $EXIT_CODE
}

5
platform/mellanox/mlnx-issu.mk Executable file
View File

@ -0,0 +1,5 @@
# issu (SONiC MLNX platform ISSU tool) Debian package
# File name of the resulting package.
MLNX_ISSU = python-mlnx-issu_1.0-1_all.deb
# Source directory of the Python package.
$(MLNX_ISSU)_SRC_PATH = $(PLATFORM_PATH)/mlnx-issu
# Register the package with the SONIC_PYTHON_STDEB_DEBS build list
# (NOTE(review): presumably built from setup.py via stdeb - confirm).
SONIC_PYTHON_STDEB_DEBS += $(MLNX_ISSU)

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
"""
Part of Mellanox platform specific fastfast boot implementation for warm-boot.
Notifies SYNCD process once boot is finished after warm-reboot.
Once SYNCD received such notification it should set appropriate SAI attribute.
Then SAI will notify SDK to end ISSU mode for the FFB.
"""
import time
import swsssdk
from threading import Timer
class FFB(object):
    """Provides implementation for Mellanox fastfast boot.

    Watches the 'state' field of the WARM_RESTART_TABLE|bgp entry in STATE_DB
    and, once it changes to 'reconciled' (or once STOP_TIMER_TIMEOUT expires),
    publishes the ISSU_END message on the MLNX_FFB channel.
    """

    DB_WARM_TABLE_KEY = 'WARM_RESTART_TABLE|bgp'
    DB_STATE_ENTRY_NAME = 'state'
    DB_STATE_TYPE_RECONCILED = 'reconciled'
    DB_CHANNEL_NAME = 'MLNX_FFB'
    DB_CHANNEL_MSG = '["SET","ISSU_END"]' # message should be in the following format: ["<operation>","<data>"]

    # All intervals below are in seconds
    SUB_THREAD_TIMEOUT = 1     # sleep_time handed to the pubsub worker thread
    STOP_TIMER_TIMEOUT = 180   # give up waiting for the event after this long
    CONFIG_APPLY_DELAY = 60    # W/A: time given to HW to apply configuration before "FFB END" is published

    def __init__(self):
        """Connect to STATE_DB, remember the current state and subscribe to key events."""
        self.state_db = swsssdk.SonicV2Connector()
        self.state_db.connect(self.state_db.STATE_DB)

        self.prevState = self.state_db.get(self.state_db.STATE_DB, self.DB_WARM_TABLE_KEY, self.DB_STATE_ENTRY_NAME)

        self.pubSub = self.state_db.redis_clients[self.state_db.STATE_DB].pubsub()
        # Pattern-subscribe to keyspace/keyevent notifications for the warm restart key in DB 6
        self.pubSub.psubscribe(**{'__key*@6__:{}'.format(self.DB_WARM_TABLE_KEY): self.eventHandler})

        self.timeoutTimer = Timer(self.STOP_TIMER_TIMEOUT, self.finish)

    def run(self):
        """Start the event listener thread and the one-shot timeout timer."""
        # Start event thread in order to get required events
        self.eventThread = self.pubSub.run_in_thread(sleep_time=self.SUB_THREAD_TIMEOUT)

        # Start oneshot timer in order to exit in case required event is not received during defined timeout
        self.timeoutTimer.start()

    def finish(self):
        """Stop listening and publish the "FFB END" event to the SYNCD process.

        Called either from eventHandler (state became 'reconciled') or by the
        timeout timer.
        """
        # Stop event thread and timeout timer
        self.eventThread.stop()
        self.timeoutTimer.cancel()

        # Publish "FFB END" event to SYNCD process
        time.sleep(self.CONFIG_APPLY_DELAY) # W/A: Wait until configuration is applied to HW since it takes some time
        self.state_db.publish(self.state_db.STATE_DB, self.DB_CHANNEL_NAME, self.DB_CHANNEL_MSG)

    def eventHandler(self, msg):
        """Handle one pubsub message; finish when the state changes to 'reconciled'.

        msg is the notification dict delivered by the pubsub thread; only its
        'data' entry (the operation name) is inspected.
        """
        # Only "set" operations are needed so just skip all others
        if msg['data'] != 'hset':
            return

        state = self.state_db.get(self.state_db.STATE_DB, self.DB_WARM_TABLE_KEY, self.DB_STATE_ENTRY_NAME)
        if (state != self.prevState) and (state == self.DB_STATE_TYPE_RECONCILED):
            self.finish()
        else:
            self.prevState = state
def main():
    """Entry point: create the fastfast boot watcher and start it."""
    watcher = FFB()
    watcher.run()


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,176 @@
#!/usr/bin/env python
'''
This code is for a mlnx platform specific tool, issu.
This tool provides a CLI interface to interact with SDK ISSU module
'''
from __future__ import print_function
import sys
import os
import re
import errno
import syslog
import argparse
from functools import wraps
from python_sdk_api import sx_api
# ========================== Constants ===============================
SDK_VERSION_PATTERN = r'(\d+)\.(\d+)\.(.*)'
SYSLOG_IDENTIFIER = "ISSU"
# Flag that indicates whether to print logs to stdout
verbose = False
# ========================== Syslog wrappers ==========================
def _log(priority, msg, stream=None):
    """Write @msg to syslog with @priority; also print it to @stream when one is given."""
    syslog.openlog(SYSLOG_IDENTIFIER)
    syslog.syslog(priority, msg)
    syslog.closelog()

    if stream is not None:
        print(msg, file=stream)


def log_info(msg):
    """Log @msg at INFO priority; echoed to stdout only in verbose mode."""
    _log(syslog.LOG_INFO, msg, sys.stdout if verbose else None)


def log_warning(msg):
    """Log @msg at WARNING priority; echoed to stdout only in verbose mode."""
    _log(syslog.LOG_WARNING, msg, sys.stdout if verbose else None)


def log_error(msg):
    """Log @msg at ERR priority; always echoed to stderr."""
    _log(syslog.LOG_ERR, msg, sys.stderr)
# ========================== Global functions =========================
def with_sdk_handle(func):
    """ A decorator for @func that use sx api
    that gets a SDK handler, calls func(handler, *args, **kwargs)
    and then closes the handler regardless of func failure.

    Exits the process with errno.EACCES when the SDK handle cannot be opened.
    Returns whatever @func returns; an exception raised by @func propagates
    after the handle has been closed."""

    @wraps(func)
    def wrapped(*args, **kwargs):
        log_info("opening sdk")
        rc, handle = sx_api.sx_api_open(None)

        # Check the status before touching the handle: on failure the handle
        # may not be an integer, and formatting it with %x would raise
        # TypeError instead of reaching the intended error path.
        if rc != sx_api.SX_STATUS_SUCCESS:
            log_info("sx_api_open failed, rc %d " % rc)
            log_error("failed to open api handle. Please check that SDK is running")
            sys.exit(errno.EACCES)

        log_info("sx_api_open handle: 0x%x , rc %d " % (handle, rc))

        try:
            res = func(handle, *args, **kwargs)
        finally:
            log_info("closing sdk handle")
            rc = sx_api.sx_api_close(handle)

            # A failed close is reported but does not override the outcome of func
            if rc != sx_api.SX_STATUS_SUCCESS:
                log_error("failed to close api handle")

        return res

    return wrapped
def check_sdk_version_pattern(sdk_version):
    """argparse type checker for @sdk_version.

    Returns @sdk_version unchanged when it starts with SDK_VERSION_PATTERN,
    otherwise raises argparse.ArgumentTypeError."""

    matched = re.match(SDK_VERSION_PATTERN, sdk_version)
    if matched:
        return sdk_version

    raise argparse.ArgumentTypeError("{} is an invalid SDK version string".format(sdk_version))
@with_sdk_handle
def check_issu_upgrade_to_sdk_version(handle, new_sdk):
    """Check whether ISSU upgrade to @new_sdk version is possible.

    Reads the current SDK version through @handle (exiting with errno.EACCES
    if that fails), logs both versions and returns the check result.
    NOTE(review): the result is currently a constant True; the two versions
    are only logged, never compared."""

    sdk_versions = sx_api.new_sx_api_sx_sdk_versions_t_p()
    status = sx_api.sx_api_sx_sdk_version_get(handle, sdk_versions)
    if status != sx_api.SX_STATUS_SUCCESS:
        log_error("failed to get current SDK version")
        sys.exit(errno.EACCES)

    succeed = True
    log_info('check ISSU upgrade: current SDK: {}, new SDK: {}, check succeed: {}'.format(
        sdk_versions.sx_sdk, new_sdk, succeed))

    return succeed
@with_sdk_handle
def issu_start(handle):
    """Call the SDK ISSU start API on @handle; exit with errno.EACCES if it fails."""

    log_info("call ISSU start")

    if sx_api.sx_api_issu_start_set(handle) == sx_api.SX_STATUS_SUCCESS:
        return

    log_error("failed to execute ISSU start API")
    sys.exit(errno.EACCES)
@with_sdk_handle
def issu_end(handle):
    """Call the SDK ISSU end API on @handle; exit with errno.EACCES if it fails."""

    log_info("call ISSU end")

    if sx_api.sx_api_issu_end_set(handle) == sx_api.SX_STATUS_SUCCESS:
        return

    log_error("failed to execute ISSU end API")
    sys.exit(errno.EACCES)
def get_parser():
    """Build the command line parser of the issu tool.

    Options: -c/--check <sdk version>, -s/--start, -e/--end, -v/--verbose."""

    issu_parser = argparse.ArgumentParser()

    # The only option that takes a value; it is validated by check_sdk_version_pattern
    issu_parser.add_argument('-c', '--check', type=check_sdk_version_pattern, nargs=1,
                             action='store', default=None,
                             help='Check if ISSU upgrade is supported to new SDK version')

    # Plain boolean switches, registered in the order they appear in the help output
    switches = (
        (('-s', '--start'), {'help': 'Call ISSU start API'}),
        (('-e', '--end'), {'help': 'Call ISSU end API'}),
        (('-v', '--verbose'), {'default': False}),
    )
    for names, extra in switches:
        issu_parser.add_argument(*names, action='store_true', **extra)

    return issu_parser
def main():
    """Entry point: parse the command line and run the requested ISSU action.

    Only one action is performed per invocation, with --check taking
    precedence over --start, and --start over --end. Exits with status 1 when
    the SDK upgrade check reports "not supported"; prints the help text when
    no action was requested."""

    global verbose

    parser = get_parser()
    args = parser.parse_args()

    verbose = args.verbose

    if args.check is not None:
        is_supported = check_issu_upgrade_to_sdk_version(args.check[0])

        if verbose:
            print('SDK upgrade is{}supported'.format(' ' if is_supported else ' not '))

        if not is_supported:
            sys.exit(1)
        return

    if args.start:
        issu_start()
        return

    if args.end:
        issu_end()
        return

    # No action requested: let argparse print the help text and exit
    parser.parse_args(['-h'])


if __name__ == '__main__':
    main()

View File

@ -0,0 +1,15 @@
# Packaging script for the Mellanox ISSU tooling (package 'mlnx-issu').
from setuptools import setup
setup(
name='mlnx-issu',
version='1.0',
description='MLNX ISSU tool for SONiC on mellanox platform',
author='SONiC Community',
url='https://github.com/Azure/sonic-buildimage/',
maintainer='Stepan Blyschak',
maintainer_email='stepanb@mellanox.com',
# Executable scripts shipped with the package:
#   issu - CLI for the SDK ISSU module
#   ffb  - fastfast boot end notifier
scripts=[
'scripts/issu',
'scripts/ffb',
]
)

View File

@ -5,5 +5,5 @@ $(SONIC_ONE_IMAGE)_MACHINE = mellanox
$(SONIC_ONE_IMAGE)_IMAGE_TYPE = onie
$(SONIC_ONE_IMAGE)_INSTALLS += $(SX_KERNEL) $(KERNEL_MFT) $(MFT_OEM) $(MFT) $(MLNX_HW_MANAGEMENT)
$(SONIC_ONE_IMAGE)_DOCKERS += $(SONIC_INSTALL_DOCKER_IMAGES)
$(SONIC_ONE_IMAGE)_FILES += $(MLNX_FW_FILE)
$(SONIC_ONE_IMAGE)_FILES += $(MLNX_FW_FILE) $(MLNX_FFB_SCRIPT)
SONIC_INSTALLERS += $(SONIC_ONE_IMAGE)

View File

@ -11,6 +11,8 @@ include $(PLATFORM_PATH)/one-image.mk
include $(PLATFORM_PATH)/libsaithrift-dev.mk
include $(PLATFORM_PATH)/docker-ptf-mlnx.mk
include $(PLATFORM_PATH)/mlnx-sfpd.mk
include $(PLATFORM_PATH)/mlnx-ffb.mk
include $(PLATFORM_PATH)/mlnx-issu.mk
SONIC_ALL += $(SONIC_ONE_IMAGE) \
$(DOCKER_FPM)

View File

@ -0,0 +1 @@
{{ MLNX_SDK_VERSION }}

View File

@ -55,3 +55,5 @@ $(eval $(foreach deb,$(MLNX_SDK_RDEBS),$(call make_url,$(deb))))
$(eval $(foreach deb,$(PYTHON_SDK_API) $(SX_KERNEL) $(SX_KERNEL_DEV),$(call make_url,$(deb))))
SONIC_ONLINE_DEBS += $(MLNX_SDK_RDEBS) $(PYTHON_SDK_API) $(SX_KERNEL)
export MLNX_SDK_VERSION