[mellanox|ffb] use system level warm reboot for Mellanox fastfast boot (#2374)

* [mellanox|ffb] use system level warm reboot for Mellanox fastfast boot

Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>

* [mellanox|ffb] add comments for mellanox start/stop drivers section

Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>
This commit is contained in:
stepanblyschak 2019-01-11 00:09:04 +02:00 committed by Ying Xie
parent ae190725ff
commit ff526dd103
7 changed files with 37 additions and 98 deletions

View File

@ -28,7 +28,7 @@ function preStartAction()
{ {
{%- if docker_container_name == "database" %} {%- if docker_container_name == "database" %}
WARM_DIR=/host/warmboot WARM_DIR=/host/warmboot
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then if [[ ("$BOOT_TYPE" == "warm" || "$BOOT_TYPE" == "fastfast") && -f $WARM_DIR/dump.rdb ]]; then
# Load redis content from /host/warmboot/dump.rdb # Load redis content from /host/warmboot/dump.rdb
docker cp $WARM_DIR/dump.rdb database:/var/lib/redis/dump.rdb docker cp $WARM_DIR/dump.rdb database:/var/lib/redis/dump.rdb
else else
@ -49,7 +49,7 @@ function postStartAction()
until [[ $(/usr/bin/docker exec database redis-cli -s $REDIS_SOCK ping | grep -c PONG) -gt 0 ]]; do until [[ $(/usr/bin/docker exec database redis-cli -s $REDIS_SOCK ping | grep -c PONG) -gt 0 ]]; do
sleep 1; sleep 1;
done done
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then if [[ ("$BOOT_TYPE" == "warm" || "$BOOT_TYPE" == "fastfast") && -f $WARM_DIR/dump.rdb ]]; then
rm -f $WARM_DIR/dump.rdb rm -f $WARM_DIR/dump.rdb
else else
# If there is a config db dump file, load it # If there is a config db dump file, load it
@ -61,7 +61,7 @@ function postStartAction()
fi fi
{%- elif docker_container_name == "swss" %} {%- elif docker_container_name == "swss" %}
docker exec swss rm -f /ready # remove cruft docker exec swss rm -f /ready # remove cruft
if [[ "$BOOT_TYPE" == "fast" || "$BOOT_TYPE" == "fastfast" ]] && [[ -d /host/fast-reboot ]]; then if [[ "$BOOT_TYPE" == "fast" ]] && [[ -d /host/fast-reboot ]]; then
test -e /host/fast-reboot/fdb.json && docker cp /host/fast-reboot/fdb.json swss:/ test -e /host/fast-reboot/fdb.json && docker cp /host/fast-reboot/fdb.json swss:/
test -e /host/fast-reboot/arp.json && docker cp /host/fast-reboot/arp.json swss:/ test -e /host/fast-reboot/arp.json && docker cp /host/fast-reboot/arp.json swss:/
test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss:/ test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss:/

View File

@ -90,11 +90,7 @@ start() {
# Don't flush DB during warm boot # Don't flush DB during warm boot
if [[ x"$WARM_BOOT" != x"true" ]]; then if [[ x"$WARM_BOOT" != x"true" ]]; then
# Don't flush APP_DB during MLNX fastfast boot /usr/bin/docker exec database redis-cli -n 0 FLUSHDB
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
if [[ x"$BOOT_TYPE" != x"fastfast" ]] && [[ ! -f /var/warmboot/issu_started ]]; then
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
fi
/usr/bin/docker exec database redis-cli -n 2 FLUSHDB /usr/bin/docker exec database redis-cli -n 2 FLUSHDB
/usr/bin/docker exec database redis-cli -n 5 FLUSHDB /usr/bin/docker exec database redis-cli -n 5 FLUSHDB
clean_up_tables 6 "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*'" clean_up_tables 6 "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*'"

View File

@ -90,14 +90,12 @@ start() {
# Flush DB during non-warm start # Flush DB during non-warm start
/usr/bin/docker exec database redis-cli -n 1 FLUSHDB /usr/bin/docker exec database redis-cli -n 1 FLUSHDB
# platform specific tasks
if [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh start
fi
fi fi
# platform specific tasks # platform specific tasks
# start mellanox drivers regardless of
# boot type
if [ x"$sonic_asic_platform" == x"mellanox" ]; then if [ x"$sonic_asic_platform" == x"mellanox" ]; then
BOOT_TYPE=`getBootType` BOOT_TYPE=`getBootType`
if [[ x"$WARM_BOOT" == x"true" || x"$BOOT_TYPE" == x"fast" ]]; then if [[ x"$WARM_BOOT" == x"true" || x"$BOOT_TYPE" == x"fast" ]]; then
@ -109,6 +107,13 @@ start() {
/sbin/modprobe i2c-dev /sbin/modprobe i2c-dev
fi fi
if [[ x"$WARM_BOOT" != x"true" ]]; then
if [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh start
fi
fi
# start service docker # start service docker
/usr/bin/${SERVICE}.sh start /usr/bin/${SERVICE}.sh start
debug "Started ${SERVICE} service..." debug "Started ${SERVICE} service..."
@ -146,21 +151,23 @@ stop() {
/usr/bin/${SERVICE}.sh stop /usr/bin/${SERVICE}.sh stop
debug "Stopped ${SERVICE} service..." debug "Stopped ${SERVICE} service..."
# if warm start enabled, don't stop peer service docker # platform specific tasks
# stop mellanox driver regardless of
# shutdown type
if [ x$sonic_asic_platform == x'mellanox' ]; then
/etc/init.d/sxdkernel stop
/usr/bin/mst stop
fi
if [[ x"$WARM_BOOT" != x"true" ]]; then if [[ x"$WARM_BOOT" != x"true" ]]; then
# platform specific tasks
if [ x$sonic_asic_platform == x'cavium' ]; then if [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh stop /etc/init.d/xpnet.sh stop
/etc/init.d/xpnet.sh start /etc/init.d/xpnet.sh start
fi fi
fi fi
# platform specific tasks
if [ x"$sonic_asic_platform" == x"mellanox" ]; then
/etc/init.d/sxdkernel stop
/usr/bin/mst stop
fi
unlock_service_state_change unlock_service_state_change
} }

View File

@ -8,8 +8,3 @@ supervisorctl start syncd
supervisorctl start mlnx-sfpd supervisorctl start mlnx-sfpd
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
if [[ x"$BOOT_TYPE" == x"fastfast" ]] && [[ -f /var/warmboot/issu_started ]]; then
rm -f /var/warmboot/issu_started
/usr/bin/ffb &>/dev/null &
fi

View File

@ -62,6 +62,19 @@ check_sdk_upgrade()
return "${CHECK_RESULT}" return "${CHECK_RESULT}"
} }
# Check whether the preconditions for a fastfast boot (FFB) are met.
#
# Runs check_issu_enabled first and, if that succeeds, check_sdk_upgrade.
# When either check fails, a diagnostic line is printed to stdout and the
# function returns ${FFB_FAILURE}; otherwise it returns ${FFB_SUCCESS}.
check_ffb()
{
    check_issu_enabled || {
        echo "ISSU is not enabled on this HWSKU"
        return "${FFB_FAILURE}"
    }

    check_sdk_upgrade || {
        echo "SDK upgrade check failed"
        return "${FFB_FAILURE}"
    }

    return "${FFB_SUCCESS}"
}
# Perform ISSU start # Perform ISSU start
issu_start() issu_start()
{ {
@ -70,8 +83,6 @@ issu_start()
EXIT_CODE=$? EXIT_CODE=$?
touch /host/warmboot/issu_started
return $EXIT_CODE return $EXIT_CODE
} }

View File

@ -1,69 +0,0 @@
#!/usr/bin/env python
"""
Part of Mellanox platform specific fastfast boot implementation for warm-boot.
Notifies the SYNCD process once boot is finished after warm-reboot.
Once SYNCD receives such a notification it should set the appropriate SAI attribute.
Then SAI will notify the SDK to end ISSU mode for the FFB.
"""
import time
import swsssdk
from threading import Timer
class FFB(object):
    """Notifier that publishes the "ISSU_END" message for Mellanox fastfast boot.

    The object watches the ``state`` field of the ``WARM_RESTART_TABLE|bgp``
    entry in STATE_DB. As soon as that field changes to ``reconciled`` -- or
    when ``STOP_TIMER_TIMEOUT`` seconds elapse without that happening -- it
    publishes ``DB_CHANNEL_MSG`` on the ``DB_CHANNEL_NAME`` channel.
    """

    # Key, field and value that are monitored in STATE_DB.
    DB_WARM_TABLE_KEY = 'WARM_RESTART_TABLE|bgp'
    DB_STATE_ENTRY_NAME = 'state'
    DB_STATE_TYPE_RECONCILED = 'reconciled'

    # Channel and payload of the final notification.
    DB_CHANNEL_NAME = 'MLNX_FFB'
    DB_CHANNEL_MSG = '["SET","ISSU_END"]'  # message should be in the following format: ["<operation>","<data>"]

    # Polling interval of the pub/sub worker thread, in seconds.
    SUB_THREAD_TIMEOUT = 1
    # Seconds to wait for the "reconciled" state before finishing anyway.
    STOP_TIMER_TIMEOUT = 180

    def __init__(self):
        """Connect to STATE_DB, record the current state and subscribe to changes."""
        self.state_db = swsssdk.SonicV2Connector()
        db = self.state_db
        db.connect(db.STATE_DB)

        # Remember the state seen at start-up so that only a *transition*
        # to "reconciled" triggers the notification.
        self.prevState = db.get(
            db.STATE_DB,
            self.DB_WARM_TABLE_KEY,
            self.DB_STATE_ENTRY_NAME,
        )

        # Pattern-subscribe with eventHandler as the callback.
        # NOTE(review): the pattern presumably matches keyspace notifications
        # for the watched key in DB index 6 -- confirm against the DB layout.
        self.pubSub = db.redis_clients[db.STATE_DB].pubsub()
        pattern = '__key*@6__:{}'.format(self.DB_WARM_TABLE_KEY)
        handlers = {pattern: self.eventHandler}
        self.pubSub.psubscribe(**handlers)

        # One-shot fallback timer; started in run().
        self.timeoutTimer = Timer(self.STOP_TIMER_TIMEOUT, self.finish)

    def run(self):
        """Start the subscription worker thread and the timeout timer."""
        # The worker thread delivers subscription messages to eventHandler.
        self.eventThread = self.pubSub.run_in_thread(
            sleep_time=self.SUB_THREAD_TIMEOUT
        )
        # Guarantees finish() is eventually called even when the expected
        # event never arrives.
        self.timeoutTimer.start()

    def finish(self):
        """Stop listening and publish the end-of-FFB message."""
        # Tear down both triggers first so neither is left running.
        self.eventThread.stop()
        self.timeoutTimer.cancel()

        # W/A: Wait until configuration is applied to HW since it takes some time
        time.sleep(60)

        # Publish "FFB END" event to SYNCD process
        db = self.state_db
        db.publish(db.STATE_DB, self.DB_CHANNEL_NAME, self.DB_CHANNEL_MSG)

    def eventHandler(self, msg):
        """Handle one subscription message.

        Messages whose ``data`` is not ``'hset'`` are ignored. Otherwise the
        watched state field is re-read: a change to ``reconciled`` calls
        finish(), any other value is stored as the new previous state.
        """
        if msg['data'] == 'hset':
            current = self.state_db.get(
                self.state_db.STATE_DB,
                self.DB_WARM_TABLE_KEY,
                self.DB_STATE_ENTRY_NAME,
            )
            became_reconciled = (
                current == self.DB_STATE_TYPE_RECONCILED
                and current != self.prevState
            )
            if became_reconciled:
                self.finish()
                return
            self.prevState = current
def main():
    """Create the FFB notifier and start its listener thread and timer."""
    notifier = FFB()
    notifier.run()


if __name__ == "__main__":
    main()

View File

@ -10,6 +10,5 @@ setup(
maintainer_email='stepanb@mellanox.com', maintainer_email='stepanb@mellanox.com',
scripts=[ scripts=[
'scripts/issu', 'scripts/issu',
'scripts/ffb',
] ]
) )