[mellanox|ffb] use system level warm reboot for Mellanox fastfast boot (#2374)

* [mellanox|ffb] use system level warm reboot for Mellanox fastfast boot

Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>

* [mellanox|ffb] add comments for mellanox start/stop drivers section

Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>
This commit is contained in:
stepanblyschak 2019-01-11 00:09:04 +02:00 committed by Ying Xie
parent ae190725ff
commit ff526dd103
7 changed files with 37 additions and 98 deletions

View File

@ -28,7 +28,7 @@ function preStartAction()
{
{%- if docker_container_name == "database" %}
WARM_DIR=/host/warmboot
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then
if [[ ("$BOOT_TYPE" == "warm" || "$BOOT_TYPE" == "fastfast") && -f $WARM_DIR/dump.rdb ]]; then
# Load redis content from /host/warmboot/dump.rdb
docker cp $WARM_DIR/dump.rdb database:/var/lib/redis/dump.rdb
else
@ -49,7 +49,7 @@ function postStartAction()
until [[ $(/usr/bin/docker exec database redis-cli -s $REDIS_SOCK ping | grep -c PONG) -gt 0 ]]; do
sleep 1;
done
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then
if [[ ("$BOOT_TYPE" == "warm" || "$BOOT_TYPE" == "fastfast") && -f $WARM_DIR/dump.rdb ]]; then
rm -f $WARM_DIR/dump.rdb
else
# If there is a config db dump file, load it
@ -61,7 +61,7 @@ function postStartAction()
fi
{%- elif docker_container_name == "swss" %}
docker exec swss rm -f /ready # remove cruft
if [[ "$BOOT_TYPE" == "fast" || "$BOOT_TYPE" == "fastfast" ]] && [[ -d /host/fast-reboot ]]; then
if [[ "$BOOT_TYPE" == "fast" ]] && [[ -d /host/fast-reboot ]]; then
test -e /host/fast-reboot/fdb.json && docker cp /host/fast-reboot/fdb.json swss:/
test -e /host/fast-reboot/arp.json && docker cp /host/fast-reboot/arp.json swss:/
test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss:/

View File

@ -90,11 +90,7 @@ start() {
# Don't flush DB during warm boot
if [[ x"$WARM_BOOT" != x"true" ]]; then
# Don't flush APP_DB during MLNX fastfast boot
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
if [[ x"$BOOT_TYPE" != x"fastfast" ]] && [[ ! -f /var/warmboot/issu_started ]]; then
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
fi
/usr/bin/docker exec database redis-cli -n 2 FLUSHDB
/usr/bin/docker exec database redis-cli -n 5 FLUSHDB
clean_up_tables 6 "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*'"

View File

@ -90,14 +90,12 @@ start() {
# Flush DB during non-warm start
/usr/bin/docker exec database redis-cli -n 1 FLUSHDB
# platform specific tasks
if [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh start
fi
fi
# platform specific tasks
# start mellanox drivers regardless of
# boot type
if [ x"$sonic_asic_platform" == x"mellanox" ]; then
BOOT_TYPE=`getBootType`
if [[ x"$WARM_BOOT" == x"true" || x"$BOOT_TYPE" == x"fast" ]]; then
@ -109,6 +107,13 @@ start() {
/sbin/modprobe i2c-dev
fi
if [[ x"$WARM_BOOT" != x"true" ]]; then
if [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh start
fi
fi
# start service docker
/usr/bin/${SERVICE}.sh start
debug "Started ${SERVICE} service..."
@ -146,21 +151,23 @@ stop() {
/usr/bin/${SERVICE}.sh stop
debug "Stopped ${SERVICE} service..."
# if warm start enabled, don't stop peer service docker
if [[ x"$WARM_BOOT" != x"true" ]]; then
# platform specific tasks
# stop mellanox driver regardless of
# shutdown type
if [ x$sonic_asic_platform == x'mellanox' ]; then
/etc/init.d/sxdkernel stop
/usr/bin/mst stop
fi
if [[ x"$WARM_BOOT" != x"true" ]]; then
if [ x$sonic_asic_platform == x'cavium' ]; then
/etc/init.d/xpnet.sh stop
/etc/init.d/xpnet.sh start
fi
fi
# platform specific tasks
if [ x"$sonic_asic_platform" == x"mellanox" ]; then
/etc/init.d/sxdkernel stop
/usr/bin/mst stop
fi
unlock_service_state_change
}

View File

@ -8,8 +8,3 @@ supervisorctl start syncd
supervisorctl start mlnx-sfpd
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
if [[ x"$BOOT_TYPE" == x"fastfast" ]] && [[ -f /var/warmboot/issu_started ]]; then
rm -f /var/warmboot/issu_started
/usr/bin/ffb &>/dev/null &
fi

View File

@ -62,6 +62,19 @@ check_sdk_upgrade()
return "${CHECK_RESULT}"
}
# Check whether fastfast boot (FFB) can be performed.
# Runs check_issu_enabled and then check_sdk_upgrade; the first check that
# fails prints a message to stdout and stops any further checking.
# Globals:   FFB_SUCCESS, FFB_FAILURE (read)
# Outputs:   a one-line reason on stdout when a check fails
# Returns:   ${FFB_SUCCESS} if both checks pass, ${FFB_FAILURE} otherwise
check_ffb()
{
    check_issu_enabled || {
        echo "ISSU is not enabled on this HWSKU"
        return "${FFB_FAILURE}"
    }

    check_sdk_upgrade || {
        echo "SDK upgrade check failed"
        return "${FFB_FAILURE}"
    }

    return "${FFB_SUCCESS}"
}
# Perform ISSU start
issu_start()
{
@ -70,8 +83,6 @@ issu_start()
EXIT_CODE=$?
touch /host/warmboot/issu_started
return $EXIT_CODE
}

View File

@ -1,69 +0,0 @@
#!/usr/bin/env python
"""
Part of Mellanox platform specific fastfast boot implementation for warm-boot.
Notifies the SYNCD process once boot is finished after warm-reboot.
Once SYNCD has received such a notification it should set the appropriate SAI attribute.
Then SAI will notify SDK to end ISSU mode for the FFB.
"""
import time
import swsssdk
from threading import Timer
class FFB(object):
    """Provides implementation for Mellanox fastfast boot.

    Watches the ``state`` field of ``WARM_RESTART_TABLE|bgp`` in STATE_DB.
    Once that field changes to ``reconciled``, or once ``STOP_TIMER_TIMEOUT``
    seconds have elapsed after ``run()``, whichever happens first, the
    ``ISSU_END`` message is published on the ``MLNX_FFB`` channel.
    """

    DB_WARM_TABLE_KEY = 'WARM_RESTART_TABLE|bgp'
    DB_STATE_ENTRY_NAME = 'state'
    DB_STATE_TYPE_RECONCILED = 'reconciled'
    DB_CHANNEL_NAME = 'MLNX_FFB'
    DB_CHANNEL_MSG = '["SET","ISSU_END"]'  # message should be in the following format: ["<operation>","<data>"]
    SUB_THREAD_TIMEOUT = 1    # sleep_time (seconds) passed to the pub/sub worker thread
    STOP_TIMER_TIMEOUT = 180  # seconds to wait for the event before finishing anyway
    HW_APPLY_DELAY = 60       # seconds to wait before publishing the end-of-FFB message

    def __init__(self):
        """Connect to STATE_DB, remember the current state and subscribe to key events."""
        self.state_db = swsssdk.SonicV2Connector()
        self.state_db.connect(self.state_db.STATE_DB)

        # Remember the state seen at start-up so that only a *change* to
        # 'reconciled' triggers the notification.
        self.prevState = self.state_db.get(self.state_db.STATE_DB, self.DB_WARM_TABLE_KEY, self.DB_STATE_ENTRY_NAME)

        # Subscribe to notifications for the warm-restart key.
        # NOTE(review): the pattern hard-codes database index 6 - presumably
        # the STATE_DB index; confirm against the database configuration.
        self.pubSub = self.state_db.redis_clients[self.state_db.STATE_DB].pubsub()
        self.pubSub.psubscribe(**{'__key*@6__:{}'.format(self.DB_WARM_TABLE_KEY): self.eventHandler})

        self.timeoutTimer = Timer(self.STOP_TIMER_TIMEOUT, self.finish)

    def run(self):
        """Start the event listener thread and the fallback timeout timer."""
        # Start event thread in order to get required events
        self.eventThread = self.pubSub.run_in_thread(sleep_time=self.SUB_THREAD_TIMEOUT)

        # Start oneshot timer in order to exit in case required event is not received during defined timeout
        self.timeoutTimer.start()

    def finish(self):
        """Stop listening and publish the end-of-FFB message."""
        # Stop event thread and timeout timer
        self.eventThread.stop()
        self.timeoutTimer.cancel()

        # Publish "FFB END" event to SYNCD process
        time.sleep(self.HW_APPLY_DELAY)  # W/A: Wait until configuration is applied to HW since it takes some time
        self.state_db.publish(self.state_db.STATE_DB, self.DB_CHANNEL_NAME, self.DB_CHANNEL_MSG)

    def eventHandler(self, msg):
        """Handle a key event; finish once the state changes to 'reconciled'.

        msg is the pub/sub message dictionary; only its 'data' entry is read.
        """
        # Only "set" operations are needed so just skip all others
        if msg['data'] != 'hset':
            return

        state = self.state_db.get(self.state_db.STATE_DB, self.DB_WARM_TABLE_KEY, self.DB_STATE_ENTRY_NAME)
        if (state != self.prevState) and (state == self.DB_STATE_TYPE_RECONCILED):
            self.finish()
        else:
            self.prevState = state
def main():
    """Script entry point: create the FFB helper and start it."""
    notifier = FFB()
    notifier.run()


if __name__ == '__main__':
    main()

View File

@ -10,6 +10,5 @@ setup(
maintainer_email='stepanb@mellanox.com',
scripts=[
'scripts/issu',
'scripts/ffb',
]
)