[mellanox|ffb] use system level warm reboot for Mellanox fastfast boot (#2374)
* [mellanox|ffb] use system level warm reboot for Mellanox fastfast boot Signed-off-by: Stepan Blyschak <stepanb@mellanox.com> * [mellanox|ffb] add comments for mellanox start/stop drivers section Signed-off-by: Stepan Blyschak <stepanb@mellanox.com>
This commit is contained in:
parent
ae190725ff
commit
ff526dd103
@ -28,7 +28,7 @@ function preStartAction()
|
|||||||
{
|
{
|
||||||
{%- if docker_container_name == "database" %}
|
{%- if docker_container_name == "database" %}
|
||||||
WARM_DIR=/host/warmboot
|
WARM_DIR=/host/warmboot
|
||||||
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then
|
if [[ ("$BOOT_TYPE" == "warm" || "$BOOT_TYPE" == "fastfast") && -f $WARM_DIR/dump.rdb ]]; then
|
||||||
# Load redis content from /host/warmboot/dump.rdb
|
# Load redis content from /host/warmboot/dump.rdb
|
||||||
docker cp $WARM_DIR/dump.rdb database:/var/lib/redis/dump.rdb
|
docker cp $WARM_DIR/dump.rdb database:/var/lib/redis/dump.rdb
|
||||||
else
|
else
|
||||||
@ -49,7 +49,7 @@ function postStartAction()
|
|||||||
until [[ $(/usr/bin/docker exec database redis-cli -s $REDIS_SOCK ping | grep -c PONG) -gt 0 ]]; do
|
until [[ $(/usr/bin/docker exec database redis-cli -s $REDIS_SOCK ping | grep -c PONG) -gt 0 ]]; do
|
||||||
sleep 1;
|
sleep 1;
|
||||||
done
|
done
|
||||||
if [[ "$BOOT_TYPE" == "warm" && -f $WARM_DIR/dump.rdb ]]; then
|
if [[ ("$BOOT_TYPE" == "warm" || "$BOOT_TYPE" == "fastfast") && -f $WARM_DIR/dump.rdb ]]; then
|
||||||
rm -f $WARM_DIR/dump.rdb
|
rm -f $WARM_DIR/dump.rdb
|
||||||
else
|
else
|
||||||
# If there is a config db dump file, load it
|
# If there is a config db dump file, load it
|
||||||
@ -61,7 +61,7 @@ function postStartAction()
|
|||||||
fi
|
fi
|
||||||
{%- elif docker_container_name == "swss" %}
|
{%- elif docker_container_name == "swss" %}
|
||||||
docker exec swss rm -f /ready # remove cruft
|
docker exec swss rm -f /ready # remove cruft
|
||||||
if [[ "$BOOT_TYPE" == "fast" || "$BOOT_TYPE" == "fastfast" ]] && [[ -d /host/fast-reboot ]]; then
|
if [[ "$BOOT_TYPE" == "fast" ]] && [[ -d /host/fast-reboot ]]; then
|
||||||
test -e /host/fast-reboot/fdb.json && docker cp /host/fast-reboot/fdb.json swss:/
|
test -e /host/fast-reboot/fdb.json && docker cp /host/fast-reboot/fdb.json swss:/
|
||||||
test -e /host/fast-reboot/arp.json && docker cp /host/fast-reboot/arp.json swss:/
|
test -e /host/fast-reboot/arp.json && docker cp /host/fast-reboot/arp.json swss:/
|
||||||
test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss:/
|
test -e /host/fast-reboot/default_routes.json && docker cp /host/fast-reboot/default_routes.json swss:/
|
||||||
|
@ -90,11 +90,7 @@ start() {
|
|||||||
|
|
||||||
# Don't flush DB during warm boot
|
# Don't flush DB during warm boot
|
||||||
if [[ x"$WARM_BOOT" != x"true" ]]; then
|
if [[ x"$WARM_BOOT" != x"true" ]]; then
|
||||||
# Don't flush APP_DB during MLNX fastfast boot
|
|
||||||
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
|
|
||||||
if [[ x"$BOOT_TYPE" != x"fastfast" ]] && [[ ! -f /var/warmboot/issu_started ]]; then
|
|
||||||
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
|
/usr/bin/docker exec database redis-cli -n 0 FLUSHDB
|
||||||
fi
|
|
||||||
/usr/bin/docker exec database redis-cli -n 2 FLUSHDB
|
/usr/bin/docker exec database redis-cli -n 2 FLUSHDB
|
||||||
/usr/bin/docker exec database redis-cli -n 5 FLUSHDB
|
/usr/bin/docker exec database redis-cli -n 5 FLUSHDB
|
||||||
clean_up_tables 6 "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*'"
|
clean_up_tables 6 "'PORT_TABLE*', 'MGMT_PORT_TABLE*', 'VLAN_TABLE*', 'VLAN_MEMBER_TABLE*', 'INTERFACE_TABLE*', 'MIRROR_SESSION*'"
|
||||||
|
@ -90,14 +90,12 @@ start() {
|
|||||||
|
|
||||||
# Flush DB during non-warm start
|
# Flush DB during non-warm start
|
||||||
/usr/bin/docker exec database redis-cli -n 1 FLUSHDB
|
/usr/bin/docker exec database redis-cli -n 1 FLUSHDB
|
||||||
|
|
||||||
# platform specific tasks
|
|
||||||
if [ x$sonic_asic_platform == x'cavium' ]; then
|
|
||||||
/etc/init.d/xpnet.sh start
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# platform specific tasks
|
# platform specific tasks
|
||||||
|
|
||||||
|
# start mellanox drivers regardless of
|
||||||
|
# boot type
|
||||||
if [ x"$sonic_asic_platform" == x"mellanox" ]; then
|
if [ x"$sonic_asic_platform" == x"mellanox" ]; then
|
||||||
BOOT_TYPE=`getBootType`
|
BOOT_TYPE=`getBootType`
|
||||||
if [[ x"$WARM_BOOT" == x"true" || x"$BOOT_TYPE" == x"fast" ]]; then
|
if [[ x"$WARM_BOOT" == x"true" || x"$BOOT_TYPE" == x"fast" ]]; then
|
||||||
@ -109,6 +107,13 @@ start() {
|
|||||||
/sbin/modprobe i2c-dev
|
/sbin/modprobe i2c-dev
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ x"$WARM_BOOT" != x"true" ]]; then
|
||||||
|
if [ x$sonic_asic_platform == x'cavium' ]; then
|
||||||
|
/etc/init.d/xpnet.sh start
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
# start service docker
|
# start service docker
|
||||||
/usr/bin/${SERVICE}.sh start
|
/usr/bin/${SERVICE}.sh start
|
||||||
debug "Started ${SERVICE} service..."
|
debug "Started ${SERVICE} service..."
|
||||||
@ -146,21 +151,23 @@ stop() {
|
|||||||
/usr/bin/${SERVICE}.sh stop
|
/usr/bin/${SERVICE}.sh stop
|
||||||
debug "Stopped ${SERVICE} service..."
|
debug "Stopped ${SERVICE} service..."
|
||||||
|
|
||||||
# if warm start enabled, don't stop peer service docker
|
|
||||||
if [[ x"$WARM_BOOT" != x"true" ]]; then
|
|
||||||
# platform specific tasks
|
# platform specific tasks
|
||||||
|
|
||||||
|
# stop mellanox driver regardless of
|
||||||
|
# shutdown type
|
||||||
|
if [ x$sonic_asic_platform == x'mellanox' ]; then
|
||||||
|
/etc/init.d/sxdkernel stop
|
||||||
|
/usr/bin/mst stop
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
if [[ x"$WARM_BOOT" != x"true" ]]; then
|
||||||
if [ x$sonic_asic_platform == x'cavium' ]; then
|
if [ x$sonic_asic_platform == x'cavium' ]; then
|
||||||
/etc/init.d/xpnet.sh stop
|
/etc/init.d/xpnet.sh stop
|
||||||
/etc/init.d/xpnet.sh start
|
/etc/init.d/xpnet.sh start
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# platform specific tasks
|
|
||||||
if [ x"$sonic_asic_platform" == x"mellanox" ]; then
|
|
||||||
/etc/init.d/sxdkernel stop
|
|
||||||
/usr/bin/mst stop
|
|
||||||
fi
|
|
||||||
|
|
||||||
unlock_service_state_change
|
unlock_service_state_change
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -8,8 +8,3 @@ supervisorctl start syncd
|
|||||||
|
|
||||||
supervisorctl start mlnx-sfpd
|
supervisorctl start mlnx-sfpd
|
||||||
|
|
||||||
BOOT_TYPE="$(cat /proc/cmdline | grep -o 'SONIC_BOOT_TYPE=\S*' | cut -d'=' -f2)"
|
|
||||||
if [[ x"$BOOT_TYPE" == x"fastfast" ]] && [[ -f /var/warmboot/issu_started ]]; then
|
|
||||||
rm -f /var/warmboot/issu_started
|
|
||||||
/usr/bin/ffb &>/dev/null &
|
|
||||||
fi
|
|
||||||
|
@ -62,6 +62,19 @@ check_sdk_upgrade()
|
|||||||
return "${CHECK_RESULT}"
|
return "${CHECK_RESULT}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
check_ffb()
|
||||||
|
{
|
||||||
|
check_issu_enabled || {
|
||||||
|
echo "ISSU is not enabled on this HWSKU"
|
||||||
|
return "${FFB_FAILURE}"
|
||||||
|
}
|
||||||
|
check_sdk_upgrade || {
|
||||||
|
echo "SDK upgrade check failued"
|
||||||
|
return "${FFB_FAILURE}"
|
||||||
|
}
|
||||||
|
return "${FFB_SUCCESS}";
|
||||||
|
}
|
||||||
|
|
||||||
# Perform ISSU start
|
# Perform ISSU start
|
||||||
issu_start()
|
issu_start()
|
||||||
{
|
{
|
||||||
@ -70,8 +83,6 @@ issu_start()
|
|||||||
|
|
||||||
EXIT_CODE=$?
|
EXIT_CODE=$?
|
||||||
|
|
||||||
touch /host/warmboot/issu_started
|
|
||||||
|
|
||||||
return $EXIT_CODE
|
return $EXIT_CODE
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,69 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
"""
|
|
||||||
Part of Mellanox platform specific fastfast boot implementation for warm-boot.
|
|
||||||
Notifies the SYNCD process once boot is finished after warm-reboot.
|
|
||||||
Once SYNCD receives this notification, it should set the appropriate SAI attribute.
|
|
||||||
Then SAI will notify SDK to end ISSU mode for the FFB.
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
||||||
import time
|
|
||||||
import swsssdk
|
|
||||||
from threading import Timer
|
|
||||||
|
|
||||||
|
|
||||||
class FFB(object):
|
|
||||||
"""Provides implementation for Mellanox fastfast boot"""
|
|
||||||
DB_WARM_TABLE_KEY = 'WARM_RESTART_TABLE|bgp'
|
|
||||||
DB_STATE_ENTRY_NAME = 'state'
|
|
||||||
DB_STATE_TYPE_RECONCILED = 'reconciled'
|
|
||||||
DB_CHANNEL_NAME = 'MLNX_FFB'
|
|
||||||
DB_CHANNEL_MSG = '["SET","ISSU_END"]' # message should be in the following format: ["<operation>","<data>"]
|
|
||||||
SUB_THREAD_TIMEOUT = 1
|
|
||||||
STOP_TIMER_TIMEOUT = 180
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.state_db = swsssdk.SonicV2Connector()
|
|
||||||
self.state_db.connect(self.state_db.STATE_DB)
|
|
||||||
|
|
||||||
self.prevState = self.state_db.get(self.state_db.STATE_DB, self.DB_WARM_TABLE_KEY, self.DB_STATE_ENTRY_NAME)
|
|
||||||
|
|
||||||
self.pubSub = self.state_db.redis_clients[self.state_db.STATE_DB].pubsub()
|
|
||||||
self.pubSub.psubscribe(**{'__key*@6__:{}'.format(self.DB_WARM_TABLE_KEY): self.eventHandler})
|
|
||||||
|
|
||||||
self.timeoutTimer = Timer(self.STOP_TIMER_TIMEOUT, self.finish)
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
# Start event thread in order to get required events
|
|
||||||
self.eventThread = self.pubSub.run_in_thread(sleep_time=self.SUB_THREAD_TIMEOUT)
|
|
||||||
# Start oneshot timer in order to exit in case required event is not received during defined timeout
|
|
||||||
self.timeoutTimer.start()
|
|
||||||
|
|
||||||
def finish(self):
|
|
||||||
# Stop event thread and timeout timer
|
|
||||||
self.eventThread.stop()
|
|
||||||
self.timeoutTimer.cancel()
|
|
||||||
|
|
||||||
# Publish "FFB END" event to SYNCD process
|
|
||||||
time.sleep(60) # W/A: Wait until configuration is applied to HW since it takes some time
|
|
||||||
self.state_db.publish(self.state_db.STATE_DB, self.DB_CHANNEL_NAME, self.DB_CHANNEL_MSG)
|
|
||||||
|
|
||||||
def eventHandler(self, msg):
|
|
||||||
# Only "set" operations are needed so just skip all others
|
|
||||||
if msg['data'] != 'hset':
|
|
||||||
return
|
|
||||||
|
|
||||||
state = self.state_db.get(self.state_db.STATE_DB, self.DB_WARM_TABLE_KEY, self.DB_STATE_ENTRY_NAME)
|
|
||||||
|
|
||||||
if (state != self.prevState) and (state == self.DB_STATE_TYPE_RECONCILED):
|
|
||||||
self.finish()
|
|
||||||
else:
|
|
||||||
self.prevState = state
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
FFB().run()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
@ -10,6 +10,5 @@ setup(
|
|||||||
maintainer_email='stepanb@mellanox.com',
|
maintainer_email='stepanb@mellanox.com',
|
||||||
scripts=[
|
scripts=[
|
||||||
'scripts/issu',
|
'scripts/issu',
|
||||||
'scripts/ffb',
|
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user