This repository has been archived on 2025-03-20. You can view files and clone it, but cannot push or open issues or pull requests.
sonic-buildimage/src/sonic-ctrmgrd/ctrmgr/container_startup.py
lixiaoyuner c59f55f6a3
Move k8s script to docker-config-engine (#14788) (#15768)
Why I did it
To reduce the container's dependency on the host system

Work item tracking
Microsoft ADO (number only):
17713469
How I did it
Move the k8s container startup script into the config engine container, rather than mounting it from the host.

How to verify it
Check file path(/usr/share/sonic/scripts/container_startup.py) inside config engine container.

Signed-off-by: Yun Li <yunli1@microsoft.com>
Co-authored-by: Qi Luo <qiluo-msft@users.noreply.github.com>
2023-07-17 23:21:01 +08:00

293 lines
8.5 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import datetime
import inspect
import json
import syslog
import time
from swsscommon import swsscommon
from sonic_py_common.general import getstatusoutput_noshell_pipe
# DB field names
# Field of a feature's FEATURE entry in CONFIG_DB; read_data() falls back to
# "local" when it is absent.
SET_OWNER = "set_owner"
# Fields of a feature's FEATURE entry in STATE_DB.
CURRENT_OWNER = "current_owner"
UPD_TIMESTAMP = "update_time"
DOCKER_ID = "container_id"
REMOTE_STATE = "remote_state"
VERSION = "container_version"
SYSTEM_STATE = "system_state"
# STATE_DB table, and the key inside it, that hold the kube labels.
KUBE_LABEL_TABLE = "KUBE_LABELS"
KUBE_LABEL_SET_KEY = "SET"
# When truthy, do_freeze() and the wait loop in container_up() stop after a
# single pass instead of blocking.
UNIT_TESTING = 0
def debug_msg(m):
    """Log *m* to syslog at LOG_DEBUG, prefixed with the caller's function name."""
    # Frame 0 is debug_msg itself; frame 1 is whoever called it.
    caller = inspect.stack()[1].function
    syslog.syslog(syslog.LOG_DEBUG, "{}: {}".format(caller, m))
def _get_version_key(feature, version):
# Coin label for version control
return "{}_{}_enabled".format(feature, version)
def _get_local_version_key(feature):
# Coin label for track laster local version
return "{}_local".format(feature)
def read_data(feature):
    """
    Read the configured owner and the current state of a feature.

    The owner is the set_owner field of the feature's FEATURE entry in
    CONFIG_DB, or "local" when that field is absent. The state is the
    feature's FEATURE entry in STATE_DB layered over a set of defaults, so
    every known field is always present in the result.

    Returns a tuple (state_db, set_owner, state_data); state_db is the
    STATE_DB connector, handed back so callers can reuse it.
    """
    # Owner comes from config-db.
    config_db = swsscommon.DBConnector("CONFIG_DB", 0)
    config_entry = dict(swsscommon.Table(config_db, 'FEATURE').get(feature)[1])
    set_owner = config_entry.get(SET_OWNER, "local")

    # Current state comes from state-db; DB values override the defaults.
    state_db = swsscommon.DBConnector("STATE_DB", 0)
    state_entry = dict(swsscommon.Table(state_db, 'FEATURE').get(feature)[1])
    state_data = {
        CURRENT_OWNER: "none",
        UPD_TIMESTAMP: "",
        DOCKER_ID: "",
        REMOTE_STATE: "none",
        VERSION: "0.0.0",
        SYSTEM_STATE: "",
        **state_entry,
    }
    return (state_db, set_owner, state_data)
def read_fields(state_db, feature, fields):
    """
    Read selected fields of a feature's FEATURE entry straight from STATE_DB.

    fields is a list of (<field name>, <default val>) tuples. The result is a
    tuple of values in the same order, using the default for any field that is
    missing from the entry.
    """
    # Per the original note, tbl.get() on a non-existing feature yields
    # [False, {}], so every field then falls back to its default.
    entry = dict(swsscommon.Table(state_db, 'FEATURE').get(feature)[1])
    return tuple(entry.get(name, fallback) for name, fallback in fields)
def check_version_blocked(state_db, feature, version):
    """
    Return True when *version* of *feature* is explicitly blocked.

    A version is blocked when the feature's "<feature>_local" label in the
    KUBE_LABELS|SET entry of STATE_DB equals it, compared case-insensitively.
    """
    label_entry = swsscommon.Table(state_db, KUBE_LABEL_TABLE).get(KUBE_LABEL_SET_KEY)
    labels = dict(label_entry[1])
    key = _get_local_version_key(feature)
    if key not in labels:
        return False
    return labels[key].lower() == version.lower()
def drop_label(state_db, feature, version):
    """
    Mark the given feature version as dropped in the kube labels.

    Sets the feature's "<feature>_local" label to *version* in the
    KUBE_LABELS|SET entry of STATE_DB.
    """
    # Only state-db is updated here; per the original note, ctrmgrd syncs the
    # label with the kube API server when it is reachable.
    labels_tbl = swsscommon.Table(state_db, KUBE_LABEL_TABLE)
    label = (_get_local_version_key(feature), version)
    labels_tbl.set(KUBE_LABEL_SET_KEY, [label])
def update_data(state_db, feature, data):
    """Log *data* and write it into the feature's FEATURE entry in STATE_DB."""
    debug_msg("{}: {}".format(feature, str(data)))
    field_values = list(data.items())
    swsscommon.Table(state_db, "FEATURE").set(feature, field_values)
def get_docker_id():
    """
    Return the first 12 characters of this container's id.

    The id is taken from the ":memory:" line of /proc/self/cgroup, as the
    text after the last '/'. The pipeline's exit status is ignored.
    Note: this script runs inside the context of the container.
    """
    pipeline = (
        ['cat', '/proc/self/cgroup'],
        ['grep', '-e', ":memory:"],
        # rev | cut -f1 -d/ | rev keeps only what follows the last '/'.
        ['rev'],
        ['cut', '-f1', '-d', '/'],
        ['rev'],
    )
    _, output = getstatusoutput_noshell_pipe(*pipeline)
    return output.strip()[:12]
def instance_higher(feature, ct_version, version):
    """
    Tell whether *version* is higher than the current version *ct_version*.

    Both versions are dot-separated integers and are compared component by
    component, left to right. When one version has fewer components, the
    missing ones count as 0, so "1.2.1" is higher than "1.2" while "1.2.0"
    is not.

    feature    -- not used by the comparison; kept for interface compatibility.
    ct_version -- current version; an empty value means no one is running,
                  so any version counts as higher.
    version    -- candidate version.

    Returns True if *version* is higher than *ct_version*, else False.
    Raises ValueError if a component reached during the comparison is not an
    integer.
    """
    from itertools import zip_longest

    ret = False
    if ct_version:
        # zip() would stop at the shorter version and drop trailing
        # components, so pad the shorter side with "0" instead.
        pairs = zip_longest(ct_version.split('.'), version.split('.'),
                            fillvalue='0')
        for cs, ns in pairs:
            c = int(cs)
            n = int(ns)
            if n != c:
                # The first differing component decides the result.
                ret = n > c
                break
    else:
        # Empty version implies no one is running.
        ret = True
    debug_msg("compare version: new:{} current:{} res={}".format(
        version, ct_version, ret))
    return ret
def is_active(feature, system_state):
    """Return True when *system_state* is "up"; otherwise log an error and return False."""
    if system_state != "up":
        syslog.syslog(syslog.LOG_ERR, "Found inactive for {}".format(feature))
        return False
    return True
def update_state(state_db, feature, owner=None, version=None):
    """
    Record owner, version, container id and update time for this feature
    in state-db.

    For a local owner the container id is the feature name, and the version
    is written to the feature's local label so that the same version is not
    deployed remotely. For any other owner the container id is read from the
    running container and remote_state is set to "running".
    """
    is_local = owner == "local"
    container_id = feature if is_local else get_docker_id()
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    data = {
        CURRENT_OWNER: owner,
        DOCKER_ID: container_id,
        UPD_TIMESTAMP: timestamp,
        # NOTE(review): this field looks like leftover debug data; it is
        # written to STATE_DB as-is. Confirm nothing relies on it before
        # removing.
        "hello": "world",
        VERSION: version,
    }

    if is_local:
        # Disable deployment of this version as available locally
        drop_label(state_db, feature, version)
    else:
        data[REMOTE_STATE] = "running"

    debug_msg("{} up data:{}".format(feature, str(data)))
    update_data(state_db, feature, data)
def do_freeze(feat, m):
    """
    Block forever, logging the reason *m* for feature *feat* every 60 seconds.

    Per the original note, exiting would let the container go on to run, so
    this never returns in normal operation. With UNIT_TESTING set it logs
    once and returns.
    """
    while True:
        blocked_msg = "Blocking .... feat:{} docker_id:{} msg:{}".format(
            feat, get_docker_id(), m)
        syslog.syslog(syslog.LOG_ERR, blocked_msg)
        if UNIT_TESTING:
            return
        time.sleep(60)
def container_up(feature, owner, version):
    """
    Gate the start of a container's main application.

    Reads the feature's configured owner and current state, then:
      * returns at once when the feature has no system_state recorded;
      * for a "local" owner, records the state in state-db and returns;
      * for any other owner, freezes (see do_freeze) when set_owner is
        "local", the system state is not "up", or this version is blocked.
        Otherwise it writes the version to state-db, moves remote_state to
        "pending" if it was "none", "running" or "stopped", and polls every
        2 seconds until remote_state is "ready". If the version in state-db
        changes while waiting, it freezes. Once ready, it records the state.

    Returning lets the container start its main application; freezing keeps
    it from doing so.
    """
    debug_msg("BEGIN")
    state_db, set_owner, state_data = read_data(feature)

    debug_msg("args: feature={}, owner={}, version={} DB: set_owner={} state_data={}".format(
        feature, owner, version, set_owner, json.dumps(state_data, indent=4)))

    system_state = state_data[SYSTEM_STATE]
    if system_state == '':
        return

    if owner == "local":
        update_state(state_db, feature, owner, version)
        debug_msg("END")
        return

    # Remote (kube) owner from here on.
    if set_owner == "local":
        do_freeze(feature, "bail out as set_owner is local")
        return

    if not is_active(feature, system_state):
        do_freeze(feature, "bail out as system state not active")
        return

    if check_version_blocked(state_db, feature, version):
        do_freeze(feature, "This version is marked disabled. Exiting ...")
        return

    update_data(state_db, feature, {VERSION: version})

    mode = state_data[REMOTE_STATE]
    if mode not in ("none", "running", "stopped"):
        debug_msg("{}: Skip remote_state({}) update".format(feature, mode))
    else:
        update_data(state_db, feature, {REMOTE_STATE: "pending"})
        mode = "pending"

    polls = 0
    while mode != "ready":
        # Log only every 10th poll to keep the wait quiet.
        if polls % 10 == 0:
            debug_msg("{}: remote_state={}. Waiting to go ready".format(feature, mode))
        polls += 1

        time.sleep(2)
        mode, db_version = read_fields(
            state_db, feature, [(REMOTE_STATE, "none"), (VERSION, "")])
        if version != db_version:
            # Looks like another instance has overwritten the version. Per
            # the original note, if that one is higher the next deploy by
            # kube will fix it; this window is very rare.
            do_freeze(
                feature,
                "bail out as current deploy version={} is different than {}. re-deploy higher one".format(
                    version, db_version))
            return
        if UNIT_TESTING:
            return

    update_state(state_db, feature, owner, version)
    debug_msg("END")
def main():
    """Parse the feature, owner and version from the command line and run container_up."""
    cli = argparse.ArgumentParser(
        description="container_startup <feature> kube/local [<version>]")
    cli.add_argument("-f", "--feature", required=True)
    cli.add_argument("-o", "--owner", choices=["local", "kube"], required=True)
    cli.add_argument("-v", "--version", required=True)
    opts = cli.parse_args()

    container_up(opts.feature, opts.owner, opts.version)
# Run the startup check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()