Why I did it: To reduce the container's dependency on the host system. Work item tracking: Microsoft ADO (number only): 17713469. How I did it: Moved the k8s container startup script into the config engine container, rather than mounting it from the host. How to verify it: Check that the file path (/usr/share/sonic/scripts/container_startup.py) exists inside the config engine container. Signed-off-by: Yun Li <yunli1@microsoft.com> Co-authored-by: Qi Luo <qiluo-msft@users.noreply.github.com>
293 lines
8.5 KiB
Python
Executable File
293 lines
8.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import argparse
|
|
import datetime
|
|
import inspect
|
|
import json
|
|
import syslog
|
|
import time
|
|
|
|
from swsscommon import swsscommon
|
|
from sonic_py_common.general import getstatusoutput_noshell_pipe
|
|
|
|
# Field names used in the FEATURE table entries.
# SET_OWNER is looked up in the CONFIG-DB entry; the remaining fields are
# the ones read from / written to the STATE-DB entry by this script.
SET_OWNER = "set_owner"
CURRENT_OWNER = "current_owner"
UPD_TIMESTAMP = "update_time"
DOCKER_ID = "container_id"
REMOTE_STATE = "remote_state"
VERSION = "container_version"
SYSTEM_STATE = "system_state"

# STATE-DB table, and the key inside it, that hold the kube labels.
KUBE_LABEL_TABLE = "KUBE_LABELS"
KUBE_LABEL_SET_KEY = "SET"

# When truthy, the otherwise endless wait loops in this script exit after
# a single pass.
UNIT_TESTING = 0
|
|
|
|
|
|
def debug_msg(m):
    """Send *m* to syslog at debug priority, prefixed with the caller's name.

    The prefix is the name of the function one frame up the call stack,
    i.e. the function that invoked debug_msg().
    """
    caller_name = inspect.stack()[1].function
    syslog.syslog(syslog.LOG_DEBUG, "{}: {}".format(caller_name, m))
|
|
|
|
|
|
def _get_version_key(feature, version):
    """Return the version-control label name "<feature>_<version>_enabled"."""
    return "{feat}_{ver}_enabled".format(feat=feature, ver=version)
|
|
|
|
|
|
def _get_local_version_key(feature):
    """Return the label name "<feature>_local".

    The label is coined to track the latest local version of the feature.
    """
    return "{name}_local".format(name=feature)
|
|
|
|
|
|
def read_data(feature):
    """Read the configured owner and the current state of *feature*.

    The owner comes from the SET_OWNER field of the feature's entry in the
    CONFIG-DB 'FEATURE' table and falls back to "local" when the field is
    absent. The state comes from the feature's entry in the STATE-DB
    'FEATURE' table, layered over built-in defaults for every known field.

    Returns:
        A tuple (state_db, set_owner, state_data) where state_db is the
        STATE-DB connector opened here, so callers can reuse it.
    """
    # Owner requested through configuration.
    config_db = swsscommon.DBConnector("CONFIG_DB", 0)
    feature_cfg = dict(swsscommon.Table(config_db, 'FEATURE').get(feature)[1])
    set_owner = feature_cfg.get(SET_OWNER, "local")

    # Current state; fields missing from the DB keep these defaults.
    state_db = swsscommon.DBConnector("STATE_DB", 0)
    state_data = {
        CURRENT_OWNER: "none",
        UPD_TIMESTAMP: "",
        DOCKER_ID: "",
        REMOTE_STATE: "none",
        VERSION: "0.0.0",
        SYSTEM_STATE: "",
    }
    state_data.update(
        dict(swsscommon.Table(state_db, 'FEATURE').get(feature)[1]))

    return (state_db, set_owner, state_data)
|
|
|
|
|
|
def read_fields(state_db, feature, fields):
    """Read selected fields of *feature* directly from STATE-DB.

    Args:
        state_db: STATE-DB connector handed to swsscommon.Table.
        feature: key of the entry in the 'FEATURE' table.
        fields: list of (<field name>, <default value>) tuples.

    Returns:
        A tuple of values in the same order as *fields*; a field missing
        from the entry yields its default.
    """
    # Per the original note, tbl.get() on a non-existing feature returns
    # [False, {}], so every field then resolves to its default.
    entry = dict(swsscommon.Table(state_db, 'FEATURE').get(feature)[1])
    return tuple(entry.get(name, fallback) for (name, fallback) in fields)
|
|
|
|
|
|
def check_version_blocked(state_db, feature, version):
    """Tell whether *version* of *feature* is explicitly blocked.

    Looks up the "<feature>_local" label in the KUBE_LABEL_SET_KEY entry of
    the STATE-DB kube label table.

    Returns:
        True when that label exists and equals *version*, compared
        case-insensitively; False otherwise.
    """
    kube_labels = dict(
        swsscommon.Table(state_db, KUBE_LABEL_TABLE).get(KUBE_LABEL_SET_KEY)[1])
    label_name = _get_local_version_key(feature)
    if label_name not in kube_labels:
        return False
    return kube_labels[label_name].lower() == version.lower()
|
|
|
|
|
|
def drop_label(state_db, feature, version):
    """Mark *version* of *feature* as dropped in the kube labels.

    The update is made in STATE-DB only: the "<feature>_local" label is set
    to *version* under KUBE_LABEL_SET_KEY. Per the original note, ctrmgrd
    sets it with the kube API server as reachability allows.
    """
    label_table = swsscommon.Table(state_db, KUBE_LABEL_TABLE)
    label = (_get_local_version_key(feature), version)
    label_table.set(KUBE_LABEL_SET_KEY, [label])
|
|
|
|
|
|
def update_data(state_db, feature, data):
    """Write the fields in *data* to the STATE-DB entry of *feature*.

    Args:
        state_db: STATE-DB connector handed to swsscommon.Table.
        feature: key of the entry in the 'FEATURE' table.
        data: dict of field name -> value to set on that entry.
    """
    debug_msg("{}: {}".format(feature, str(data)))
    feature_table = swsscommon.Table(state_db, "FEATURE")
    field_values = list(data.items())
    feature_table.set(feature, field_values)
|
|
|
|
|
|
def get_docker_id():
    """Return the container id, as derived from /proc/self/cgroup.

    Note: this script runs inside the context of the container.

    The pipeline keeps the cgroup lines containing ":memory:" and extracts
    the text after the last '/' (rev | cut -f1 -d/ | rev). The result is
    stripped and cut to its first 12 characters. The pipeline's exit status
    is ignored.
    """
    pipeline = (
        ['cat', '/proc/self/cgroup'],
        ['grep', '-e', ":memory:"],
        ['rev'],
        ['cut', '-f1', '-d', '/'],
        ['rev'],
    )
    _status, raw_id = getstatusoutput_noshell_pipe(*pipeline)
    return raw_id.strip()[:12]
|
|
|
|
|
|
def instance_higher(feature, ct_version, version):
    """Tell whether *version* is higher than the current version.

    Versions are dot-separated integers compared component by component,
    left to right. A version with fewer components is treated as if padded
    with trailing zeros, so "1.0.1" is higher than "1.0", and "1.0.0" is
    not higher than "1.0". (Previously the comparison stopped at the end of
    the shorter version and reported "not higher" in both cases.)

    Args:
        feature: feature name; not used by the comparison.
        ct_version: current version string; empty/None means nothing is
            running, in which case any version counts as higher.
        version: candidate version string.

    Returns:
        True if *version* is strictly higher than *ct_version*, else False.

    Raises:
        ValueError: if a component that needs comparing is not an integer.
    """
    ret = False
    if ct_version:
        ct = ct_version.split('.')
        nxt = version.split('.')

        # Zero-pad the shorter list so zip() does not drop trailing
        # components of the longer one.
        width = max(len(ct), len(nxt))
        ct += ['0'] * (width - len(ct))
        nxt += ['0'] * (width - len(nxt))

        # Convert lazily: components after the first difference are never
        # parsed, exactly as before.
        for cs, ns in zip(ct, nxt):
            c = int(cs)
            n = int(ns)
            if n != c:
                ret = n > c
                break
    else:
        # Empty version implies no one is running.
        ret = True

    debug_msg("compare version: new:{} current:{} res={}".format(
        version, ct_version, ret))
    return ret
|
|
|
|
|
|
def is_active(feature, system_state):
    """Return True when *system_state* is "up".

    Any other state is logged to syslog as an error naming *feature*, and
    False is returned.
    """
    active = (system_state == "up")
    if not active:
        syslog.syslog(syslog.LOG_ERR, "Found inactive for {}".format(feature))
    return active
|
|
|
|
|
|
def update_state(state_db, feature, owner=None, version=None):
    """Record owner, version and container id of *feature* in STATE-DB.

    For a "local" owner the container id field is set to the feature name
    and the kube label is updated to block remote deployment of this same
    version. For any other owner the container id comes from
    get_docker_id() and the remote state is set to "running".

    Args:
        state_db: STATE-DB connector.
        feature: feature name (key of the STATE-DB entry).
        owner: owner of the starting container.
        version: version of the starting container.
    """
    runs_locally = (owner == "local")

    if runs_locally:
        container_id = feature
    else:
        container_id = get_docker_id()
    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    data = {
        CURRENT_OWNER: owner,
        DOCKER_ID: container_id,
        UPD_TIMESTAMP: stamp,
        # NOTE(review): looks like leftover debug data; it is still written
        # to STATE-DB, so confirm nothing relies on it before removing.
        "hello": "world",
        VERSION: version,
    }

    if runs_locally:
        # Disable deployment of this version as it is available locally.
        drop_label(state_db, feature, version)
    else:
        data[REMOTE_STATE] = "running"

    debug_msg("{} up data:{}".format(feature, str(data)))
    update_data(state_db, feature, data)
|
|
|
|
|
|
def do_freeze(feat, m):
    """Block forever, logging the reason *m* to syslog once a minute.

    Exiting would kick off the container to run, so this sleeps instead of
    returning. It returns after the first log line only when UNIT_TESTING
    is set.
    """
    while True:
        notice = "Blocking .... feat:{} docker_id:{} msg:{}".format(
            feat, get_docker_id(), m)
        syslog.syslog(syslog.LOG_ERR, notice)
        if UNIT_TESTING:
            return
        time.sleep(60)
|
|
|
|
|
|
def container_up(feature, owner, version):
    """Gate the start of a container's main application.

    Called by the container post start; the application runs only once
    this call completes. Based on the current DB state and the owner and
    version of the starting container, it either records the new state in
    STATE-DB and returns, or sleeps forever (do_freeze) so the application
    never starts.

    Flow:
      * Empty system state in STATE-DB: return without doing anything.
      * owner "local": record the state and return.
      * Otherwise freeze if the configured owner is "local", the system
        state is not "up", or this version is blocked by label. Else
        publish the version, move the remote state to "pending" (unless
        it is in some other state already) and poll every 2 seconds until
        it becomes "ready"; freeze if another instance overwrites the
        version meanwhile. Finally record the state.
    """
    debug_msg("BEGIN")
    state_db, set_owner, state_data = read_data(feature)

    debug_msg("args: feature={}, owner={}, version={} DB: set_owner={} state_data={}".format(
        feature, owner, version, set_owner, json.dumps(state_data, indent=4)))

    if state_data[SYSTEM_STATE] == '':
        return

    if owner == "local":
        update_state(state_db, feature, owner, version)
        debug_msg("END")
        return

    # Non-local start: every do_freeze() below blocks for good; the return
    # after it is only reached when do_freeze() comes back (UNIT_TESTING).
    if set_owner == "local":
        do_freeze(feature, "bail out as set_owner is local")
        return

    if not is_active(feature, state_data[SYSTEM_STATE]):
        do_freeze(feature, "bail out as system state not active")
        return

    if check_version_blocked(state_db, feature, version):
        do_freeze(feature, "This version is marked disabled. Exiting ...")
        return

    update_data(state_db, feature, {VERSION: version})

    remote_state = state_data[REMOTE_STATE]
    if remote_state not in ("none", "running", "stopped"):
        debug_msg("{}: Skip remote_state({}) update".format(feature, remote_state))
    else:
        update_data(state_db, feature, {REMOTE_STATE: "pending"})
        remote_state = "pending"

    polls = 0
    while remote_state != "ready":
        # Log on the first pass and on every tenth one after that.
        if polls % 10 == 0:
            debug_msg("{}: remote_state={}. Waiting to go ready".format(feature, remote_state))
        polls += 1

        time.sleep(2)
        remote_state, db_version = read_fields(
            state_db, feature, [(REMOTE_STATE, "none"), (VERSION, "")])
        if version != db_version:
            # Looks like another instance has overwritten the version.
            # Exit for now; if this one happens to be the higher version,
            # the next deploy by kube will fix it. The window for that is
            # very rare.
            do_freeze(feature,
                      "bail out as current deploy version={} is different than {}. re-deploy higher one".
                      format(version, db_version))
            return
        if UNIT_TESTING:
            return

    update_state(state_db, feature, owner, version)

    debug_msg("END")
|
|
|
|
|
|
def main():
    """Parse the command line and run container_up() with its values.

    All of --feature, --owner (local or kube) and --version are required.
    """
    # NOTE(review): the description shows <version> as optional, yet the
    # --version option below is required.
    cli = argparse.ArgumentParser(
        description="container_startup <feature> kube/local [<version>]")

    cli.add_argument("-f", "--feature", required=True)
    cli.add_argument("-o", "--owner", choices=["local", "kube"], required=True)
    cli.add_argument("-v", "--version", required=True)

    opts = cli.parse_args()
    container_up(opts.feature, opts.owner, opts.version)


if __name__ == "__main__":
    main()
|