From c71fb3a30f3d384438f2e4861c80c28aea398fef Mon Sep 17 00:00:00 2001 From: ganglv <88995770+ganglyu@users.noreply.github.com> Date: Wed, 8 Nov 2023 08:54:36 +0800 Subject: [PATCH] Share image for gnmi and telemetry (#16863) Why I did it Share docker image to support gnmi container and telemetry container Work item tracking Microsoft ADO 25423918: How I did it Create telemetry image from gnmi docker image. Enable gnmi container and disable telemetry container by default. How to verify it Run end to end test. --- Makefile.work | 1 + dockers/docker-sonic-gnmi/Dockerfile.j2 | 34 ++++++ .../base_image_files/monit_gnmi | 5 + dockers/docker-sonic-gnmi/critical_processes | 1 + dockers/docker-sonic-gnmi/dialout.sh | 6 + dockers/docker-sonic-gnmi/gnmi-native.sh | 105 ++++++++++++++++++ dockers/docker-sonic-gnmi/start.sh | 18 +++ dockers/docker-sonic-gnmi/supervisord.conf | 60 ++++++++++ dockers/docker-sonic-gnmi/telemetry_vars.j2 | 5 + dockers/docker-sonic-telemetry/Dockerfile.j2 | 3 +- dockers/docker-sonic-telemetry/telemetry.sh | 12 +- files/build_templates/gnmi.service.j2 | 16 +++ files/build_templates/init_cfg.json.j2 | 3 +- .../build_templates/sonic_debian_extension.j2 | 1 + files/image_config/logrotate/rsyslog.j2 | 1 + files/image_config/monit/container_checker | 8 +- .../rsyslog/rsyslog.d/00-sonic.conf | 6 + files/scripts/gnmi.sh | 1 + rules/config | 7 +- rules/docker-gnmi.dep | 11 ++ rules/docker-gnmi.mk | 37 ++++++ rules/docker-telemetry.mk | 13 +-- slave.mk | 6 + .../health_checker/service_checker.py | 4 +- 24 files changed, 334 insertions(+), 30 deletions(-) create mode 100644 dockers/docker-sonic-gnmi/Dockerfile.j2 create mode 100644 dockers/docker-sonic-gnmi/base_image_files/monit_gnmi create mode 100644 dockers/docker-sonic-gnmi/critical_processes create mode 100755 dockers/docker-sonic-gnmi/dialout.sh create mode 100755 dockers/docker-sonic-gnmi/gnmi-native.sh create mode 100755 dockers/docker-sonic-gnmi/start.sh create mode 100644 
dockers/docker-sonic-gnmi/supervisord.conf create mode 100644 dockers/docker-sonic-gnmi/telemetry_vars.j2 create mode 100644 files/build_templates/gnmi.service.j2 create mode 120000 files/scripts/gnmi.sh create mode 100644 rules/docker-gnmi.dep create mode 100644 rules/docker-gnmi.mk diff --git a/Makefile.work b/Makefile.work index 3514a5e05d..628f770a82 100644 --- a/Makefile.work +++ b/Makefile.work @@ -543,6 +543,7 @@ SONIC_BUILD_INSTRUCTION := $(MAKE) \ DOCKER_LOCKFILE_SAVE=$(DOCKER_LOCKFILE_SAVE) \ SONIC_CONFIG_USE_NATIVE_DOCKERD_FOR_BUILD=$(SONIC_CONFIG_USE_NATIVE_DOCKERD_FOR_BUILD) \ SONIC_INCLUDE_SYSTEM_TELEMETRY=$(INCLUDE_SYSTEM_TELEMETRY) \ + SONIC_INCLUDE_SYSTEM_GNMI=$(INCLUDE_SYSTEM_GNMI) \ INCLUDE_DHCP_RELAY=$(INCLUDE_DHCP_RELAY) \ INCLUDE_DHCP_SERVER=$(INCLUDE_DHCP_SERVER) \ INCLUDE_MACSEC=$(INCLUDE_MACSEC) \ diff --git a/dockers/docker-sonic-gnmi/Dockerfile.j2 b/dockers/docker-sonic-gnmi/Dockerfile.j2 new file mode 100644 index 0000000000..9624865202 --- /dev/null +++ b/dockers/docker-sonic-gnmi/Dockerfile.j2 @@ -0,0 +1,34 @@ +{% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %} +FROM docker-config-engine-bullseye-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}} + +ARG docker_container_name +ARG image_version + +## Make apt-get non-interactive +ENV DEBIAN_FRONTEND=noninteractive + +# Pass the image_version to container +ENV IMAGE_VERSION=$image_version + +RUN apt-get update + +{% if docker_sonic_gnmi_debs.strip() -%} +# Copy locally-built Debian package dependencies +{{ copy_files("debs/", docker_sonic_gnmi_debs.split(' '), "/debs/") }} + +# Install locally-built Debian packages and implicitly install their dependencies +{{ install_debian_packages(docker_sonic_gnmi_debs.split(' ')) }} +{%- endif %} + +RUN apt-get clean -y && \ + apt-get autoclean - && \ + apt-get autoremove -y && \ + rm -rf /debs + +COPY ["start.sh", "gnmi-native.sh", "dialout.sh", "/usr/bin/"] +COPY ["telemetry_vars.j2", 
"/usr/share/sonic/templates/"] +COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] +COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] +COPY ["critical_processes", "/etc/supervisor"] + +ENTRYPOINT ["/usr/local/bin/supervisord"] diff --git a/dockers/docker-sonic-gnmi/base_image_files/monit_gnmi b/dockers/docker-sonic-gnmi/base_image_files/monit_gnmi new file mode 100644 index 0000000000..0288daa9e0 --- /dev/null +++ b/dockers/docker-sonic-gnmi/base_image_files/monit_gnmi @@ -0,0 +1,5 @@ +############################################################################### +## Monit configuration for gnmi container +############################################################################### +check program container_memory_gnmi with path "/usr/bin/memory_checker gnmi 419430400" + if status == 3 for 10 times within 20 cycles then exec "/usr/bin/restart_service gnmi" repeat every 2 cycles diff --git a/dockers/docker-sonic-gnmi/critical_processes b/dockers/docker-sonic-gnmi/critical_processes new file mode 100644 index 0000000000..fd693f8007 --- /dev/null +++ b/dockers/docker-sonic-gnmi/critical_processes @@ -0,0 +1 @@ +program:gnmi-native diff --git a/dockers/docker-sonic-gnmi/dialout.sh b/dockers/docker-sonic-gnmi/dialout.sh new file mode 100755 index 0000000000..485c3292d0 --- /dev/null +++ b/dockers/docker-sonic-gnmi/dialout.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +# Start with default config +export CVL_SCHEMA_PATH=/usr/sbin/schema +exec /usr/sbin/dialout_client_cli -insecure -logtostderr -v 2 + diff --git a/dockers/docker-sonic-gnmi/gnmi-native.sh b/dockers/docker-sonic-gnmi/gnmi-native.sh new file mode 100755 index 0000000000..ea5b88f44e --- /dev/null +++ b/dockers/docker-sonic-gnmi/gnmi-native.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash + +EXIT_TELEMETRY_VARS_FILE_NOT_FOUND=1 +INCORRECT_TELEMETRY_VALUE=2 +TELEMETRY_VARS_FILE=/usr/share/sonic/templates/telemetry_vars.j2 + +if [ ! 
-f "$TELEMETRY_VARS_FILE" ]; then + echo "Telemetry vars template file not found" + exit $EXIT_TELEMETRY_VARS_FILE_NOT_FOUND +fi + +# Try to read telemetry and certs config from ConfigDB. +# Use default value if no valid config exists +TELEMETRY_VARS=$(sonic-cfggen -d -t $TELEMETRY_VARS_FILE) +TELEMETRY_VARS=${TELEMETRY_VARS//[\']/\"} +X509=$(echo $TELEMETRY_VARS | jq -r '.x509') +GNMI=$(echo $TELEMETRY_VARS | jq -r '.gnmi') +CERTS=$(echo $TELEMETRY_VARS | jq -r '.certs') + +TELEMETRY_ARGS=" -logtostderr" +export CVL_SCHEMA_PATH=/usr/sbin/schema + +if [ -n "$CERTS" ]; then + SERVER_CRT=$(echo $CERTS | jq -r '.server_crt') + SERVER_KEY=$(echo $CERTS | jq -r '.server_key') + if [ -z $SERVER_CRT ] || [ -z $SERVER_KEY ]; then + TELEMETRY_ARGS+=" --insecure" + else + TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY " + fi + + CA_CRT=$(echo $CERTS | jq -r '.ca_crt') + if [ ! -z $CA_CRT ]; then + TELEMETRY_ARGS+=" --ca_crt $CA_CRT" + fi +elif [ -n "$X509" ]; then + SERVER_CRT=$(echo $X509 | jq -r '.server_crt') + SERVER_KEY=$(echo $X509 | jq -r '.server_key') + if [ -z $SERVER_CRT ] || [ -z $SERVER_KEY ]; then + TELEMETRY_ARGS+=" --insecure" + else + TELEMETRY_ARGS+=" --server_crt $SERVER_CRT --server_key $SERVER_KEY " + fi + + CA_CRT=$(echo $X509 | jq -r '.ca_crt') + if [ ! 
-z $CA_CRT ]; then + TELEMETRY_ARGS+=" --ca_crt $CA_CRT" + fi +else + TELEMETRY_ARGS+=" --noTLS" +fi + +# If no configuration entry exists for TELEMETRY, create one default port +if [ -z "$GNMI" ]; then + PORT=8080 +else + PORT=$(echo $GNMI | jq -r '.port') +fi +TELEMETRY_ARGS+=" --port $PORT" + +CLIENT_AUTH=$(echo $GNMI | jq -r '.client_auth') +if [ -z $CLIENT_AUTH ] || [ $CLIENT_AUTH == "false" ]; then + TELEMETRY_ARGS+=" --allow_no_client_auth" +fi + +LOG_LEVEL=$(echo $GNMI | jq -r '.log_level') +if [[ $LOG_LEVEL =~ ^[0-9]+$ ]]; then + TELEMETRY_ARGS+=" -v=$LOG_LEVEL" +else + TELEMETRY_ARGS+=" -v=2" +fi + +# Enable ZMQ for SmartSwitch +LOCALHOST_SUBTYPE=`sonic-db-cli CONFIG_DB hget localhost "subtype"` +if [[ x"${LOCALHOST_SUBTYPE}" == x"SmartSwitch" ]]; then + TELEMETRY_ARGS+=" -zmq_address=tcp://127.0.0.1:8100" +fi + +# Server will handle threshold connections consecutively +THRESHOLD_CONNECTIONS=$(echo $GNMI | jq -r '.threshold') +if [[ $THRESHOLD_CONNECTIONS =~ ^[0-9]+$ ]]; then + TELEMETRY_ARGS+=" --threshold $THRESHOLD_CONNECTIONS" +else + if [ -z "$GNMI" ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then + TELEMETRY_ARGS+=" --threshold 100" + else + echo "Incorrect threshold value, expecting positive integers" >&2 + exit $INCORRECT_TELEMETRY_VALUE + fi +fi + +# Close idle connections after certain duration (in seconds) +IDLE_CONN_DURATION=$(echo $GNMI | jq -r '.idle_conn_duration') +if [[ $IDLE_CONN_DURATION =~ ^[0-9]+$ ]]; then + TELEMETRY_ARGS+=" --idle_conn_duration $IDLE_CONN_DURATION" +else + if [ -z "$GNMI" ] || [[ $IDLE_CONN_DURATION == "null" ]]; then + TELEMETRY_ARGS+=" --idle_conn_duration 5" + else + echo "Incorrect idle_conn_duration value, expecting positive integers" >&2 + exit $INCORRECT_TELEMETRY_VALUE + fi +fi + +exec /usr/sbin/telemetry ${TELEMETRY_ARGS} diff --git a/dockers/docker-sonic-gnmi/start.sh b/dockers/docker-sonic-gnmi/start.sh new file mode 100755 index 0000000000..c3a928a6f8 --- /dev/null +++ 
b/dockers/docker-sonic-gnmi/start.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +if [ "${RUNTIME_OWNER}" == "" ]; then + RUNTIME_OWNER="kube" +fi + +CTR_SCRIPT="/usr/share/sonic/scripts/container_startup.py" +if test -f ${CTR_SCRIPT} +then + ${CTR_SCRIPT} -f gnmi -o ${RUNTIME_OWNER} -v ${IMAGE_VERSION} +fi + +mkdir -p /var/sonic +echo "# Config files managed by sonic-config-engine" > /var/sonic/config_status + +TZ=$(cat /etc/timezone) +rm -rf /etc/localtime +ln -sf /usr/share/zoneinfo/$TZ /etc/localtime diff --git a/dockers/docker-sonic-gnmi/supervisord.conf b/dockers/docker-sonic-gnmi/supervisord.conf new file mode 100644 index 0000000000..b70e6c868d --- /dev/null +++ b/dockers/docker-sonic-gnmi/supervisord.conf @@ -0,0 +1,60 @@ +[supervisord] +logfile_maxbytes=1MB +logfile_backups=2 +nodaemon=true + +[eventlistener:dependent-startup] +command=python3 -m supervisord_dependent_startup +autostart=true +autorestart=unexpected +startretries=0 +exitcodes=0,3 +events=PROCESS_STATE +buffer_size=1024 + +[eventlistener:supervisor-proc-exit-listener] +command=/usr/bin/supervisor-proc-exit-listener --container-name gnmi +events=PROCESS_STATE_EXITED,PROCESS_STATE_RUNNING +autostart=true +autorestart=false +buffer_size=1024 + +[program:rsyslogd] +command=/usr/sbin/rsyslogd -n -iNONE +priority=1 +autostart=false +autorestart=true +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true + +[program:start] +command=/usr/bin/start.sh +priority=2 +autostart=false +autorestart=false +startsecs=0 +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true +dependent_startup_wait_for=rsyslogd:running + +[program:gnmi-native] +command=/usr/bin/gnmi-native.sh +priority=3 +autostart=false +autorestart=false +stdout_logfile=syslog +stderr_logfile=syslog +dependent_startup=true +dependent_startup_wait_for=start:exited + +[program:dialout] +command=/usr/bin/dialout.sh +priority=4 +autostart=false +autorestart=false +stdout_logfile=syslog +stderr_logfile=syslog 
+dependent_startup=true +dependent_startup_wait_for=gnmi-native:running diff --git a/dockers/docker-sonic-gnmi/telemetry_vars.j2 b/dockers/docker-sonic-gnmi/telemetry_vars.j2 new file mode 100644 index 0000000000..4546ae5ab7 --- /dev/null +++ b/dockers/docker-sonic-gnmi/telemetry_vars.j2 @@ -0,0 +1,5 @@ +{ + "certs": {% if "certs" in GNMI.keys() %}{{ GNMI["certs"] }}{% else %}""{% endif %}, + "gnmi" : {% if "gnmi" in GNMI.keys() %}{{ GNMI["gnmi"] }}{% else %}""{% endif %}, + "x509" : {% if "x509" in DEVICE_METADATA.keys() %}{{ DEVICE_METADATA["x509"] }}{% else %}""{% endif %} +} diff --git a/dockers/docker-sonic-telemetry/Dockerfile.j2 b/dockers/docker-sonic-telemetry/Dockerfile.j2 index 64d679d40c..88aceca4c8 100644 --- a/dockers/docker-sonic-telemetry/Dockerfile.j2 +++ b/dockers/docker-sonic-telemetry/Dockerfile.j2 @@ -1,5 +1,5 @@ {% from "dockers/dockerfile-macros.j2" import install_debian_packages, install_python_wheels, copy_files %} -FROM docker-config-engine-bullseye-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}} +FROM docker-sonic-gnmi-{{DOCKER_USERNAME}}:{{DOCKER_USERTAG}} ARG docker_container_name ARG image_version @@ -28,7 +28,6 @@ RUN apt-get clean -y && \ COPY ["start.sh", "telemetry.sh", "dialout.sh", "/usr/bin/"] COPY ["telemetry_vars.j2", "/usr/share/sonic/templates/"] COPY ["supervisord.conf", "/etc/supervisor/conf.d/"] -COPY ["files/supervisor-proc-exit-listener", "/usr/bin"] COPY ["critical_processes", "/etc/supervisor"] ENTRYPOINT ["/usr/local/bin/supervisord"] diff --git a/dockers/docker-sonic-telemetry/telemetry.sh b/dockers/docker-sonic-telemetry/telemetry.sh index e44a2282b8..4e9b6574dc 100755 --- a/dockers/docker-sonic-telemetry/telemetry.sh +++ b/dockers/docker-sonic-telemetry/telemetry.sh @@ -70,18 +70,12 @@ else TELEMETRY_ARGS+=" -v=2" fi -# Enable ZMQ for SmartSwitch -LOCALHOST_SUBTYPE=`sonic-db-cli CONFIG_DB hget localhost "subtype"` -if [[ x"${LOCALHOST_SUBTYPE}" == x"SmartSwitch" ]]; then - TELEMETRY_ARGS+=" 
-zmq_address=tcp://127.0.0.1:8100" -fi - # Server will handle threshold connections consecutively THRESHOLD_CONNECTIONS=$(echo $GNMI | jq -r '.threshold') if [[ $THRESHOLD_CONNECTIONS =~ ^[0-9]+$ ]]; then TELEMETRY_ARGS+=" --threshold $THRESHOLD_CONNECTIONS" else - if [ -z $GNMI ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then + if [ -z "$GNMI" ] || [[ $THRESHOLD_CONNECTIONS == "null" ]]; then TELEMETRY_ARGS+=" --threshold 100" else echo "Incorrect threshold value, expecting positive integers" >&2 @@ -94,13 +88,13 @@ IDLE_CONN_DURATION=$(echo $GNMI | jq -r '.idle_conn_duration') if [[ $IDLE_CONN_DURATION =~ ^[0-9]+$ ]]; then TELEMETRY_ARGS+=" --idle_conn_duration $IDLE_CONN_DURATION" else - if [ -z $GNMI ] || [[ $IDLE_CONN_DURATION == "null" ]]; then + if [ -z "$GNMI" ] || [[ $IDLE_CONN_DURATION == "null" ]]; then TELEMETRY_ARGS+=" --idle_conn_duration 5" else echo "Incorrect idle_conn_duration value, expecting positive integers" >&2 exit $INCORRECT_TELEMETRY_VALUE fi fi - +TELEMETRY_ARGS+=" -gnmi_native_write=false" exec /usr/sbin/telemetry ${TELEMETRY_ARGS} diff --git a/files/build_templates/gnmi.service.j2 b/files/build_templates/gnmi.service.j2 new file mode 100644 index 0000000000..7710a8fd0d --- /dev/null +++ b/files/build_templates/gnmi.service.j2 @@ -0,0 +1,16 @@ +[Unit] +Description=GNMI container +Requires=database.service +After=database.service swss.service syncd.service +Before=ntp-config.service +BindsTo=sonic.target +After=sonic.target +StartLimitIntervalSec=1200 +StartLimitBurst=3 + +[Service] +User={{ sonicadmin_user }} +ExecStartPre=/usr/local/bin/{{docker_container_name}}.sh start +ExecStart=/usr/local/bin/{{docker_container_name}}.sh wait +ExecStop=/usr/local/bin/{{docker_container_name}}.sh stop +RestartSec=30 diff --git a/files/build_templates/init_cfg.json.j2 b/files/build_templates/init_cfg.json.j2 index 3025da5487..7ec71a3589 100644 --- a/files/build_templates/init_cfg.json.j2 +++ b/files/build_templates/init_cfg.json.j2 @@ -54,6 +54,7 @@ 
{%- if include_restapi == "y" %}{% do features.append(("restapi", "enabled", false, "enabled")) %}{% endif %} {%- if include_sflow == "y" %}{% do features.append(("sflow", "disabled", true, "enabled")) %}{% endif %} {%- if include_macsec == "y" %}{% do features.append(("macsec", "{% if 'type' in DEVICE_METADATA['localhost'] and DEVICE_METADATA['localhost']['type'] == 'SpineRouter' and DEVICE_RUNTIME_METADATA['MACSEC_SUPPORTED'] %}enabled{% else %}disabled{% endif %}", false, "enabled")) %}{% endif %} +{%- if include_system_gnmi == "y" %}{% do features.append(("gnmi", "enabled", true, "enabled")) %}{% endif %} {%- if include_system_telemetry == "y" %}{% do features.append(("telemetry", "enabled", true, "enabled")) %}{% endif %} "FEATURE": { {# delayed field if set, will start the feature systemd .timer unit instead of .service unit #} @@ -76,7 +77,7 @@ "check_up_status" : "false", {%- endif %} {%- if include_kubernetes == "y" %} -{%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry"] %} +{%- if feature in ["lldp", "pmon", "radv", "eventd", "snmp", "telemetry", "gnmi"] %} "set_owner": "kube", {% else %} "set_owner": "local", {% endif %} {% endif %} "high_mem_alert": "disabled" diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 848911433c..b4ecda9942 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -909,6 +909,7 @@ sudo LANG=C cp $SCRIPTS_DIR/radv.sh $FILESYSTEM_ROOT/usr/local/bin/radv.sh sudo LANG=C cp $SCRIPTS_DIR/database.sh $FILESYSTEM_ROOT/usr/local/bin/database.sh sudo LANG=C cp $SCRIPTS_DIR/snmp.sh $FILESYSTEM_ROOT/usr/local/bin/snmp.sh sudo LANG=C cp $SCRIPTS_DIR/telemetry.sh $FILESYSTEM_ROOT/usr/local/bin/telemetry.sh +sudo LANG=C cp $SCRIPTS_DIR/gnmi.sh $FILESYSTEM_ROOT/usr/local/bin/gnmi.sh sudo LANG=C cp $SCRIPTS_DIR/mgmt-framework.sh $FILESYSTEM_ROOT/usr/local/bin/mgmt-framework.sh sudo LANG=C cp 
$SCRIPTS_DIR/asic_status.sh $FILESYSTEM_ROOT/usr/local/bin/asic_status.sh sudo LANG=C cp $SCRIPTS_DIR/asic_status.py $FILESYSTEM_ROOT/usr/local/bin/asic_status.py diff --git a/files/image_config/logrotate/rsyslog.j2 b/files/image_config/logrotate/rsyslog.j2 index d2b94ca2eb..77d950eb4f 100644 --- a/files/image_config/logrotate/rsyslog.j2 +++ b/files/image_config/logrotate/rsyslog.j2 @@ -28,6 +28,7 @@ /var/log/syslog /var/log/teamd.log /var/log/telemetry.log +/var/log/gnmi.log /var/log/frr/bgpd.log /var/log/frr/zebra.log /var/log/swss/sairedis*.rec diff --git a/files/image_config/monit/container_checker b/files/image_config/monit/container_checker index 8ca86c0653..f6be2cda9f 100755 --- a/files/image_config/monit/container_checker +++ b/files/image_config/monit/container_checker @@ -58,9 +58,9 @@ def get_expected_running_containers(): for container_name in feature_table.keys(): if feature_table[container_name]["state"] not in ["disabled", "always_disabled"]: if multi_asic.is_multi_asic(): - if feature_table[container_name]["has_global_scope"] == "True": + if feature_table[container_name].get("has_global_scope", "True") == "True": expected_running_containers.add(container_name) - if feature_table[container_name]["has_per_asic_scope"] == "True": + if feature_table[container_name].get("has_per_asic_scope", "False") == "True": num_asics = multi_asic.get_num_asics() for asic_id in range(num_asics): if asic_id in asics_id_presence or container_name in run_all_instance_list: @@ -69,9 +69,9 @@ def get_expected_running_containers(): expected_running_containers.add(container_name) if feature_table[container_name]["state"] == 'always_enabled': if multi_asic.is_multi_asic(): - if feature_table[container_name]["has_global_scope"] == "True": + if feature_table[container_name].get("has_global_scope", "True") == "True": always_running_containers.add(container_name) - if feature_table[container_name]["has_per_asic_scope"] == "True": + if 
feature_table[container_name].get("has_per_asic_scope", "False") == "True": num_asics = multi_asic.get_num_asics() for asic_id in range(num_asics): if asic_id in asics_id_presence or container_name in run_all_instance_list: diff --git a/files/image_config/rsyslog/rsyslog.d/00-sonic.conf b/files/image_config/rsyslog/rsyslog.d/00-sonic.conf index e6f26b21b5..e949365da4 100644 --- a/files/image_config/rsyslog/rsyslog.d/00-sonic.conf +++ b/files/image_config/rsyslog/rsyslog.d/00-sonic.conf @@ -32,6 +32,12 @@ if $programname contains "teamd_" then { stop } +## gnmi rules +if $msg startswith " gnmi-native" then { + /var/log/gnmi.log + stop +} + ## telemetry rules if $msg startswith " telemetry" or ($msg startswith " dialout" )then { /var/log/telemetry.log diff --git a/files/scripts/gnmi.sh b/files/scripts/gnmi.sh new file mode 120000 index 0000000000..ce97295f03 --- /dev/null +++ b/files/scripts/gnmi.sh @@ -0,0 +1 @@ +service_mgmt.sh \ No newline at end of file diff --git a/rules/config b/rules/config index ac6a30ba6e..bd77e91c59 100644 --- a/rules/config +++ b/rules/config @@ -124,9 +124,11 @@ SONIC_DPKG_CACHE_SOURCE ?= /var/cache/sonic/artifacts # Default VS build memory preparation DEFAULT_VS_PREPARE_MEM = yes +# INCLUDE_SYSTEM_GNMI - build docker-sonic-gnmi for system gnmi support +INCLUDE_SYSTEM_GNMI = y # INCLUDE_SYSTEM_TELEMETRY - build docker-sonic-telemetry for system telemetry support -INCLUDE_SYSTEM_TELEMETRY = y +INCLUDE_SYSTEM_TELEMETRY = n # INCLUDE_ICCPD - build docker-iccpd for mclag support INCLUDE_ICCPD = n @@ -138,7 +140,7 @@ INCLUDE_SFLOW = y INCLUDE_MGMT_FRAMEWORK = y # ENABLE_HOST_SERVICE_ON_START - enable sonic-host-server for mgmt-framework and/or -# telemetry containers to access host functionality by default +# gnmi containers to access host functionality by default ENABLE_HOST_SERVICE_ON_START = y # INCLUDE_RESTAPI - build docker-sonic-restapi for configuring the switch using REST APIs @@ -164,7 +166,6 @@ ENABLE_AUTO_TECH_SUPPORT = y # 
ENABLE_TRANSLIB_WRITE = y # ENABLE_NATIVE_WRITE - Enable native write/config operations via the gNMI interface. -# Uncomment to enable: ENABLE_NATIVE_WRITE = y # INCLUDE_MACSEC - build docker-macsec for macsec support diff --git a/rules/docker-gnmi.dep b/rules/docker-gnmi.dep new file mode 100644 index 0000000000..b3ccae6999 --- /dev/null +++ b/rules/docker-gnmi.dep @@ -0,0 +1,11 @@ + +DPATH := $($(DOCKER_GNMI)_PATH) +DEP_FILES := $(SONIC_COMMON_FILES_LIST) rules/docker-gnmi.mk rules/docker-gnmi.dep +DEP_FILES += $(SONIC_COMMON_BASE_FILES_LIST) +DEP_FILES += $(shell git ls-files $(DPATH)) + +$(DOCKER_GNMI)_CACHE_MODE := GIT_CONTENT_SHA +$(DOCKER_GNMI)_DEP_FLAGS := $(SONIC_COMMON_FLAGS_LIST) +$(DOCKER_GNMI)_DEP_FILES := $(DEP_FILES) + +$(eval $(call add_dbg_docker,$(DOCKER_GNMI),$(DOCKER_GNMI_DBG))) diff --git a/rules/docker-gnmi.mk b/rules/docker-gnmi.mk new file mode 100644 index 0000000000..f0b47676da --- /dev/null +++ b/rules/docker-gnmi.mk @@ -0,0 +1,37 @@ +# docker image for GNMI agent + +DOCKER_GNMI_STEM = docker-sonic-gnmi +DOCKER_GNMI = $(DOCKER_GNMI_STEM).gz +DOCKER_GNMI_DBG = $(DOCKER_GNMI_STEM)-$(DBG_IMAGE_MARK).gz + +$(DOCKER_GNMI)_PATH = $(DOCKERS_PATH)/$(DOCKER_GNMI_STEM) + +$(DOCKER_GNMI)_DEPENDS += $(SONIC_MGMT_COMMON) +$(DOCKER_GNMI)_DEPENDS += $(SONIC_TELEMETRY) +$(DOCKER_GNMI)_DBG_DEPENDS = $($(DOCKER_CONFIG_ENGINE_BULLSEYE)_DBG_DEPENDS) + +$(DOCKER_GNMI)_LOAD_DOCKERS += $(DOCKER_CONFIG_ENGINE_BULLSEYE) + +$(DOCKER_GNMI)_VERSION = 1.0.0 +$(DOCKER_GNMI)_PACKAGE_NAME = gnmi + +$(DOCKER_GNMI)_DBG_IMAGE_PACKAGES = $($(DOCKER_CONFIG_ENGINE_BULLSEYE)_DBG_IMAGE_PACKAGES) + +SONIC_DOCKER_IMAGES += $(DOCKER_GNMI) +ifeq ($(INCLUDE_SYSTEM_GNMI), y) +SONIC_INSTALL_DOCKER_IMAGES += $(DOCKER_GNMI) +endif + +SONIC_DOCKER_DBG_IMAGES += $(DOCKER_GNMI_DBG) +ifeq ($(INCLUDE_SYSTEM_GNMI), y) +SONIC_INSTALL_DOCKER_DBG_IMAGES += $(DOCKER_GNMI_DBG) +endif + +$(DOCKER_GNMI)_CONTAINER_NAME = gnmi +$(DOCKER_GNMI)_RUN_OPT += -t +$(DOCKER_GNMI)_RUN_OPT += -v 
/etc/sonic:/etc/sonic:ro +$(DOCKER_GNMI)_RUN_OPT += -v /etc/timezone:/etc/timezone:ro +$(DOCKER_GNMI)_RUN_OPT += -v /var/run/dbus:/var/run/dbus:rw + +$(DOCKER_GNMI)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) +$(DOCKER_GNMI)_BASE_IMAGE_FILES += monit_gnmi:/etc/monit/conf.d diff --git a/rules/docker-telemetry.mk b/rules/docker-telemetry.mk index de36d25481..59743b6cbc 100644 --- a/rules/docker-telemetry.mk +++ b/rules/docker-telemetry.mk @@ -6,16 +6,14 @@ DOCKER_TELEMETRY_DBG = $(DOCKER_TELEMETRY_STEM)-$(DBG_IMAGE_MARK).gz $(DOCKER_TELEMETRY)_PATH = $(DOCKERS_PATH)/$(DOCKER_TELEMETRY_STEM) -$(DOCKER_TELEMETRY)_DEPENDS += $(SONIC_MGMT_COMMON) -$(DOCKER_TELEMETRY)_DEPENDS += $(SONIC_TELEMETRY) -$(DOCKER_TELEMETRY)_DBG_DEPENDS = $($(DOCKER_CONFIG_ENGINE_BULLSEYE)_DBG_DEPENDS) +$(DOCKER_TELEMETRY)_DBG_DEPENDS = $($(DOCKER_GNMI)_DBG_DEPENDS) -$(DOCKER_TELEMETRY)_LOAD_DOCKERS += $(DOCKER_CONFIG_ENGINE_BULLSEYE) +$(DOCKER_TELEMETRY)_LOAD_DOCKERS += $(DOCKER_GNMI) $(DOCKER_TELEMETRY)_VERSION = 1.0.0 $(DOCKER_TELEMETRY)_PACKAGE_NAME = telemetry -$(DOCKER_TELEMETRY)_DBG_IMAGE_PACKAGES = $($(DOCKER_CONFIG_ENGINE_BULLSEYE)_DBG_IMAGE_PACKAGES) +$(DOCKER_TELEMETRY)_DBG_IMAGE_PACKAGES = $($(DOCKER_GNMI)_DBG_IMAGE_PACKAGES) SONIC_DOCKER_IMAGES += $(DOCKER_TELEMETRY) ifeq ($(INCLUDE_SYSTEM_TELEMETRY), y) @@ -30,10 +28,7 @@ endif $(DOCKER_TELEMETRY)_CONTAINER_NAME = telemetry $(DOCKER_TELEMETRY)_RUN_OPT += -t $(DOCKER_TELEMETRY)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro -$(DOCKER_TELEMETRY)_RUN_OPT += -v /etc/timezone:/etc/timezone:ro -ifneq ($(INCLUDE_SYSTEM_GNMI), y) -$(DOCKER_TELEMETRY)_RUN_OPT += -v /var/run/dbus:/var/run/dbus:rw -endif +$(DOCKER_TELEMETRY)_RUN_OPT += -v /etc/timezone:/etc/timezone:ro $(DOCKER_TELEMETRY)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT) $(DOCKER_TELEMETRY)_BASE_IMAGE_FILES += monit_telemetry:/etc/monit/conf.d diff --git a/slave.mk b/slave.mk index 30689d4dc6..bf4f224631 100644 --- a/slave.mk +++ b/slave.mk @@ -157,6 +157,10 @@ ifeq 
($(SONIC_INCLUDE_SYSTEM_TELEMETRY),y) INCLUDE_SYSTEM_TELEMETRY = y endif +ifeq ($(SONIC_INCLUDE_SYSTEM_GNMI),y) +INCLUDE_SYSTEM_GNMI = y +endif + ifeq ($(SONIC_INCLUDE_RESTAPI),y) INCLUDE_RESTAPI = y endif @@ -414,6 +418,7 @@ $(info "VS_PREPARE_MEM" : "$(VS_PREPARE_MEM)") $(info "INCLUDE_MGMT_FRAMEWORK" : "$(INCLUDE_MGMT_FRAMEWORK)") $(info "INCLUDE_ICCPD" : "$(INCLUDE_ICCPD)") $(info "INCLUDE_SYSTEM_TELEMETRY" : "$(INCLUDE_SYSTEM_TELEMETRY)") +$(info "INCLUDE_SYSTEM_GNMI" : "$(INCLUDE_SYSTEM_GNMI)") $(info "ENABLE_HOST_SERVICE_ON_START" : "$(ENABLE_HOST_SERVICE_ON_START)") $(info "INCLUDE_RESTAPI" : "$(INCLUDE_RESTAPI)") $(info "INCLUDE_SFLOW" : "$(INCLUDE_SFLOW)") @@ -1376,6 +1381,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \ export sonic_su_mode="$(SECURE_UPGRADE_MODE)" export sonic_su_prod_signing_tool="/sonic/scripts/$(shell basename -- $(SECURE_UPGRADE_PROD_SIGNING_TOOL))" export include_system_telemetry="$(INCLUDE_SYSTEM_TELEMETRY)" + export include_system_gnmi="$(INCLUDE_SYSTEM_GNMI)" export include_restapi="$(INCLUDE_RESTAPI)" export include_nat="$(INCLUDE_NAT)" export include_p4rt="$(INCLUDE_P4RT)" diff --git a/src/system-health/health_checker/service_checker.py b/src/system-health/health_checker/service_checker.py index 76b024388b..a310fbba1a 100644 --- a/src/system-health/health_checker/service_checker.py +++ b/src/system-health/health_checker/service_checker.py @@ -87,10 +87,10 @@ class ServiceChecker(HealthChecker): for feature_name, feature_entry in feature_table.items(): if feature_entry["state"] not in ["disabled", "always_disabled"]: if multi_asic.is_multi_asic(): - if feature_entry["has_global_scope"] == "True": + if feature_entry.get("has_global_scope", "True") == "True": expected_running_containers.add(feature_name) container_feature_dict[feature_name] = feature_name - if feature_entry["has_per_asic_scope"] == "True": + if feature_entry.get("has_per_asic_scope", "False") == "True": num_asics = 
multi_asic.get_num_asics() for asic_id in range(num_asics): if asic_id in asics_id_presence or feature_name in run_all_instance_list: