[Auto Techsupport] Event driven Techsupport Changes (#8670)

#### Why I did it

Changes required for feature "Event Driven TechSupport Invocation & CoreDump Mgmt". [HLD](https://github.com/Azure/SONiC/pull/818 )

Requires: https://github.com/Azure/sonic-utilities/pull/1796.
Merging in any order would be fine.

Summary of the changes:

- Added the YANG models for the new tables introduced as part of this feature.
- Enhanced init_cfg.json with the default config required
- Added a compile-time flag which enables/disables the config required for this feature inside the init_cfg.json
- Enhanced the supervisor-proc-exit-listener script to populate `<feature>:<critical_proc> = <comm>:<pid>` info in the STATE_DB when it observes a process-exit notification for a critical process running inside the docker.
This commit is contained in:
Vivek Reddy 2021-11-15 21:56:37 -08:00 committed by GitHub
parent 1c4ca07cf5
commit ff32ac3ed4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 285 additions and 1 deletions

View File

@ -40,6 +40,9 @@
# * SONIC_DPKG_CACHE_SOURCE: Debian package cache location when cache enabled for debian packages
# * BUILD_LOG_TIMESTAMP: Set timestamp in the build log (simple/none)
# * DOCKER_EXTRA_OPTS: Extra command line arguments for dockerd running in slave container.
# * ENABLE_AUTO_TECH_SUPPORT: Enable the configuration for event-driven techsupport & coredump mgmt feature
# * Default: y
# * Values: y,n
#
###############################################################################
@ -282,6 +285,7 @@ SONIC_BUILD_INSTRUCTION := make \
SONIC_ENABLE_IMAGE_SIGNATURE=$(ENABLE_IMAGE_SIGNATURE) \
ENABLE_HOST_SERVICE_ON_START=$(ENABLE_HOST_SERVICE_ON_START) \
SLAVE_DIR=$(SLAVE_DIR) \
ENABLE_AUTO_TECH_SUPPORT=$(ENABLE_AUTO_TECH_SUPPORT) \
BUILD_MULTIASIC_KVM=$(BUILD_MULTIASIC_KVM) \
$(SONIC_OVERRIDE_BUILD_VARS)

View File

@ -61,5 +61,26 @@
"high_mem_alert": "disabled"
}{% if not loop.last %},{% endif -%}
{% endfor %}
},
"AUTO_TECHSUPPORT": {
"GLOBAL": {
{%- if enable_auto_tech_support == "y" %}
"state" : "enabled", {% else %}
"state" : "disabled", {% endif %}
"rate_limit_interval" : "180",
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"since" : "2 days ago"
}
},
"AUTO_TECHSUPPORT_FEATURE": {
{%- for feature, _, _, _ in features %}
"{{feature}}": {
{%- if enable_auto_tech_support == "y" %}
"state" : "enabled", {% else %}
"state" : "disabled", {% endif %}
"rate_limit_interval" : "600"
}{%if not loop.last %},{% endif -%}
{% endfor %}
}
}

View File

@ -144,6 +144,9 @@ INCLUDE_NAT = y
# INCLUDE_DHCP_RELAY - build and install dhcp-relay package
INCLUDE_DHCP_RELAY = y
# ENABLE_AUTO_TECH_SUPPORT - Enable the configuration for event-driven techsupport & coredump mgmt feature
ENABLE_AUTO_TECH_SUPPORT = y
# TELEMETRY_WRITABLE - Enable write/config operations via the gNMI interface.
# Uncomment to enable:
# TELEMETRY_WRITABLE = y

View File

@ -163,6 +163,10 @@ ifeq ($(SONIC_INCLUDE_MACSEC),y)
INCLUDE_MACSEC = y
endif
ifeq ($(ENABLE_AUTO_TECH_SUPPORT),y)
ENABLE_AUTO_TECH_SUPPORT = y
endif
ifeq ($(SONIC_INCLUDE_MUX),y)
INCLUDE_MUX = y
endif
@ -290,6 +294,7 @@ $(info "INCLUDE_KUBERNETES" : "$(INCLUDE_KUBERNETES)")
$(info "INCLUDE_MACSEC" : "$(INCLUDE_MACSEC)")
$(info "INCLUDE_MUX" : "$(INCLUDE_MUX)")
$(info "TELEMETRY_WRITABLE" : "$(TELEMETRY_WRITABLE)")
$(info "ENABLE_AUTO_TECH_SUPPORT" : "$(ENABLE_AUTO_TECH_SUPPORT)")
$(info "PDDF_SUPPORT" : "$(PDDF_SUPPORT)")
$(info "MULTIARCH_QEMU_ENVIRON" : "$(MULTIARCH_QEMU_ENVIRON)")
$(info "SONIC_VERSION_CONTROL_COMPONENTS": "$(SONIC_VERSION_CONTROL_COMPONENTS)")
@ -987,6 +992,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
export include_restapi="$(INCLUDE_RESTAPI)"
export include_nat="$(INCLUDE_NAT)"
export include_sflow="$(INCLUDE_SFLOW)"
export enable_auto_tech_support="$(ENABLE_AUTO_TECH_SUPPORT)"
export include_macsec="$(INCLUDE_MACSEC)"
export include_mgmt_framework="$(INCLUDE_MGMT_FRAMEWORK)"
export include_iccpd="$(INCLUDE_ICCPD)"

View File

@ -78,6 +78,7 @@ setup(
cmdclass={'build_py': my_build_py},
data_files=[
('yang-models', ['./yang-models/sonic-acl.yang',
'./yang-models/sonic-auto_techsupport.yang',
'./yang-models/sonic-bgp-common.yang',
'./yang-models/sonic-bgp-global.yang',
'./yang-models/sonic-bgp-neighbor.yang',

View File

@ -1152,6 +1152,33 @@
"trap_group": "queue1_group1"
}
},
"AUTO_TECHSUPPORT": {
"GLOBAL": {
"state" : "enabled",
"rate_limit_interval" : "180",
"max_techsupport_limit" : "10.0",
"max_core_limit" : "0.0",
"since" : "2 days ago"
}
},
"AUTO_TECHSUPPORT_FEATURE": {
"bgp" :{
"state" : "disabled",
"rate_limit_interval" : "600"
},
"swss" : {
"state" : "enabled",
"rate_limit_interval" : "600"
},
"snmp" : {
"state" : "enabled",
"rate_limit_interval" : "500"
},
"dhcp_relay" : {
"state" : "disabled",
"rate_limit_interval" : "1000"
}
},
"LLDP": {
"GLOBAL": {
"mode": "TRANSMIT",
@ -1371,7 +1398,6 @@
}
}
},
"SAMPLE_CONFIG_DB_UNKNOWN": {
"UNKNOWN_TABLE": {
"Error": "This Table is for testing, This Table does not have YANG models."

View File

@ -0,0 +1,27 @@
{
"AUTO_TECHSUPPORT_TEST":{
"desc": "Configure auto techsupport params in AUTO_TECHSUPPORT Table"
},
"AUTO_TECHSUPPORT_WRONG_STATE_VALUE": {
"desc": "Configure state key with invalid value",
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
"desc" : "Configure rate_limit_interval with a value in an invalid format",
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
"desc" : "Configure a value for max_core_limit outside the range [0, 100)",
"eStr": "Value \"100.00\" does not satisfy the constraint \"0..99.99\" (range, length, or pattern)."
},
"AUTO_TECHSUPPORT_VALID_DECIMAL_VALUE": {
"desc" : "Configure a value for max_techsupport_limit inside the range [0, 100)"
},
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
"desc" : "Configure a value for max_techsupport_limit inside the range [0, 100) but with 3 fractional digits",
"eStrKey": "InvalidValue"
},
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
"desc" : "Configure and test the valid configuration"
}
}

View File

@ -0,0 +1,85 @@
{
"AUTO_TECHSUPPORT_TEST": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"state" : "enabled",
"rate_limit_interval" : "180",
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"since" : "2 days ago"
}
}
}
},
"AUTO_TECHSUPPORT_WRONG_STATE_VALUE": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"state" : "start",
"rate_limit_interval" : "180",
"max_techsupport_limit" : "10.0",
"max_core_limit" : "5.0",
"since" : "2 days ago"
}
}
}
},
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"rate_limit_interval" : "whatever"
}
}
}
},
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"max_core_limit" : "100.00",
"rate_limit_interval" : "180"
}
}
}
},
"AUTO_TECHSUPPORT_VALID_DECIMAL_VALUE": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"max_techsupport_limit" : "11.23",
"max_core_limit" : "99.99"
}
}
}
},
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
"sonic-auto_techsupport:GLOBAL": {
"max_techsupport_limit" : "11.111",
"max_core_limit" : "99.99"
}
}
}
},
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
"sonic-auto_techsupport:sonic-auto_techsupport": {
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
"AUTO_TECHSUPPORT_FEATURE_LIST": [
{
"feature_name" : "bgp",
"state" : "enabled",
"rate_limit_interval" : "600"
},
{
"feature_name" : "swss",
"state" : "disabled",
"rate_limit_interval" : "400"
}
]
}
}
}
}

View File

@ -0,0 +1,111 @@
module sonic-auto_techsupport {

    yang-version 1.1;

    namespace "http://github.com/Azure/sonic-auto_techsupport";
    prefix auto_techsupport;

    import sonic-types {
        prefix stypes;
    }

    description "Event Driven Techsupport & CoreDump Mgmt Capability in SONiC OS";

    revision 2021-08-09 {
        description "First Revision";
    }

    /*
        Shared type for the percentage-style limits below
        (max_techsupport_limit and max_core_limit).
    */
    typedef decimal-repr {
        description "Decimal value with two fractional digits, restricted to the range 0.0 to 99.99";
        type decimal64 {
            fraction-digits 2;
            range 0.0..99.99;
        }
    }

    container sonic-auto_techsupport {

        container AUTO_TECHSUPPORT {

            description "AUTO_TECHSUPPORT part of config_db.json";

            container GLOBAL {

                leaf state {
                    description "Knob to make techsupport invocation event-driven based on core-dump generation";
                    type stypes:admin_mode;
                }

                leaf rate_limit_interval {
                    description "Minimum time in seconds between two successive techsupport invocations. Configure 0 to explicitly disable";
                    type uint16;
                }

                leaf max_techsupport_limit {
                    /*
                        A value between 0.0-99.99 should be specified.
                        The actual value in bytes is calculated based on the available space in the filesystem hosting /var/dump
                        When the limit is crossed, the older dump files are incrementally deleted
                    */
                    description "Max Limit in percentage for the cumulative size of ts dumps. No cleanup is performed if the value isn't configured or is 0.0";
                    type decimal-repr;
                }

                leaf max_core_limit {
                    /*
                        A value between 0.0-99.99 should be specified.
                        The actual value in bytes is calculated based on the available space in the filesystem hosting /var/core
                        When the limit is crossed, the older core files are incrementally deleted
                    */
                    description "Max Limit in percentage for the cumulative size of core dumps. No cleanup is performed if the value isn't configured or is 0.0";
                    type decimal-repr;
                }

                leaf since {
                    /*
                        Any valid date string of the formats specified here (https://www.gnu.org/software/coreutils/manual/html_node/Date-input-formats.html)
                        can be used.
                    */
                    description "Only collect the logs & core-dumps generated since the time provided. A default value of '2 days ago' is used if this value is not set explicitly or a non-valid string is provided";
                    type string {
                        length 1..255;
                    }
                }
            }
            /* end of container GLOBAL */
        }
        /* end of container AUTO_TECHSUPPORT */

        container AUTO_TECHSUPPORT_FEATURE {

            description "AUTO_TECHSUPPORT_FEATURE part of config_db.json";

            /* One list entry per feature, keyed by the feature's name. */
            list AUTO_TECHSUPPORT_FEATURE_LIST {

                key "feature_name";

                leaf feature_name {
                    description "The name of this feature";
                    /* TODO: Leafref once the FEATURE YANG is added*/
                    type string {
                        length 1..255;
                    }
                }

                leaf state {
                    description "Enable auto techsupport invocation on the processes running inside this feature";
                    type stypes:admin_mode;
                }

                leaf rate_limit_interval {
                    description "Rate limit interval for the corresponding feature. Configure 0 to explicitly disable";
                    type uint16;
                }
            }
            /* end of AUTO_TECHSUPPORT_FEATURE_LIST */
        }
        /* end of container AUTO_TECHSUPPORT_FEATURE */
    }
    /* end of top level container */
}