[Auto Techsupport] Event driven Techsupport Changes (#8670)
#### Why I did it Changes required for feature "Event Driven TechSupport Invocation & CoreDump Mgmt". [HLD](https://github.com/Azure/SONiC/pull/818) Requires: https://github.com/Azure/sonic-utilities/pull/1796. Merging in any order would be fine. Summary of the changes: - Added the YANG Models for the new tables introduced as a part of this feature. - Enhanced init_cfg.json with the default config required - Added a compile-time flag which enables/disables the config required for this feature inside the init_cfg.json - Enhanced the supervisor-proc-exit-listener script to populate `<feature>:<critical_proc> = <comm>:<pid>` info in the STATE_DB when it observes a proc exit notification for the critical processes running inside the docker.
This commit is contained in:
parent
1c4ca07cf5
commit
ff32ac3ed4
@ -40,6 +40,9 @@
|
||||
# * SONIC_DPKG_CACHE_SOURCE: Debian package cache location when cache enabled for debian packages
|
||||
# * BUILD_LOG_TIMESTAMP: Set timestamp in the build log (simple/none)
|
||||
# * DOCKER_EXTRA_OPTS: Extra command line arguments for dockerd running in slave container.
|
||||
# * ENABLE_AUTO_TECH_SUPPORT: Enable the configuration for event-driven techsupport & coredump mgmt feature
|
||||
# * Default: y
|
||||
# * Values: y,n
|
||||
#
|
||||
###############################################################################
|
||||
|
||||
@ -282,6 +285,7 @@ SONIC_BUILD_INSTRUCTION := make \
|
||||
SONIC_ENABLE_IMAGE_SIGNATURE=$(ENABLE_IMAGE_SIGNATURE) \
|
||||
ENABLE_HOST_SERVICE_ON_START=$(ENABLE_HOST_SERVICE_ON_START) \
|
||||
SLAVE_DIR=$(SLAVE_DIR) \
|
||||
ENABLE_AUTO_TECH_SUPPORT=$(ENABLE_AUTO_TECH_SUPPORT) \
|
||||
BUILD_MULTIASIC_KVM=$(BUILD_MULTIASIC_KVM) \
|
||||
$(SONIC_OVERRIDE_BUILD_VARS)
|
||||
|
||||
|
@ -61,5 +61,26 @@
|
||||
"high_mem_alert": "disabled"
|
||||
}{% if not loop.last %},{% endif -%}
|
||||
{% endfor %}
|
||||
},
|
||||
"AUTO_TECHSUPPORT": {
|
||||
"GLOBAL": {
|
||||
{%- if enable_auto_tech_support == "y" %}
|
||||
"state" : "enabled", {% else %}
|
||||
"state" : "disabled", {% endif %}
|
||||
"rate_limit_interval" : "180",
|
||||
"max_techsupport_limit" : "10.0",
|
||||
"max_core_limit" : "5.0",
|
||||
"since" : "2 days ago"
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT_FEATURE": {
|
||||
{%- for feature, _, _, _ in features %}
|
||||
"{{feature}}": {
|
||||
{%- if enable_auto_tech_support == "y" %}
|
||||
"state" : "enabled", {% else %}
|
||||
"state" : "disabled", {% endif %}
|
||||
"rate_limit_interval" : "600"
|
||||
}{%if not loop.last %},{% endif -%}
|
||||
{% endfor %}
|
||||
}
|
||||
}
|
||||
|
@ -144,6 +144,9 @@ INCLUDE_NAT = y
|
||||
# INCLUDE_DHCP_RELAY - build and install dhcp-relay package
|
||||
INCLUDE_DHCP_RELAY = y
|
||||
|
||||
# ENABLE_AUTO_TECH_SUPPORT - Enable the configuration for event-driven techsupport & coredump mgmt feature
|
||||
ENABLE_AUTO_TECH_SUPPORT = y
|
||||
|
||||
# TELEMETRY_WRITABLE - Enable write/config operations via the gNMI interface.
|
||||
# Uncomment to enable:
|
||||
# TELEMETRY_WRITABLE = y
|
||||
|
6
slave.mk
6
slave.mk
@ -163,6 +163,10 @@ ifeq ($(SONIC_INCLUDE_MACSEC),y)
|
||||
INCLUDE_MACSEC = y
|
||||
endif
|
||||
|
||||
ifeq ($(ENABLE_AUTO_TECH_SUPPORT),y)
|
||||
ENABLE_AUTO_TECH_SUPPORT = y
|
||||
endif
|
||||
|
||||
ifeq ($(SONIC_INCLUDE_MUX),y)
|
||||
INCLUDE_MUX = y
|
||||
endif
|
||||
@ -290,6 +294,7 @@ $(info "INCLUDE_KUBERNETES" : "$(INCLUDE_KUBERNETES)")
|
||||
$(info "INCLUDE_MACSEC" : "$(INCLUDE_MACSEC)")
|
||||
$(info "INCLUDE_MUX" : "$(INCLUDE_MUX)")
|
||||
$(info "TELEMETRY_WRITABLE" : "$(TELEMETRY_WRITABLE)")
|
||||
$(info "ENABLE_AUTO_TECH_SUPPORT" : "$(ENABLE_AUTO_TECH_SUPPORT)")
|
||||
$(info "PDDF_SUPPORT" : "$(PDDF_SUPPORT)")
|
||||
$(info "MULTIARCH_QEMU_ENVIRON" : "$(MULTIARCH_QEMU_ENVIRON)")
|
||||
$(info "SONIC_VERSION_CONTROL_COMPONENTS": "$(SONIC_VERSION_CONTROL_COMPONENTS)")
|
||||
@ -987,6 +992,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
|
||||
export include_restapi="$(INCLUDE_RESTAPI)"
|
||||
export include_nat="$(INCLUDE_NAT)"
|
||||
export include_sflow="$(INCLUDE_SFLOW)"
|
||||
export enable_auto_tech_support="$(ENABLE_AUTO_TECH_SUPPORT)"
|
||||
export include_macsec="$(INCLUDE_MACSEC)"
|
||||
export include_mgmt_framework="$(INCLUDE_MGMT_FRAMEWORK)"
|
||||
export include_iccpd="$(INCLUDE_ICCPD)"
|
||||
|
@ -78,6 +78,7 @@ setup(
|
||||
cmdclass={'build_py': my_build_py},
|
||||
data_files=[
|
||||
('yang-models', ['./yang-models/sonic-acl.yang',
|
||||
'./yang-models/sonic-auto_techsupport.yang',
|
||||
'./yang-models/sonic-bgp-common.yang',
|
||||
'./yang-models/sonic-bgp-global.yang',
|
||||
'./yang-models/sonic-bgp-neighbor.yang',
|
||||
|
@ -1152,6 +1152,33 @@
|
||||
"trap_group": "queue1_group1"
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT": {
|
||||
"GLOBAL": {
|
||||
"state" : "enabled",
|
||||
"rate_limit_interval" : "180",
|
||||
"max_techsupport_limit" : "10.0",
|
||||
"max_core_limit" : "0.0",
|
||||
"since" : "2 days ago"
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT_FEATURE": {
|
||||
"bgp" :{
|
||||
"state" : "disabled",
|
||||
"rate_limit_interval" : "600"
|
||||
},
|
||||
"swss" : {
|
||||
"state" : "enabled",
|
||||
"rate_limit_interval" : "600"
|
||||
},
|
||||
"snmp" : {
|
||||
"state" : "enabled",
|
||||
"rate_limit_interval" : "500"
|
||||
},
|
||||
"dhcp_relay" : {
|
||||
"state" : "disabled",
|
||||
"rate_limit_interval" : "1000"
|
||||
}
|
||||
},
|
||||
"LLDP": {
|
||||
"GLOBAL": {
|
||||
"mode": "TRANSMIT",
|
||||
@ -1371,7 +1398,6 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"SAMPLE_CONFIG_DB_UNKNOWN": {
|
||||
"UNKNOWN_TABLE": {
|
||||
"Error": "This Table is for testing, This Table does not have YANG models."
|
||||
|
@ -0,0 +1,27 @@
|
||||
{
|
||||
"AUTO_TECHSUPPORT_TEST":{
|
||||
"desc": "Configure auto techsupport params in AUTO_TECHSUPPORT Table"
|
||||
},
|
||||
"AUTO_TECHSUPPORT_WRONG_STATE_VALUE": {
|
||||
"desc": "Configure state key with invalid value",
|
||||
"eStrKey": "InvalidValue"
|
||||
},
|
||||
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
|
||||
"desc" : "Configure cooloff with a value of invalid format",
|
||||
"eStrKey": "InvalidValue"
|
||||
},
|
||||
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
|
||||
"desc" : "Configure a value for core-usage outside the range [0, 100)",
|
||||
"eStr": "Value \"100.00\" does not satisfy the constraint \"0..99.99\" (range, length, or pattern)."
|
||||
},
|
||||
"AUTO_TECHSUPPORT_VALID_DECIMAL_VALUE": {
|
||||
"desc" : "Configure a value for max_techsupport_size inside the range [0, 100)"
|
||||
},
|
||||
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
|
||||
"desc" : "Configure a value for max_techsupport_size inside the range [0, 100) but with 3 fractional digits",
|
||||
"eStrKey": "InvalidValue"
|
||||
},
|
||||
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
|
||||
"desc" : "Configure and test the valid configuration"
|
||||
}
|
||||
}
|
@ -0,0 +1,85 @@
|
||||
{
|
||||
"AUTO_TECHSUPPORT_TEST": {
|
||||
"sonic-auto_techsupport:sonic-auto_techsupport": {
|
||||
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
|
||||
"sonic-auto_techsupport:GLOBAL": {
|
||||
"state" : "enabled",
|
||||
"rate_limit_interval" : "180",
|
||||
"max_techsupport_limit" : "10.0",
|
||||
"max_core_limit" : "5.0",
|
||||
"since" : "2 days ago"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT_WRONG_STATE_VALUE": {
|
||||
"sonic-auto_techsupport:sonic-auto_techsupport": {
|
||||
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
|
||||
"sonic-auto_techsupport:GLOBAL": {
|
||||
"state" : "start",
|
||||
"rate_limit_interval" : "180",
|
||||
"max_techsupport_limit" : "10.0",
|
||||
"max_core_limit" : "5.0",
|
||||
"since" : "2 days ago"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT_INVALID_RATE_LIMIT_FORMAT": {
|
||||
"sonic-auto_techsupport:sonic-auto_techsupport": {
|
||||
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
|
||||
"sonic-auto_techsupport:GLOBAL": {
|
||||
"rate_limit_interval" : "whatever"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT_OUT_OF_RANGE_DECIMAL": {
|
||||
"sonic-auto_techsupport:sonic-auto_techsupport": {
|
||||
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
|
||||
"sonic-auto_techsupport:GLOBAL": {
|
||||
"max_core_limit" : "100.00",
|
||||
"rate_limit_interval" : "180"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT_VALID_DECIMAL_VALUE": {
|
||||
"sonic-auto_techsupport:sonic-auto_techsupport": {
|
||||
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
|
||||
"sonic-auto_techsupport:GLOBAL": {
|
||||
"max_techsupport_limit" : "11.23",
|
||||
"max_core_limit" : "99.99"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT_INVALID_FRACTION_DIGITS": {
|
||||
"sonic-auto_techsupport:sonic-auto_techsupport": {
|
||||
"sonic-auto_techsupport:AUTO_TECHSUPPORT": {
|
||||
"sonic-auto_techsupport:GLOBAL": {
|
||||
"max_techsupport_limit" : "11.111",
|
||||
"max_core_limit" : "99.99"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"AUTO_TECHSUPPORT_RATE_LIMIT_INTERVAL_TEST": {
|
||||
"sonic-auto_techsupport:sonic-auto_techsupport": {
|
||||
"sonic-auto_techsupport:AUTO_TECHSUPPORT_FEATURE": {
|
||||
"AUTO_TECHSUPPORT_FEATURE_LIST": [
|
||||
{
|
||||
"feature_name" : "bgp",
|
||||
"state" : "enabled",
|
||||
"rate_limit_interval" : "600"
|
||||
},
|
||||
{
|
||||
"feature_name" : "swss",
|
||||
"state" : "disabled",
|
||||
"rate_limit_interval" : "400"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
111
src/sonic-yang-models/yang-models/sonic-auto_techsupport.yang
Normal file
111
src/sonic-yang-models/yang-models/sonic-auto_techsupport.yang
Normal file
@ -0,0 +1,111 @@
|
||||
module sonic-auto_techsupport {
|
||||
|
||||
yang-version 1.1;
|
||||
|
||||
namespace "http://github.com/Azure/sonic-auto_techsupport";
|
||||
prefix auto_techsupport;
|
||||
|
||||
import sonic-types {
|
||||
prefix stypes;
|
||||
}
|
||||
|
||||
description "Event Driven Techsupport & CoreDump Mgmt Capability in SONiC OS";
|
||||
|
||||
revision 2021-08-09 {
|
||||
description "First Revision";
|
||||
}
|
||||
|
||||
typedef decimal-repr {
|
||||
type decimal64 {
|
||||
fraction-digits 2;
|
||||
range 0.0..99.99;
|
||||
}
|
||||
}
|
||||
|
||||
container sonic-auto_techsupport {
|
||||
|
||||
container AUTO_TECHSUPPORT {
|
||||
|
||||
description "AUTO_TECHSUPPORT part of config_db.json";
|
||||
|
||||
container GLOBAL {
|
||||
|
||||
leaf state {
|
||||
description "Knob to make techsupport invocation event-driven based on core-dump generation";
|
||||
type stypes:admin_mode;
|
||||
}
|
||||
|
||||
leaf rate_limit_interval {
|
||||
description "Minimum time in seconds between two successive techsupport invocations. Configure 0 to explicitly disable";
|
||||
type uint16;
|
||||
}
|
||||
|
||||
leaf max_techsupport_limit {
|
||||
/*
|
||||
A value between 0.0-99.99 should be specified.
|
||||
The actual value in bytes is calculated based on the available space in the filesystem hosting /var/dump
|
||||
When the limit is crossed, the older dump files are incrementally deleted
|
||||
*/
|
||||
description "Max Limit in percentage for the cumulative size of ts dumps. No cleanup is performed if the value isn't configured or is 0.0";
|
||||
type decimal-repr;
|
||||
}
|
||||
|
||||
leaf max_core_limit {
|
||||
/*
|
||||
A value between 0.0-99.99 should be specified.
|
||||
The actual value in bytes is calculated based on the available space in the filesystem hosting /var/core
|
||||
When the limit is crossed, the older core files are incrementally deleted
|
||||
*/
|
||||
description "Max Limit in percentage for the cumulative size of core dumps. No cleanup is performed if the value isn't configured or is 0.0";
|
||||
type decimal-repr;
|
||||
}
|
||||
|
||||
leaf since {
|
||||
/*
|
||||
Any valid date string of the formats specified here (https://www.gnu.org/software/coreutils/manual/html_node/Date-input-formats.html)
|
||||
can be used.
|
||||
*/
|
||||
description "Only collect the logs & core-dumps generated since the time provided. A default value of '2 days ago' is used if this value is not set explicitly or a non-valid string is provided";
|
||||
type string {
|
||||
length 1..255;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* end of container GLOBAL */
|
||||
}
|
||||
/* end of container AUTO_TECHSUPPORT */
|
||||
|
||||
container AUTO_TECHSUPPORT_FEATURE {
|
||||
|
||||
description "AUTO_TECHSUPPORT_FEATURE part of config_db.json";
|
||||
|
||||
list AUTO_TECHSUPPORT_FEATURE_LIST {
|
||||
|
||||
key "feature_name";
|
||||
|
||||
leaf feature_name {
|
||||
description "The name of this feature";
|
||||
/* TODO: Leafref once the FEATURE YANG is added*/
|
||||
type string {
|
||||
length 1..255;
|
||||
}
|
||||
}
|
||||
|
||||
leaf state {
|
||||
description "Enable auto techsupport invocation on the processes running inside this feature";
|
||||
type stypes:admin_mode;
|
||||
}
|
||||
|
||||
leaf rate_limit_interval {
|
||||
description "Rate limit interval for the corresponding feature. Configure 0 to explicitly disable";
|
||||
type uint16;
|
||||
}
|
||||
|
||||
}
|
||||
/* end of AUTO_TECHSUPPORT_FEATURE_LIST */
|
||||
}
|
||||
/* end of container AUTO_TECHSUPPORT_FEATURE */
|
||||
}
|
||||
/* end of top level container */
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user