Add events to host and create rsyslog_plugin deb pkg (#12059)

Why I did it

Create rsyslog plugin deb for other containers/host to install
Add events for bgp and host events
This commit is contained in:
Zain Budhwani 2022-09-21 09:20:53 -07:00 committed by GitHub
parent 8211c850f1
commit fd6a1b0ce2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 343 additions and 35 deletions

View File

@ -41,7 +41,7 @@ dependent_startup_wait_for=rsyslogd:running
[program:eventd]
command=/usr/sbin/eventd
command=/usr/bin/eventd
priority=3
autostart=false
autorestart=false

View File

@ -56,14 +56,14 @@ COPY ["TS", "/usr/bin/TS"]
COPY ["files/supervisor-proc-exit-listener", "/usr/bin"]
COPY ["zsocket.sh", "/usr/bin/"]
COPY ["*.json", "/etc/rsyslog.d/"]
# COPY ["files/rsyslog_plugin.conf.j2", "/etc/rsyslog.d/"]
COPY ["files/rsyslog_plugin.conf.j2", "/etc/rsyslog.d/"]
RUN chmod a+x /usr/bin/TSA && \
chmod a+x /usr/bin/TSB && \
chmod a+x /usr/bin/TSC && \
chmod a+x /usr/bin/zsocket.sh
# RUN j2 -f json /etc/rsyslog.d/rsyslog_plugin.conf.j2 /etc/rsyslog.d/events_info.json > /etc/rsyslog.d/bgp_events.conf
# RUN rm -f /etc/rsyslog.d/rsyslog_plugin.conf.j2*
RUN rm -f /etc/rsyslog.d/events_info.json*
RUN j2 -f json /etc/rsyslog.d/rsyslog_plugin.conf.j2 /etc/rsyslog.d/events_info.json > /etc/rsyslog.d/bgp_events.conf
RUN rm -f /etc/rsyslog.d/rsyslog_plugin.conf.j2
RUN rm -f /etc/rsyslog.d/events_info.json
ENTRYPOINT ["/usr/bin/docker_init.sh"]

View File

@ -0,0 +1,17 @@
{
"yang_module": "sonic-events-host",
"proclist": [
{
"name": "monit",
"parse_json": "monit_regex.json"
},
{
"name": "sshd",
"parse_json": "sshd_regex.json"
},
{
"name": "systemd",
"parse_json": "systemd_regex.json"
}
]
}

View File

@ -39,7 +39,7 @@
("pmon", "enabled", false, "enabled"),
("radv", "enabled", false, "enabled"),
("snmp", "enabled", true, "enabled"),
("eventd", "enabled", true, "enabled"),
("eventd", "enabled", false, "enabled"),
("swss", "enabled", false, "enabled"),
("syncd", "enabled", false, "enabled"),
("teamd", "enabled", false, "enabled")] %}

View File

@ -0,0 +1,17 @@
[
{
"tag": "disk-usage",
"regex": ".([a-zA-Z0-9-_]*). space usage (\\d+\\.\\d+)% matches resource limit .space usage.(\\d+\\.\\d+)%.",
"params": [ "fs", "usage", "limit" ]
},
{
"tag": "memory-usage",
"regex": "mem usage of (\\d+\\.\\d+)% matches resource limit .mem usage>(\\d+\\.\\d+)%.",
"params": [ "usage", "limit" ]
},
{
"tag": "cpu-usage",
"regex": "cpu user usage of (\\d+\\.\\d+)% matches resource limit .cpu user usage>(\\d+\\.\\d+)%.",
"params": [ "usage", "limit" ]
}
]

View File

@ -12,7 +12,7 @@ $ModLoad omprog
{% for proc in proclist %}
if re_match($programname, "{{ proc.name }}") then {
action(type="omprog"
binary="/usr/share/sonic/scripts/rsyslog_plugin -r /etc/rsyslog.d/{{ proc.parse_json }} -m {{ yang_module }}"
binary="/usr/bin/rsyslog_plugin -r /etc/rsyslog.d/{{ proc.parse_json }} -m {{ yang_module }}"
output="/var/log/rsyslog_plugin.log"
template="prog_msg")
}

View File

@ -319,6 +319,16 @@ sudo dpkg --root=$FILESYSTEM_ROOT -i {{deb}} || sudo LANG=C DEBIAN_FRONTEND=noni
sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/sonic-db-cli_*.deb || \
sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install -f
# Install sonic-rsyslog-plugin
sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/sonic-rsyslog-plugin_*.deb || \
sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install -f
# Generate host conf for rsyslog_plugin
j2 -f json $BUILD_TEMPLATES/rsyslog_plugin.conf.j2 $BUILD_TEMPLATES/events_info.json | sudo tee $FILESYSTEM_ROOT_ETC/rsyslog.d/host_events.conf
sudo cp $BUILD_TEMPLATES/monit_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/sshd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
sudo cp $BUILD_TEMPLATES/systemd_regex.json $FILESYSTEM_ROOT_ETC/rsyslog.d/
# Install custom-built monit package and SONiC configuration files
sudo dpkg --root=$FILESYSTEM_ROOT -i $debs_path/monit_*.deb || \
sudo LANG=C DEBIAN_FRONTEND=noninteractive chroot $FILESYSTEM_ROOT apt-get -y install -f
@ -799,10 +809,6 @@ sudo bash -c "echo { > $FILESYSTEM_ROOT_USR_SHARE_SONIC_TEMPLATES/ctr_image_name
{% endfor %}
sudo bash -c "echo } >> $FILESYSTEM_ROOT_USR_SHARE_SONIC_TEMPLATES/ctr_image_names.json"
# copy rsyslog plugin binary for use by all dockers that use plugin to publish events.
# sudo mkdir -p ${FILESYSTEM_ROOT_USR_SHARE_SONIC_SCRIPTS}
# sudo cp ${files_path}/rsyslog_plugin ${FILESYSTEM_ROOT_USR_SHARE_SONIC_SCRIPTS}/
{% for script in installer_start_scripts.split(' ') -%}
if [ -f $TARGET_MACHINE"_{{script}}" ]; then
sudo cp $TARGET_MACHINE"_{{script}}" $FILESYSTEM_ROOT/usr/bin/{{script}}

View File

@ -0,0 +1,7 @@
[
{
"tag": "event-sshd",
"regex": "auth fail: Password Incorrect. user:.([a-zA-Z0-9-_]*)",
"params": [ "username" ]
}
]

View File

@ -0,0 +1,7 @@
[
{
"tag": "event-stopped-ctr",
"regex": "Stopped ([a-zA-Z-_\\s]*) container",
"params": [ "ctr-name" ]
}
]

View File

@ -22,6 +22,9 @@ import sys
from sonic_py_common import multi_asic, device_info
from swsscommon import swsscommon
EVENTS_PUBLISHER_SOURCE = "sonic-events-host"
EVENTS_PUBLISHER_TAG = "event-down-ctr"
def get_expected_running_containers():
"""
@summary: This function will get the expected running & always-enabled containers by following the rule:
@ -150,6 +153,17 @@ def get_current_running_containers(always_running_containers):
return current_running_containers
def publish_events(lst):
    """Publish one "container down" event per container name in lst.

    Opens a publisher handle on EVENTS_PUBLISHER_SOURCE, emits an event
    tagged EVENTS_PUBLISHER_TAG for each name, then releases the handle.
    """
    handle = swsscommon.events_init_publisher(EVENTS_PUBLISHER_SOURCE)
    # A single FieldValueMap is reused across events; only "name" changes.
    fv = swsscommon.FieldValueMap()
    for ctr_name in lst:
        fv["name"] = ctr_name
        swsscommon.event_publish(handle, EVENTS_PUBLISHER_TAG, fv)
    swsscommon.events_deinit_publisher(handle)
def main():
"""
@summary: This function will compare the difference between the current running containers
@ -162,6 +176,7 @@ def main():
expected_running_containers |= always_running_containers
not_running_containers = expected_running_containers.difference(current_running_containers)
if not_running_containers:
publish_events(not_running_containers)
print("Expected containers not running: " + ", ".join(not_running_containers))
sys.exit(3)

View File

@ -36,13 +36,3 @@ $(DOCKER_EVENTD)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
SONIC_BULLSEYE_DOCKERS += $(DOCKER_EVENTD)
SONIC_BULLSEYE_DBG_DOCKERS += $(DOCKER_EVENTD_DBG)
$(DOCKER_EVENTD)_FILESPATH = $($(SONIC_EVENTD)_SRC_PATH)/rsyslog_plugin
$(DOCKER_EVENTD)_PLUGIN = rsyslog_plugin
$($(DOCKER_EVENTD)_PLUGIN)_PATH = $($(DOCKER_EVENTD)_FILESPATH)
SONIC_COPY_FILES += $($(DOCKER_EVENTD)_PLUGIN)
# Some builds fails to find this file. Remove until we root cause it.
# $(DOCKER_EVENTD)_SHARED_FILES = $($(DOCKER_EVENTD)_PLUGIN)

View File

@ -7,10 +7,10 @@ DOCKER_FPM_FRR_DBG = $(DOCKER_FPM_FRR_STEM)-$(DBG_IMAGE_MARK).gz
$(DOCKER_FPM_FRR)_PATH = $(DOCKERS_PATH)/$(DOCKER_FPM_FRR_STEM)
$(DOCKER_FPM_FRR)_PYTHON_WHEELS += $(SONIC_BGPCFGD) $(SONIC_FRR_MGMT_FRAMEWORK)
$(DOCKER_FPM_FRR)_DEPENDS += $(FRR) $(FRR_SNMP) $(SWSS) $(LIBYANG2)
$(DOCKER_FPM_FRR)_DEPENDS += $(FRR) $(FRR_SNMP) $(SWSS) $(LIBYANG2) $(SONIC_RSYSLOG_PLUGIN)
$(DOCKER_FPM_FRR)_DBG_DEPENDS = $($(DOCKER_SWSS_LAYER_BULLSEYE)_DBG_DEPENDS)
$(DOCKER_FPM_FRR)_DBG_DEPENDS += $(SWSS_DBG) $(LIBSWSSCOMMON_DBG) \
$(FRR_DBG) $(FRR_SNMP_DBG) $(LIBYANG2_DBG)
$(FRR_DBG) $(FRR_SNMP_DBG) $(LIBYANG2_DBG) $(SONIC_RSYSLOG_PLUGIN)
$(DOCKER_FPM_FRR)_DBG_IMAGE_PACKAGES = $($(DOCKER_SWSS_LAYER_BULLSEYE)_DBG_IMAGE_PACKAGES)

View File

@ -11,9 +11,9 @@ SONIC_DPKG_DEBS += $(SONIC_EVENTD)
SONIC_EVENTD_DBG = sonic-$(SONIC_EVENTD_PKG_NAME)-dbgsym_$(SONIC_EVENTD_VERSION)_$(CONFIGURED_ARCH).deb
$(eval $(call add_derived_package,$(SONIC_EVENTD),$(SONIC_EVENTD_DBG)))
SONIC_RSYSLOG_PLUGIN = sonic-rsyslog-plugin_$(SONIC_EVENTD_VERSION)_$(CONFIGURED_ARCH).deb
$(eval $(call add_derived_package,$(SONIC_EVENTD),$(SONIC_RSYSLOG_PLUGIN)))
# The .c, .cpp, .h & .hpp files under src/{$DBG_SRC_ARCHIVE list}
# are archived into debug one image to facilitate debugging.
#
DBG_SRC_ARCHIVE += sonic-eventd

View File

@ -1129,6 +1129,7 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
$(PYTHON_SWSSCOMMON) \
$(PYTHON3_SWSSCOMMON) \
$(SONIC_DB_CLI) \
$(SONIC_RSYSLOG_PLUGIN) \
$(SONIC_UTILITIES_DATA) \
$(SONIC_HOST_SERVICES_DATA) \
$(BASH) \
@ -1292,8 +1293,6 @@ $(addprefix $(TARGET_PATH)/, $(SONIC_INSTALLERS)) : $(TARGET_PATH)/% : \
$(if $($(docker:-dbg.gz=.gz)_MACHINE),\
mv $($(docker:-dbg.gz=.gz)_CONTAINER_NAME).sh $($(docker:-dbg.gz=.gz)_MACHINE)_$($(docker:-dbg.gz=.gz)_CONTAINER_NAME).sh
)
$(foreach file, $($(docker)_SHARED_FILES), \
{ cp $($(file)_PATH)/$(file) $(FILES_PATH)/ $(LOG) || exit 1 ; } ; )
)
# Exported variables are used by sonic_debian_extension.sh

View File

@ -5,6 +5,9 @@ EVENTD_TOOL := tools/events_tool
EVENTD_PUBLISH_TOOL := tools/events_publish_tool.py
RSYSLOG-PLUGIN_TARGET := rsyslog_plugin/rsyslog_plugin
RSYSLOG-PLUGIN_TEST := rsyslog_plugin_tests/tests
EVENTD_MONIT := tools/events_monit_test.py
EVENTD_MONIT_CONF := tools/monit_events
CP := cp
MKDIR := mkdir
CC := g++
@ -68,15 +71,18 @@ rsyslog-plugin-tests: $(RSYSLOG-PLUGIN-TEST_OBJS)
@echo ' '
install:
$(MKDIR) -p $(DESTDIR)/usr/sbin
$(CP) $(EVENTD_TARGET) $(DESTDIR)/usr/sbin
$(CP) $(EVENTD_TOOL) $(DESTDIR)/usr/sbin
$(CP) $(EVENTD_PUBLISH_TOOL) $(DESTDIR)/usr/sbin
$(MKDIR) -p $(DESTDIR)/usr/bin
$(MKDIR) -p $(DESTDIR)/etc/monit/conf.d
$(CP) $(EVENTD_TARGET) $(DESTDIR)/usr/bin
$(CP) $(EVENTD_TOOL) $(DESTDIR)/usr/bin
$(CP) $(EVENTD_PUBLISH_TOOL) $(DESTDIR)/usr/bin
$(CP) $(RSYSLOG-PLUGIN_TARGET) $(DESTDIR)/usr/bin
$(CP) $(EVENTD_MONIT) $(DESTDIR)/usr/bin
$(CP) $(EVENTD_MONIT_CONF) $(DESTDIR)/etc/monit/conf.d
deinstall:
$(RM) $(DESTDIR)/usr/sbin/$(EVENTD_TARGET)
$(RM) $(DESTDIR)/usr/sbin/$(RSYSLOG-PLUGIN_TARGET)
$(RM) -rf $(DESTDIR)/usr/sbin
$(RM) -rf $(DESTDIR)/usr
$(RM) -rf $(DESTDIR)/etc
clean:
-@echo ' '

View File

@ -2,7 +2,7 @@ Source: sonic-eventd
Section: devel
Priority: optional
Maintainer: Renuka Manavalan <remanava@microsoft.com>
Build-Depends: debhelper (>= 12.0.0), libevent-dev, libboost-thread-dev, libboost-system-dev, libswsscommon-dev
Build-Depends: debhelper (>= 12.0.0), libevent-dev, libboost-thread-dev, libboost-system-dev, libswsscommon-dev, liblua5.1-0
Standards-Version: 3.9.3
Homepage: https://github.com/Azure/sonic-buildimage
XS-Go-Import-Path: github.com/Azure/sonic-buildimage
@ -12,3 +12,9 @@ Architecture: any
Built-Using: ${misc:Built-Using}
Depends: ${shlibs:Depends}
Description: SONiC event service
Package: sonic-rsyslog-plugin
Architecture: any
Built-Using: ${misc:Built-Using}
Depends: ${shlibs:Depends}
Description: SONiC rsyslog plugin service

View File

@ -0,0 +1,3 @@
usr/bin/eventd
usr/bin/events_tool
usr/bin/events_publish_tool.py

View File

@ -0,0 +1,3 @@
usr/bin/rsyslog_plugin
usr/bin/events_monit_test.py
etc/monit/conf.d/monit_events

View File

@ -0,0 +1,226 @@
#!/usr/bin/env python3
from inspect import getframeinfo, stack
from swsscommon.swsscommon import events_init_publisher, event_publish, FieldValueMap
from swsscommon.swsscommon import event_receive_op_t, event_receive, events_init_subscriber
from swsscommon.swsscommon import events_deinit_subscriber, events_deinit_publisher
import argparse
import os
import threading
import time
import syslog
import uuid
chk_log_level = syslog.LOG_ERR
test_source = "sonic-host"
test_event_tag = "device-test-event"
test_event_key = "{}:{}".format(test_source, test_event_tag)
test_event_params = {
"sender": os.path.basename(__file__),
"reason": "monit periodic test",
"batch-id": str(uuid.uuid1()),
"index": "0"
}
# Async connection wait time in seconds.
ASYNC_CONN_WAIT = 0.3
RECEIVE_TIMEOUT = 1000
# Thread results
rc_test_receive = -1
publish_cnt = 0
def _log_msg(lvl, pfx, msg):
    """Emit msg to stdout and syslog when lvl passes the chk_log_level filter.

    The caller's function name and line number are prepended; stack()[2]
    skips this helper and the log_* wrapper that invoked it.
    """
    if lvl > chk_log_level:
        return
    caller = getframeinfo(stack()[2][0])
    fmsg = "{}:{}:{}".format(caller.function, caller.lineno, msg)
    print("{}: {}".format(pfx, fmsg))
    syslog.syslog(lvl, fmsg)
def log_err(m):
    """Log m at ERROR severity."""
    _log_msg(syslog.LOG_ERR, "Err", m)


def log_info(m):
    """Log m at INFO severity."""
    _log_msg(syslog.LOG_INFO, "Info", m)


def log_notice(m):
    """Log m at NOTICE severity."""
    _log_msg(syslog.LOG_NOTICE, "Notice", m)


def log_debug(m):
    """Log m at DEBUG severity."""
    _log_msg(syslog.LOG_DEBUG, "Debug", m)
def map_dict_fvm(s, d):
    """Copy every key/value pair from mapping s into d (e.g. a FieldValueMap)."""
    for key, value in s.items():
        d[key] = value
# Runs in a worker thread: subscribes to the event channel, then verifies
# that each of the cnt published test events arrives with the expected
# params. Sets module-global rc_test_receive to 0 only on full success.
# NOTE(review): indentation below is flattened by the diff rendering;
# logical structure is inferred and annotated, code bytes left untouched.
def test_receiver(event_obj, cnt):
global rc_test_receive
# Subscriber handle with RECEIVE_TIMEOUT ms per-receive timeout.
sh = events_init_subscriber(False, RECEIVE_TIMEOUT, None)
# Sleep ASYNC_CONN_WAIT to ensure async connectivity is complete.
time.sleep(ASYNC_CONN_WAIT)
exp_params = dict(test_event_params)
# Signal main thread that subscriber is ready to receive
event_obj.set()
cnt_done = 0
while cnt_done < cnt:
p = event_receive_op_t()
rc = event_receive(sh, p)
if rc > 0 and publish_cnt < 2:
# ignore timeout as test code has not yet published an event yet
continue
if rc != 0:
log_notice("Failed to receive. {}/{} rc={}".format(cnt_done, cnt, rc))
break
if test_event_key != p.key:
# received a different event than published
continue
# The per-event "index" param is expected to match the receive order.
exp_params["index"] = str(cnt_done)
rcv_params = {}
map_dict_fvm(p.params, rcv_params)
# Every expected param must be present with the expected value.
for k, v in exp_params.items():
if k in rcv_params:
if (rcv_params[k] != v):
log_notice("key:{} exp:{} != exist:{}".format(
k, v, rcv_params[k]))
rc = -1
else:
log_notice("key:{} is missing".format(k))
rc = -1
if (rc != 0):
log_notice("params mismatch {}/{}".format(cnt_done, cnt))
break
# No event may have been dropped on the channel.
if p.missed_cnt != 0:
log_notice("Expect missed_cnt {} == 0 {}/{}".format(p.missed_cnt, cnt_done, cnt))
break
if p.publish_epoch_ms == 0:
log_notice("Expect publish_epoch_ms != 0 {}/{}".format(cnt_done, cnt))
break
cnt_done += 1
# NOTE(review): cnt_done was just incremented, so cnt_done + 1 logs one
# past the actual received count — plain cnt_done was likely intended.
log_debug("Received {}/{}".format(cnt_done + 1, cnt))
if (cnt_done == cnt):
rc_test_receive = 0
else:
log_notice("test receive abort {}/{}".format(cnt_done, cnt))
# Signal main thread that subscriber thread is done
event_obj.set()
events_deinit_subscriber(sh)
def publish_events(cnt):
    """Publish cnt test events on test_source.

    Each event carries test_event_params with "index" set to the event's
    ordinal. Increments module-global publish_cnt per successful publish.
    Returns 0 on success, non-zero on any failure (including cnt == 0,
    where no publish ever runs).
    """
    global publish_cnt
    rc = -1
    handle = events_init_publisher(test_source)
    if not handle:
        log_notice("Failed to get publisher handle")
        return rc

    # Messages published before the async ZMQ connection completes are
    # silently dropped, so give the channel ASYNC_CONN_WAIT to come up.
    time.sleep(ASYNC_CONN_WAIT)

    payload = dict(test_event_params)
    for idx in range(cnt):
        fvm = FieldValueMap()
        payload["index"] = str(idx)
        map_dict_fvm(payload, fvm)
        rc = event_publish(handle, test_event_tag, fvm)
        if rc != 0:
            log_notice("Failed to publish. {}/{} rc={}".format(idx, cnt, rc))
            break
        log_debug("published: {}/{}".format(idx + 1, cnt))
        publish_cnt += 1

    # Let the last publish drain before tearing the channel down.
    time.sleep(ASYNC_CONN_WAIT)
    events_deinit_publisher(handle)
    log_debug("publish_events Done. cnt={}".format(cnt))
    return rc
def run_test(cnt):
    """Run one publish/subscribe round trip of cnt events.

    Starts test_receiver in a worker thread, publishes cnt events from this
    thread, and returns 0 only when both publish and receive succeeded;
    otherwise returns the failing side's non-zero result.
    """
    global rc_test_receive

    # Used by the subscriber thread to signal readiness, then completion.
    event_sub = threading.Event()

    thread_sub = threading.Thread(target=test_receiver, args=(event_sub, cnt))
    thread_sub.start()

    # Wait until subscriber thread completes the async subscription;
    # any event published prior to that could get lost. Subscriber waits
    # ASYNC_CONN_WAIT itself, so allow an additional 200ms for its signal.
    event_sub.wait(ASYNC_CONN_WAIT + 0.2)
    event_sub.clear()

    rc_pub = publish_events(cnt)
    if (rc_pub != 0):
        log_notice("Failed in publish_events")
    else:
        # Wait for subscriber to complete with 1 sec timeout.
        event_sub.wait(1)
        if (rc_test_receive != 0):
            log_notice("Failed to receive events")

    log_debug("run_test_DONE rc_pub={} rc_test_receive={}".format(
        rc_pub, rc_test_receive))

    if (rc_pub != 0):
        return rc_pub
    # BUGFIX: the previous code returned 0 (success) even when the receiver
    # failed ("if rc_test_receive == 0: return rc_test_receive" followed by
    # "return 0" yields 0 on every path). Propagate the receiver's result
    # so a receive failure is reported to the caller.
    return rc_test_receive
def main():
    """Parse CLI options, run the eventd publish/receive self-test, log result."""
    global chk_log_level

    parser = argparse.ArgumentParser(
        description="Check events from publish to receive via gNMI")
    parser.add_argument('-l', "--loglvl", default=syslog.LOG_ERR, type=int,
                        help="log level")
    parser.add_argument('-n', "--cnt", default=5, type=int,
                        help="count of events to publish/receive")
    args = parser.parse_args()

    chk_log_level = args.loglvl
    rc = run_test(args.cnt)
    if rc == 0:
        log_info("eventd test succeeded")
    else:
        log_notice("eventd monit test failed rc={}".format(rc))


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,6 @@
###############################################################################
## Monit configuration for eventd container
###############################################################################
check program container_eventd with path "/usr/bin/events_monit_test.py"
every 5 cycles
if status != 0 for 3 cycle then alert repeat every 1 cycles