sonic-buildimage/files/image_config/monit/restart_service
yozhao101 24e1cde1e6
[201911][Monit] Restart telemetry container if memory usage is beyond the threshold (#7618)
This PR aims to monitor the memory usage of the streaming telemetry container and restart that container if its memory usage exceeds the pre-defined threshold.
2021-05-17 16:51:13 -07:00

89 lines
2.7 KiB
Python
Executable File

#!/usr/bin/env python
import argparse
import sys
import syslog
import subprocess
def get_command_result(command):
    """Execute a shell command and return its exit status, stdout and stderr.

    The command is run through the shell (shell=True), so `command` must be
    a single string.

    Args:
        command: A string containing the command to be executed.

    Returns:
        A tuple (exit_code, stdout, stderr):
        exit_code: 0 if the command exited with status 0, 1 if it exited
            with any non-zero status, 2 if starting or communicating with
            the process raised OSError or ValueError.
        stdout: A string with the stripped standard output (empty when
            exit_code is 2).
        stderr: A string with the stripped standard error, or the text of
            the exception when exit_code is 2.
    """
    try:
        proc_instance = subprocess.Popen(command, stdout=subprocess.PIPE,
                                         stderr=subprocess.PIPE, shell=True,
                                         universal_newlines=True)
        command_stdout, command_stderr = proc_instance.communicate()
    except (OSError, ValueError) as err:
        # Convert the exception to text so the third element is always a
        # string, as documented, regardless of which path produced it.
        return 2, "", str(err)

    # The real return code is deliberately collapsed to 0 (success) or
    # 1 (any failure); callers only test for zero / non-zero.
    exit_code = 0 if proc_instance.returncode == 0 else 1
    return exit_code, command_stdout.strip(), command_stderr.strip()
def reset_failed_flag(service_name):
    """Run `systemctl reset-failed` for a service and log the outcome.

    Args:
        service_name: Name of the service, without the '.service' suffix
            (the suffix is appended when the command is built).

    Returns:
        None
    """
    syslog.syslog(syslog.LOG_INFO,
                  "Resetting failed status of service '{}' ...".format(service_name))

    command = "sudo systemctl reset-failed {}.service".format(service_name)
    status, _, error_text = get_command_result(command)

    # Guard clause: report the failure and stop; success is logged below.
    if status != 0:
        syslog.syslog(syslog.LOG_ERR,
                      "Failed to reset failed status of service '{}'. Error: {}"
                      .format(service_name, error_text))
        return

    syslog.syslog(syslog.LOG_INFO,
                  "Succeeded to reset failed status of service '{}.service'."
                  .format(service_name))
def restart_service(service_name):
    """Clear a service's failed status, then restart it with systemctl.

    A failed restart is reported to syslog at LOG_ERR; nothing is raised or
    returned to the caller.

    Args:
        service_name: Name of the service, without the '.service' suffix
            (the suffix is appended when the command is built).

    Returns:
        None.
    """
    # Reset the failed flag first, then attempt the restart.
    reset_failed_flag(service_name)

    syslog.syslog(syslog.LOG_INFO, "Restarting service '{}' ...".format(service_name))
    result = get_command_result("sudo systemctl restart {}.service".format(service_name))
    status, error_text = result[0], result[2]

    if status == 0:
        return

    syslog.syslog(syslog.LOG_ERR,
                  "Failed to restart the service '{}'. Error: {}"
                  .format(service_name, error_text))
def main():
    """Parse the service name from the command line and restart that service."""
    arg_parser = argparse.ArgumentParser(
        usage="/usr/bin/restart_service <service_name>",
        description="Restart a specific service")
    arg_parser.add_argument("service_name", help="service name")

    service_name = arg_parser.parse_args().service_name
    restart_service(service_name)
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()