Add daemon which periodically pushes process and docker stats to State DB (#3525)

This commit is contained in:
pra-moh 2019-11-27 15:35:41 -08:00 committed by Joe LeVeque
parent 7622a30d98
commit bfa96bbce3
3 changed files with 232 additions and 0 deletions

View File

@ -253,6 +253,11 @@ sudo cp $IMAGE_CONFIGS/caclmgrd/caclmgrd.service $FILESYSTEM_ROOT/etc/systemd/s
echo "caclmgrd.service" | sudo tee -a $GENERATED_SERVICE_FILE
sudo cp $IMAGE_CONFIGS/caclmgrd/caclmgrd $FILESYSTEM_ROOT/usr/bin/
# Copy process/docker cpu/memory utilization data export daemon
sudo cp $IMAGE_CONFIGS/procdockerstatsd/procdockerstatsd.service $FILESYSTEM_ROOT/etc/systemd/system/
echo "procdockerstatsd.service" | sudo tee -a $GENERATED_SERVICE_FILE
sudo cp $IMAGE_CONFIGS/procdockerstatsd/procdockerstatsd $FILESYSTEM_ROOT/usr/bin/
# Copy process-reboot-cause service files
sudo cp $IMAGE_CONFIGS/process-reboot-cause/process-reboot-cause.service $FILESYSTEM_ROOT/etc/systemd/system/
echo "process-reboot-cause.service" | sudo tee -a $GENERATED_SERVICE_FILE

View File

@ -0,0 +1,214 @@
#!/usr/bin/env python
'''
procdockerstatsd
Daemon which periodically gathers process and docker statistics and pushes the data to STATE_DB
'''
import os
import re
import subprocess
import sys
import syslog
import time
from datetime import datetime
import swsssdk
VERSION = '1.0'
SYSLOG_IDENTIFIER = "procdockerstatsd"
REDIS_HOSTIP = "127.0.0.1"
# ========================== Syslog wrappers ==========================
def log_info(msg, also_print_to_console=False):
    """Log an INFO-level message to syslog.

    Args:
        msg: message string to log.
        also_print_to_console: when True, also echo the message to stdout.
    """
    syslog.openlog(SYSLOG_IDENTIFIER)
    syslog.syslog(syslog.LOG_INFO, msg)
    syslog.closelog()
    if also_print_to_console:
        # Single-argument print() behaves identically under Python 2 and 3
        print(msg)
def log_warning(msg, also_print_to_console=False):
    """Log a WARNING-level message to syslog.

    Args:
        msg: message string to log.
        also_print_to_console: when True, also echo the message to stdout.
    """
    syslog.openlog(SYSLOG_IDENTIFIER)
    syslog.syslog(syslog.LOG_WARNING, msg)
    syslog.closelog()
    if also_print_to_console:
        # Single-argument print() behaves identically under Python 2 and 3
        print(msg)
def log_error(msg, also_print_to_console=False):
    """Log an ERROR-level message to syslog.

    Args:
        msg: message string to log.
        also_print_to_console: when True, also echo the message to stdout.
    """
    syslog.openlog(SYSLOG_IDENTIFIER)
    syslog.syslog(syslog.LOG_ERR, msg)
    syslog.closelog()
    if also_print_to_console:
        # Single-argument print() behaves identically under Python 2 and 3
        print(msg)
# ========================== ProcessDocker class ==========================
class ProcDockerStats:
    """Collects process ('ps') and container ('docker stats') statistics and
    publishes them to STATE_DB through the swsssdk connector.

    All values are stored as Redis hash fields under keys of the form
    'DOCKER_STATS|<container id>' and 'PROCESS_STATS|<pid>'.
    """

    def __init__(self):
        # Connect once at startup; every update cycle reuses this connection.
        self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP)
        self.state_db.connect("STATE_DB")

    def run_command(self, cmd):
        """Run a shell command and return its stdout as text, or None on failure.

        universal_newlines=True keeps stdout as str under both Python 2 and 3
        (without it, Python 3 returns bytes and the regex parsers break).
        """
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
                                universal_newlines=True)
        (stdout, stderr) = proc.communicate()
        if proc.returncode != 0:
            log_error("Error running command '{}'".format(cmd))
            return None
        return stdout

    def format_docker_cmd_output(self, cmdout):
        """Parse 'docker stats' tabular output into a dict keyed by
        'DOCKER_STATS|<container id>'.

        Columns are separated by runs of two or more spaces. Single spaces
        occur INSIDE header names ('CONTAINER ID') and inside values
        ('100MiB / 1GiB'), so splitting on a single space would shatter
        both keys and values; we split on >= 2 spaces instead.
        """
        lines = cmdout.splitlines()
        if not lines:
            return {}
        keys = re.split(r' {2,}', lines[0].strip())
        docker_data_list = []
        for line in lines[1:]:
            if not line.strip():
                continue  # skip blank/trailing lines
            values = re.split(r' {2,}', line.strip())
            docker_data_list.append(dict(zip(keys, values)))
        return self.create_docker_dict(docker_data_list)

    def format_process_cmd_output(self, cmdout):
        """Parse 'ps' tabular output into a list of per-process dicts.

        'ps' pads its columns with spaces and no header name contains a
        space, so splitting on runs of spaces is safe. Only the trailing
        CMD column may itself contain spaces, so everything from the 9th
        field onward is re-joined (with spaces preserved) into one value.
        """
        lines = cmdout.splitlines()
        if not lines:
            return []
        keylist = [k for k in re.split(' +', lines[0]) if k]
        process_data_list = []
        for line in lines[1:]:
            # filter out empty strings produced by leading padding spaces
            fields = [f for f in re.split(' +', line) if f]
            if not fields:
                continue  # skip blank/trailing lines
            # Re-join the command line (9th column onward) that got split
            # apart because the command itself contains spaces.
            fields[8:] = [' '.join(fields[8:])]
            process_data_list.append(dict(zip(keylist, fields)))
        return process_data_list

    def convert_to_bytes(self, value):
        """Convert a size string such as '1.5MiB' or '2kB' to an int byte count.

        Decimal suffixes (kB/MB/GB) are powers of 1000 and binary suffixes
        (KiB/MiB/GiB) powers of 1024, matching 'docker stats' output.
        The numeric part is captured with a regex so fractional values
        like '1.5' keep their decimal point. Returns None when the value
        cannot be parsed or the unit is unrecognized.
        """
        multipliers = {
            'b': 1,
            'kb': 1000,
            'kib': 1024,
            'mb': 1000 ** 2,
            'mib': 1024 ** 2,
            'gb': 1000 ** 3,
            'gib': 1024 ** 3,
        }
        match = re.match(r'\s*([0-9.]+)\s*([a-zA-Z]+)', value)
        if not match:
            return None
        number = float(match.group(1))
        unit = match.group(2).lower()
        if unit not in multipliers:
            return None
        return int(round(number * multipliers[unit]))

    def create_docker_dict(self, dict_list):
        """Build {'DOCKER_STATS|<cid>': {field: value}} from parsed rows,
        converting all size strings to byte counts.

        Rows without a 'CONTAINER ID' key are skipped. Paired columns
        ('x / y') are assumed to split into exactly two parts, as
        'docker stats' always emits them.
        """
        dockerdict = {}
        for row in dict_list:
            cid = row.get('CONTAINER ID')
            if not cid:
                continue
            key = 'DOCKER_STATS|' + str(cid)
            entry = {}
            entry['NAME'] = row.get('NAME')
            # strip the trailing '%' from percentage columns
            entry['CPU%'] = str(re.split('%', str(row.get('CPU %')))[0])
            memuse = re.split(' / ', str(row.get('MEM USAGE / LIMIT')))
            # converting MiB and GiB to bytes
            entry['MEM_BYTES'] = str(self.convert_to_bytes(memuse[0]))
            entry['MEM_LIMIT_BYTES'] = str(self.convert_to_bytes(memuse[1]))
            entry['MEM%'] = str(re.split('%', str(row.get('MEM %')))[0])
            netio = re.split(' / ', str(row.get('NET I/O')))
            entry['NET_IN_BYTES'] = str(self.convert_to_bytes(netio[0]))
            entry['NET_OUT_BYTES'] = str(self.convert_to_bytes(netio[1]))
            blockio = re.split(' / ', str(row.get('BLOCK I/O')))
            entry['BLOCK_IN_BYTES'] = str(self.convert_to_bytes(blockio[0]))
            entry['BLOCK_OUT_BYTES'] = str(self.convert_to_bytes(blockio[1]))
            entry['PIDS'] = row.get('PIDS')
            dockerdict[key] = entry
        return dockerdict

    def update_dockerstats_command(self):
        """Refresh all DOCKER_STATS|* entries in STATE_DB.

        Returns:
            True on success, False when the docker command failed or its
            output could not be parsed.
        """
        cmd = "docker stats --no-stream -a"
        data = self.run_command(cmd)
        if not data:
            log_error("'{}' returned null output".format(cmd))
            return False
        dockerdata = self.format_docker_cmd_output(data)
        if not dockerdata:
            log_error("formatting for docker output failed")
            return False
        # wipe out all data from state_db before updating
        self.state_db.delete_all_by_pattern('STATE_DB', 'DOCKER_STATS|*')
        # .items() works identically on Python 2 and 3 (iteritems is Py2-only)
        for key, fields in dockerdata.items():
            for field, value in fields.items():
                self.update_state_db(key, field, value)
        return True

    def update_processstats_command(self):
        """Refresh all PROCESS_STATS|* entries in STATE_DB with the top
        (by %CPU) processes reported by 'ps'."""
        data = self.run_command("ps -eo uid,pid,ppid,%mem,%cpu,stime,tty,time,cmd --sort -%cpu | head -1024")
        if not data:
            # guard: without this, a failed 'ps' would crash the parser
            log_error("'ps' returned null output")
            return
        processdata = self.format_process_cmd_output(data)
        # wipe out all data before updating with new values
        self.state_db.delete_all_by_pattern('STATE_DB', 'PROCESS_STATS|*')
        for row in processdata:
            pid = row.get('PID')
            if not pid:
                continue
            key = 'PROCESS_STATS|' + str(pid)
            self.update_state_db(key, 'UID', row.get('UID'))
            self.update_state_db(key, 'PPID', row.get('PPID'))
            self.update_state_db(key, '%CPU', str(row.get('%CPU')))
            self.update_state_db(key, '%MEM', str(row.get('%MEM')))
            self.update_state_db(key, 'STIME', row.get('STIME'))
            self.update_state_db(key, 'TT', row.get('TT'))
            self.update_state_db(key, 'TIME', row.get('TIME'))
            self.update_state_db(key, 'CMD', row.get('CMD'))

    def update_state_db(self, key1, key2, value2):
        """Write one hash field (key2 -> value2) under Redis key key1."""
        self.state_db.set('STATE_DB', key1, key2, value2)

    def run(self):
        """Perform one collection cycle and stamp the update time."""
        self.update_dockerstats_command()
        datetimeobj = datetime.now()
        # Adding key to store latest update time.
        self.update_state_db('DOCKER_STATS|LastUpdateTime', 'lastupdate', datetimeobj)
        self.update_processstats_command()
        self.update_state_db('PROCESS_STATS|LastUpdateTime', 'lastupdate', datetimeobj)
# main start
def main():
    """Daemon entry point: verify root privileges, then push process and
    docker stats to STATE_DB every 2 minutes, forever (never returns)."""
    log_info("process-docker stats daemon starting up..")
    if not os.getuid() == 0:
        log_error("Must be root to run process-docker daemon")
        # Single-argument print() behaves identically under Python 2 and 3
        # (the bare Python 2 print statement is a syntax error on Python 3)
        print("Error: Must be root to run process-docker daemon")
        sys.exit(1)
    pd = ProcDockerStats()
    # Data need to be updated every 2 mins. hence adding delay of 120 seconds
    while True:
        pd.run()
        time.sleep(120)

if __name__ == '__main__':
    main()

View File

@ -0,0 +1,13 @@
[Unit]
Description=Process and docker CPU/memory utilization data export daemon
Requires=database.service updategraph.service
After=database.service updategraph.service

[Service]
Type=simple
ExecStart=/usr/bin/procdockerstatsd
# systemd directive values are case-sensitive: must be "always", not "Always"
Restart=always

[Install]
WantedBy=multi-user.target