[dpu]: Add DPU database service (#17161)

Sub PRs:

sonic-net/sonic-host-services#84
#17191

Why I did it
According to the design, the DPU database instances are hosted on the NPU host.

Microsoft ADO (number only): 25072889

How I did it
Following the multi-ASIC design, this change assumes that a new platform environment variable, NUM_DPU, is defined in /usr/share/sonic/device/$PLATFORM/platform_env.conf. Based on this number, the NPU host launches a corresponding number of DPU database instances.

Signed-off-by: Ze Gan <ganze718@gmail.com>
This commit is contained in:
Ze Gan 2023-11-18 01:10:03 +08:00 committed by GitHub
parent 00a9412880
commit 9f08f88a0d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 119 additions and 15 deletions

View File

@ -32,7 +32,8 @@ RUN apt-get clean -y && \
s/^# unixsocket/unixsocket/; \
s/redis-server.sock/redis.sock/g; \
s/^client-output-buffer-limit pubsub [0-9]+mb [0-9]+mb [0-9]+/client-output-buffer-limit pubsub 0 0 0/; \
s/^notify-keyspace-events ""$/notify-keyspace-events AKE/ \
s/^notify-keyspace-events ""$/notify-keyspace-events AKE/; \
s/^databases [0-9]+$/databases 100/ \
' /etc/redis/redis.conf
COPY ["supervisord.conf.j2", "/usr/share/sonic/templates/"]

View File

@ -2,9 +2,12 @@
"INSTANCES": {
"redis":{
"hostname" : "{{HOST_IP}}",
"port" : 6379,
"unix_socket_path" : "/var/run/redis{{NAMESPACE_ID}}/redis.sock",
"port" : {{REDIS_PORT}},
"unix_socket_path" : "/var/run/redis{{DEV}}/redis.sock",
"persistence_for_warm_boot" : "yes"
{% if DATABASE_TYPE is defined and DATABASE_TYPE != "" %}
,"database_type": "{{DATABASE_TYPE}}"
{% endif %}
},
"redis_chassis":{
"hostname" : "redis_chassis.server",
@ -94,6 +97,30 @@
"separator": ":",
"instance" : "redis"
}
{% if DATABASE_TYPE is defined and DATABASE_TYPE == "dpudb" %}
,
"DPU_APPL_DB" : {
"id" : 15,
"separator": ":",
"instance" : "redis",
"format": "proto"
},
"DPU_APPL_STATE_DB" : {
"id" : 16,
"separator": "|",
"instance" : "redis"
},
"DPU_STATE_DB" : {
"id" : 17,
"separator": "|",
"instance" : "redis"
},
"DPU_COUNTERS_DB" : {
"id" : 18,
"separator": ":",
"instance" : "redis"
}
{% endif %}
},
"VERSION" : "1.0"
}

View File

@ -1,9 +1,15 @@
{% set namespace_cnt = NAMESPACE_COUNT|int %}
{% if NUM_DPU is defined %}
{% set dpu_cnt = NUM_DPU | int %}
{% else %}
{% set dpu_cnt = 0 %}
{% endif %}
{
"INCLUDES" : [
{
"include" : "../../redis/sonic-db/database_config.json"
},
{% if namespace_cnt > 1 %}
{% for ns in range(namespace_cnt) %}
{
@ -15,7 +21,22 @@
},
{% endif %}
{% endfor %}
{% endif %}
{% if dpu_cnt > 0 %}
{% for dpu in range(dpu_cnt) %}
{
"database_type" : "dpudb",
"include" : "../../redisdpu{{dpu}}/sonic-db/database_config.json"
{% if dpu == dpu_cnt-1 %}
}
{% else %}
},
{% endif %}
{% endfor %}
{% endif %}
],
"VERSION" : "1.0"
}
{% endif %}

View File

@ -17,6 +17,18 @@ then
host_ip=127.0.0.1
fi
redis_port=6379
if [[ $DATABASE_TYPE == "dpudb" ]]; then
host_ip="169.254.200.254"
if ! ip -4 -o addr | awk '{print $4}' | grep $host_ip; then
host_ip=127.0.0.1
fi
DPU_ID=`echo $DEV | tr -dc '0-9'`
redis_port=`expr 6381 + $DPU_ID`
fi
REDIS_DIR=/var/run/redis$NAMESPACE_ID
mkdir -p $REDIS_DIR/sonic-db
mkdir -p /etc/supervisor/conf.d/
@ -24,7 +36,7 @@ mkdir -p /etc/supervisor/conf.d/
if [ -f /etc/sonic/database_config$NAMESPACE_ID.json ]; then
cp /etc/sonic/database_config$NAMESPACE_ID.json $REDIS_DIR/sonic-db/database_config.json
else
HOST_IP=$host_ip j2 /usr/share/sonic/templates/database_config.json.j2 > $REDIS_DIR/sonic-db/database_config.json
HOST_IP=$host_ip REDIS_PORT=$redis_port DATABASE_TYPE=$DATABASE_TYPE j2 /usr/share/sonic/templates/database_config.json.j2 > $REDIS_DIR/sonic-db/database_config.json
fi
# on VoQ system, we only publish redis_chassis instance and CHASSIS_APP_DB when
@ -59,7 +71,7 @@ if [[ $DATABASE_TYPE == "chassisdb" ]]; then
fi
# copy/generate the database_global.json file if this is global database service in multi asic platform.
if [[ $NAMESPACE_ID == "" ]] && [[ $NAMESPACE_COUNT -gt 1 ]]
if [[ $DATABASE_TYPE == "" ]] && [[ $NAMESPACE_COUNT -gt 1 || $NUM_DPU -gt 1 ]]
then
if [ -f /etc/sonic/database_global.json ]; then
cp /etc/sonic/database_global.json $REDIS_DIR/sonic-db/database_global.json

View File

@ -58,7 +58,7 @@ function updateSyslogConf()
}
function ebtables_config()
{
if [ "$DEV" ]; then
if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then
# Install ebtables filter in namespaces on multi-asic.
ip netns exec $NET_NS ebtables-restore < /etc/ebtables.filter.cfg
else
@ -169,7 +169,7 @@ function postStartAction()
{%- if docker_container_name == "database" %}
CHASSISDB_CONF="/usr/share/sonic/device/$PLATFORM/chassisdb.conf"
[ -f $CHASSISDB_CONF ] && source $CHASSISDB_CONF
if [ "$DEV" ]; then
if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then
# Enable the forwarding on eth0 interface in namespace.
SYSCTL_NET_CONFIG="/etc/sysctl.d/sysctl-net.conf"
docker exec -i database$DEV sed -i -e "s/^net.ipv4.conf.eth0.forwarding=0/net.ipv4.conf.eth0.forwarding=1/;
@ -223,7 +223,7 @@ function postStartAction()
# databases are not available until database container is ready.
# also chassisdb doesn't support warm/fast reboot, its dump.rdb is deleted
# at service startup time, nothing need to be done here.
if [ "$DATABASE_TYPE" != "chassisdb" ]; then
if [[ "$DATABASE_TYPE" != "chassisdb" ]]; then
# Wait until supervisord and redis starts. This change is needed
# because now database_config.json is jinja2 templated based
# and by the time file gets generated if we do redis ping
@ -503,7 +503,7 @@ start() {
fi
{%- endif %}
if [ -z "$DEV" ]; then
if [[ -z "$DEV" || $DATABASE_TYPE == "dpudb" ]]; then
NET="host"
# For Multi-ASIC platform we have to mount the redis paths for database instances running in different
@ -528,6 +528,11 @@ start() {
DB_OPT=$DB_OPT" --env DATABASE_TYPE=$DATABASE_TYPE"
else
DB_OPT=$DB_OPT" -v /var/run/redis$DEV:/var/run/redis:rw "
DB_OPT=$DB_OPT" --env DATABASE_TYPE=$DATABASE_TYPE "
DB_OPT=$DB_OPT" --env NUM_DPU=$NUM_DPU "
if [[ "$DEV" ]]; then
DB_OPT=$DB_OPT" -v /var/run/redis$DEV:/var/run/redis$DEV:rw "
fi
fi
{%- endif %}
else
@ -557,6 +562,12 @@ start() {
fi
fi
{%- endif %}
NAMESPACE_ID="$DEV"
if [[ $DATABASE_TYPE == "dpudb" ]]; then
NAMESPACE_ID=""
fi
{%- if sonic_asic_platform == "mellanox" %}
# TODO: Mellanox will remove the --tmpfs exception after SDK socket path changed in new SDK version
{%- endif %}
@ -630,9 +641,10 @@ start() {
{%- if mount_default_tmpfs|default("n") == "y" %}
--tmpfs /var/tmp \
{%- endif %}
--env "NAMESPACE_ID"="$DEV" \
--env "NAMESPACE_ID"="$NAMESPACE_ID" \
--env "NAMESPACE_PREFIX"="$NAMESPACE_PREFIX" \
--env "NAMESPACE_COUNT"=$NUM_ASIC \
--env "NAMESPACE_COUNT"="$NUM_ASIC" \
--env "DEV"="$DEV" \
--env "CONTAINER_NAME"=$DOCKERNAME \
--name=$DOCKERNAME \
{%- if docker_container_name == "gbsyncd" %}
@ -668,7 +680,7 @@ wait() {
stop() {
{%- if docker_container_name == "database" %}
docker stop $DOCKERNAME
if [ "$DEV" ]; then
if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then
ip netns delete "$NET_NS"
fi
{%- elif docker_container_name == "teamd" %}
@ -684,7 +696,7 @@ stop() {
kill() {
{%- if docker_container_name == "database" %}
docker kill $DOCKERNAME
if [ "$DEV" ]; then
if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then
ip netns delete "$NET_NS"
fi
{%- else %}
@ -701,11 +713,16 @@ if [ "$DEV" == "chassisdb" ]; then
DOCKERNAME=$DOCKERNAME"-chassis"
unset DEV
fi
if [[ "$DEV" == *"dpu"* ]]; then
DATABASE_TYPE="dpudb"
fi
{%- endif %}
NAMESPACE_PREFIX="asic"
DOCKERNAME=$DOCKERNAME$DEV
CONTAINER_EXISTS="no"
if [ "$DEV" ]; then
if [[ "$DEV" && $DATABASE_TYPE != "dpudb" ]]; then
NET_NS="$NAMESPACE_PREFIX$DEV" #name of the network namespace
SONIC_CFGGEN="sonic-cfggen -n $NET_NS"

View File

@ -45,6 +45,9 @@ CHASSIS_INFO_SERIAL_FIELD = 'serial'
CHASSIS_INFO_MODEL_FIELD = 'model'
CHASSIS_INFO_REV_FIELD = 'revision'
# DPU constants
DPU_NAME_PREFIX = "dpu"
# Cacheable Objects
sonic_ver_info = {}
hw_info_dict = {}
@ -841,3 +844,26 @@ def is_frontend_port_present_in_host():
if not namespace_id:
return False
return True
def get_num_dpus():
    """Return the number of DPUs declared in the platform env config file.

    The file located by ``get_platform_env_conf_file_path()`` is scanned
    line by line for the first ``num_dpu=<n>`` entry. The key is matched
    case-insensitively, and whitespace around both key and value is ignored.

    Returns:
        int: The configured DPU count, or 0 when
        ``get_platform_env_conf_file_path()`` returns None or the file
        contains no ``num_dpu`` entry.

    Raises:
        ValueError: If the ``num_dpu`` value cannot be parsed as an integer.
    """
    # TODO: use the platform API to get the DPU count instead of relying
    # on the platform env config.
    platform_env_conf_file_path = get_platform_env_conf_file_path()

    # platform_env.conf file not present for this platform: report no DPUs.
    if platform_env_conf_file_path is None:
        return 0

    with open(platform_env_conf_file_path) as platform_env_conf_file:
        for line in platform_env_conf_file:
            tokens = line.split('=')
            if len(tokens) < 2:
                # Not a key=value line.
                continue
            # Strip the key as well as the value so that entries written
            # with leading whitespace or spaces around '=' are not
            # silently skipped.
            if tokens[0].strip().lower() == 'num_dpu':
                # Only the first matching entry is honoured.
                return int(tokens[1].strip())

    return 0