[202205] Implement zram compression for docker in RAM (#15137)
* [Arista] Fix boot0 code for docker_inram Enable docker_inram for all systems with 4GB or less of flash. This is mandatory to allow these systems to store 2 SONiC images. This change also fixes the missing docker_inram attribute when installing a new image from SONiC. Because the SWI image can ship with additional kernel parameters within, such as `sonic_fips=`, this led to a conflict. To prevent the conflict, the extra kernel parameters from the SWI are now stored in the file `kernel-cmdline-append`, which isn't used anywhere else. * Add optional zram compression for docker_inram Some devices running SONiC have a small storage device (2G and 4G mainly). The SONiC image growth over time has made it impossible to install 2 images on a single device. Some mitigations have been implemented in the past for some devices but there is a need to do more. One such mitigation is `docker_inram`, which creates a `tmpfs` and extracts `dockerfs.tar.gz` in it. This all happens in the SONiC initramfs, and relies on the installation process not extracting `dockerfs.tar.gz` on the flash but keeping the file as is. This mitigation makes a tradeoff by using more RAM to reduce the disk footprint. It however creates new issues for devices with 4G of system memory since the extracted `dockerfs.tar.gz` nears 1.6G. Considering Debian upgrades (with dual base images) and the continuous stream of features, this is only going to get bigger. This change introduces an alternative to the `tmpfs` by allowing a system to extract the `dockerfs.tar.gz` inside a `zram` device, thus bringing compression into play at the expense of performance. Introduce 2 new optional kernel parameters to be consumed by SONiC initramfs. 
- `docker_inram_size` which represents the max physical size of the `zram` or `tmpfs` volume (defaults to DOCKER_RAMFS_SIZE) - `docker_inram_algo` which is the method to use to extract the `dockerfs.tar.gz` (defaults to `tmpfs`); other values are considered to be compression algorithms for `zram` (e.g. `zstd`, `lzo-rle`, `lz4`). Refactored the logic to mount the docker fs in the SONiC initramfs under the `union-mount` script. Moved the code into a function to make it cleaner and separated the inram volume creation and docker extraction. On Arista platforms with a flash smaller or equal to 4GB, set `docker_inram_algo` to `zstd`, which produces the best compression ratio at the expense of a slower write performance and a similar read performance to other `zram` compression algorithms.
This commit is contained in:
parent
336697d7f6
commit
feb8671601
@ -202,12 +202,12 @@ elif [ "$IMAGE_TYPE" = "aboot" ]; then
|
||||
zip -g $OUTPUT_ABOOT_IMAGE .platforms_asic
|
||||
|
||||
if [ "$ENABLE_FIPS" = "y" ]; then
|
||||
echo "sonic_fips=1" > kernel-cmdline
|
||||
echo "sonic_fips=1" >> kernel-cmdline-append
|
||||
else
|
||||
echo "sonic_fips=0" > kernel-cmdline
|
||||
echo "sonic_fips=0" >> kernel-cmdline-append
|
||||
fi
|
||||
zip -g $OUTPUT_ABOOT_IMAGE kernel-cmdline
|
||||
rm kernel-cmdline
|
||||
zip -g $OUTPUT_ABOOT_IMAGE kernel-cmdline-append
|
||||
rm kernel-cmdline-append
|
||||
|
||||
zip -g $OUTPUT_ABOOT_IMAGE $ABOOT_BOOT_IMAGE
|
||||
rm $ABOOT_BOOT_IMAGE
|
||||
|
@ -402,7 +402,7 @@ extract_image() {
|
||||
extract_image_secureboot() {
|
||||
info "Extracting necessary swi content"
|
||||
# NOTE: boot/ is not used by the boot process but only extracted for kdump
|
||||
unzip -oq "$swipath" 'boot/*' .imagehash -d "$image_path"
|
||||
unzip -oq "$swipath" 'boot/*' .imagehash kernel-cmdline-append -d "$image_path"
|
||||
|
||||
## Extract platform.tar.gz
|
||||
info "Extracting platform.tar.gz"
|
||||
@ -442,7 +442,7 @@ write_machine_config() {
|
||||
## Detect SKU and create a hardware description file
|
||||
aboot_version=$(cmdline_get Aboot | sed 's/^.*norcal.-//')
|
||||
if [ -x /bin/sysinit ]; then
|
||||
aboot_build_date=$(stat -c %y /bin/sysinit | sed 's/ /T/')
|
||||
aboot_build_date=$(stat -c %y /bin/sysinit | sed 's/ /T/g')
|
||||
else
|
||||
aboot_build_date="unknown"
|
||||
fi
|
||||
@ -650,10 +650,11 @@ write_platform_specific_cmdline() {
|
||||
else
|
||||
varlog_size=256
|
||||
cmdline_add logs_inram=on
|
||||
cmdline_add docker_inram=on
|
||||
cmdline_add docker_inram_algo=zstd
|
||||
if [ $flash_size -le 2000 ]; then
|
||||
# enable docker_inram for switches with less than 2G of flash
|
||||
varlog_size=128
|
||||
cmdline_add docker_inram=on
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
@ -749,13 +750,19 @@ write_cmdline() {
|
||||
fi
|
||||
fi
|
||||
|
||||
# FIXME: sonic sometimes adds extra kernel parameters from user space
|
||||
# this is unsafe but some will be kept as part of the regular boot
|
||||
if [ -f "$image_path/kernel-cmdline" ]; then
|
||||
for field in $cmdline_allowlist; do
|
||||
cat "$image_path/kernel-cmdline" | tr ' ' '\n' | grep -E "$field" | tail -n 1 | cmdline_append
|
||||
done
|
||||
fi
|
||||
# NOTE: SONiC might need to provide some extra kernel parameter to change the
|
||||
# next boot behavior. The following lines lookup allowed parameters and
|
||||
# append them to the cmdline.
|
||||
# - kernel-cmdline is still modified but its usage should ideally be deprecated over time
|
||||
# - kernel-cmdline-append is for the user (SONiC) to use.
|
||||
# this file can be either packaged in the swi or generated from userland
|
||||
for cpath in "$image_path/kernel-cmdline" "$image_path/kernel-cmdline-append"; do
|
||||
if [ -f "$cpath" ]; then
|
||||
for field in $cmdline_allowlist; do
|
||||
cat "$cpath" | tr ' ' '\n' | grep -E "$field" | tail -n 1 | cmdline_append
|
||||
done
|
||||
fi
|
||||
done
|
||||
|
||||
# FIXME: legacy configuration files used by fast-reboot and eos2sonic
|
||||
# these should be deprecated over time.
|
||||
@ -832,11 +839,15 @@ regular_install() {
|
||||
|
||||
mkdir -p $image_path
|
||||
|
||||
info "Generating boot-config, machine.conf and cmdline"
|
||||
write_regular_configs "$image_path"
|
||||
|
||||
info "Installing image under $image_path"
|
||||
extract_image
|
||||
|
||||
info "Generating boot-config, machine.conf and cmdline"
|
||||
write_regular_configs "$image_path"
|
||||
# NOTE: this call is necessary to process the kernel-cmdline-append file coming
|
||||
# from the just extracted swi
|
||||
write_cmdline
|
||||
|
||||
run_hooks post-install
|
||||
}
|
||||
|
@ -5,3 +5,4 @@ nls_ascii
|
||||
nls_cp437
|
||||
nls_utf8
|
||||
nvme
|
||||
zstd
|
||||
|
@ -12,6 +12,8 @@ case $1 in
|
||||
esac
|
||||
|
||||
docker_inram=false
|
||||
docker_inram_algo=tmpfs
|
||||
docker_inram_size={{ DOCKER_RAMFS_SIZE }}
|
||||
logs_inram=false
|
||||
secureboot=false
|
||||
bootloader=generic
|
||||
@ -27,6 +29,12 @@ for x in $(cat /proc/cmdline); do
|
||||
docker_inram=on)
|
||||
docker_inram=true
|
||||
;;
|
||||
docker_inram_algo=*)
|
||||
docker_inram_algo="${x#docker_inram_algo=}"
|
||||
;;
|
||||
docker_inram_size=*)
|
||||
docker_inram_size="${x#docker_inram_size=}"
|
||||
;;
|
||||
logs_inram=on)
|
||||
logs_inram=true
|
||||
;;
|
||||
@ -95,6 +103,63 @@ remove_not_in_allowlist_files()
|
||||
rm -f $allowlist_pattern_file
|
||||
}
|
||||
|
||||
# Helper: create a zram device, format it as ext4 and mount it on $1.
#
# Globals (read):
#   docker_inram_algo  compression algorithm requested for the zram device
#   docker_inram_size  value written to the device's disksize attribute
# Arguments:
#   $1 - mount point
# Returns:
#   0 when the device is mounted, non-zero when any step failed (the
#   half-configured device is released again in that case).
_mount_docker_zram()
{
    local target="$1"
    local zid zname

    # num_devices=0 asks the module not to pre-create any device; the one
    # used here is allocated on demand through hot_add below.
    modprobe zram num_devices=0 || return 1

    # Reading hot_add allocates a new device and prints its id. Declaration
    # and assignment are split so that a failed read is not masked by 'local'.
    zid="$(cat /sys/class/zram-control/hot_add)" || return 1
    [ -n "$zid" ] || return 1
    zname="zram$zid"

    # attempt to use desired algorithm; an unsupported one is not fatal,
    # the device simply keeps its default
    if ! echo "$docker_inram_algo" 2>/dev/null > "/sys/block/$zname/comp_algorithm"; then
        echo "zram algorithm $docker_inram_algo is not supported"
        echo "using default instead: $(cat "/sys/block/$zname/comp_algorithm")"
    fi

    # size the device, create a journal-less ext4 filesystem on it and mount it
    if echo "$docker_inram_size" > "/sys/block/$zname/disksize" &&
       mkfs.ext4 -m 0 -L dockerfs -O '^has_journal' -q "/dev/$zname" &&
       mount -o rw,nodev "/dev/$zname" "$target"; then
        return 0
    fi

    # best effort: give the unused device back so it does not linger
    echo "$zid" 2>/dev/null > /sys/class/zram-control/hot_remove || true
    return 1
}

# Create the in-memory volume backing ${rootmnt}/var/lib/docker and mount it.
#
# Globals (read):
#   docker_inram_algo  "tmpfs" selects a plain tmpfs; any other value is
#                      treated as a zram compression algorithm
#   docker_inram_size  size of the tmpfs, or disksize of the zram device
#   rootmnt            root of the filesystem being assembled
#
# When the zram volume cannot be set up (module missing, device creation,
# mkfs or mount failure) a tmpfs is mounted instead, so that the docker
# filesystem is never extracted onto whatever happens to sit underneath the
# mount point.
mount_docker_inram()
{
    local docker_mnt="${rootmnt}/var/lib/docker"

    if [ "$docker_inram_algo" != "tmpfs" ]; then
        echo "Creating zram to extract {{ FILESYSTEM_DOCKERFS }}"
        if _mount_docker_zram "$docker_mnt"; then
            return 0
        fi
        echo "zram setup failed, falling back to tmpfs" >&2
    fi

    echo "Creating tmpfs to extract {{ FILESYSTEM_DOCKERFS }}"
    mount -t tmpfs -o "rw,nodev,size=$docker_inram_size" tmpfs "$docker_mnt"
}
|
||||
|
||||
# Unpack the docker filesystem archive into ${rootmnt}/var/lib/docker.
#
# Globals (read):
#   rootmnt, image_dir  locate the archive under ${rootmnt}/host/$image_dir
#   secureboot          "false" allows using the archive found on disk
#   bootloader          "aboot" enables extraction straight from the swi
#   swi_path            path of the swi image (only used for aboot)
#
# Sources are tried in this order:
#   1. the archive stored next to the image, unless booted with secureboot
#   2. the archive embedded in the Aboot swi image
# When neither is available only a warning is printed.
extract_dockerfs()
{
    local dockerfs="{{ FILESYSTEM_DOCKERFS }}"
    local archive="${rootmnt}/host/$image_dir/$dockerfs"
    local target="${rootmnt}/var/lib/docker"

    echo "Extracting $dockerfs"
    if [ -f "$archive" ] && [ "$secureboot" = false ]; then
        # Extract dockerfs.tar.gz into /var/lib/docker unless the system booted with secureboot
        # In secureboot dockerfs.tar.gz cannot be trusted as it does not have a signature
        tar xz --numeric-owner -f "$archive" -C "$target"
    elif [ "$bootloader" = "aboot" ] && unzip -l "$swi_path" | grep -qF -- "$dockerfs"; then
        # Aboot swi images also support extracting dockerfs.tar.gz directly from them
        # (-F: the archive name is matched literally, not as a regular expression)
        unzip -qp "$swi_path" "$dockerfs" | tar xz --numeric-owner -C "$target"
    else
        # Warn but allow the system to boot to at least have ssh access
        echo "No $dockerfs to extract, SONiC will be broken"
    fi
}
|
||||
|
||||
# Provide the docker storage directory at ${rootmnt}/var/lib/docker.
#
# Globals (read):
#   in_kdump      "true" skips the whole step
#   docker_inram  "true" selects an in-memory volume filled from the archive
#   rootmnt, image_dir  locate the on-disk docker directory otherwise
mount_docker()
{
    # There is no point in mounting the docker filesystem in kdump environment
    # Especially when there is some space mitigation in place
    if [ "$in_kdump" = true ]; then
        return 0
    fi

    if [ "$docker_inram" = true ]; then
        # Create an in memory filesystem (tmpfs, zram) and extract dockerfs.tar.gz
        mount_docker_inram
        extract_dockerfs
    else
        # Mount the working directory of docker engine in the raw partition, bypass the overlay
        mount --bind "${rootmnt}/host/$image_dir/{{ DOCKERFS_DIR }}" "${rootmnt}/var/lib/docker"
    fi
}
|
||||
|
||||
## Mount the overlay file system: rw layer over squashfs
|
||||
image_dir=$(cat /proc/cmdline | sed -e 's/.*loop=\(\S*\)\/.*/\1/')
|
||||
rw_dir=${rootmnt}/host/$image_dir/rw
|
||||
@ -137,30 +202,14 @@ case "${ROOT}" in
|
||||
;;
|
||||
esac
|
||||
|
||||
## Mount the docker storage path
|
||||
mkdir -p ${rootmnt}/var/lib/docker
|
||||
if [ "$in_kdump" = false ]; then
|
||||
if [ "$secureboot" = true ]; then
|
||||
mount -t tmpfs -o rw,nodev,size={{ DOCKER_RAMFS_SIZE }} tmpfs ${rootmnt}/var/lib/docker
|
||||
if [ "$bootloader" = "aboot" ]; then
|
||||
unzip -qp "$swi_path" dockerfs.tar.gz | tar xz --numeric-owner -C ${rootmnt}/var/lib/docker
|
||||
## Boot folder is not extracted during secureboot since content would inherently become unsafe
|
||||
mkdir -p ${rootmnt}/host/$image_dir/boot
|
||||
else
|
||||
echo "secureboot unsupported for bootloader $bootloader" 1>&2
|
||||
exit 1
|
||||
fi
|
||||
elif [ -f ${rootmnt}/host/$image_dir/{{ FILESYSTEM_DOCKERFS }} ]; then
|
||||
## mount tmpfs and extract docker into it
|
||||
mount -t tmpfs -o rw,nodev,size={{ DOCKER_RAMFS_SIZE }} tmpfs ${rootmnt}/var/lib/docker
|
||||
tar xz --numeric-owner -f ${rootmnt}/host/$image_dir/{{ FILESYSTEM_DOCKERFS }} -C ${rootmnt}/var/lib/docker
|
||||
else
|
||||
## Mount the working directory of docker engine in the raw partition, bypass the overlay
|
||||
mount --bind ${rootmnt}/host/$image_dir/{{ DOCKERFS_DIR }} ${rootmnt}/var/lib/docker
|
||||
fi
|
||||
fi
|
||||
mount_docker
|
||||
|
||||
## Mount the boot directory in the raw partition, bypass the overlay
|
||||
mkdir -p ${rootmnt}/boot
|
||||
# make sure that the boot folder exists before attempting a mount
|
||||
mkdir -p ${rootmnt}/host/$image_dir/boot
|
||||
mount --bind ${rootmnt}/host/$image_dir/boot ${rootmnt}/boot
|
||||
|
||||
## Mount loop device or tmpfs for /var/log
|
||||
|
Reference in New Issue
Block a user