sonic-buildimage/scripts/build-optimize-fs-size.py

254 lines
7.2 KiB
Python
Raw Normal View History

Reduce SONiC image filesystem size (#16948). Why I did it: Running SONiC releases past 202012 has become really challenging on systems with small storage devices (4GB). Some of these devices can also be limited by only having 4GB of RAM, which complicates mitigations. The main contributor to these issues is the SONiC image growth. Being able to reduce it by some decent amount should allow these systems to run SONiC longer. It would also reduce some impacts related to space savings mitigations. Work item tracking: Microsoft ADO (number only): How I did it: Add a build option to reduce the image size. The image reduction process affects the builds in 2 ways: (1) change some packages that are installed in the rootfs; (2) apply a rootfs reduction script. The script itself will perform a few steps: remove file duplication by leveraging hardlinks (under /usr/share/sonic, since the symlinks under the device folder are lost during the build; under /var/lib/docker, since the files there will only be mounted ro); remove some extra files (man, docs, licenses, ...); some image specific space reduction (only for aboot images currently). The script can later be improved but for now it's reducing the rootfs size by ~30%. How to verify it: Compare the size of an image with this option enabled and with this option disabled. Expect the fully extracted content to be ~30% less. Which release branch to backport (provide reason below if selected): This is a backport of #16729. Description for the changelog: Add build option to reduce final image size
2023-10-24 08:08:38 -05:00
#!/usr/bin/env python3
import argparse
import hashlib
import os
import shutil
import subprocess
import sys
from collections import defaultdict
from functools import cached_property
# Module-wide switch: when true, destructive helpers in this file print the
# action they would take instead of performing it.
DRY_RUN = False


def enable_dry_run(enabled):
    """Set the module-level ``DRY_RUN`` flag to ``enabled``."""
    global DRY_RUN  # pylint: disable=global-statement
    DRY_RUN = enabled
class File:
    """A filesystem path with lazily computed, cached stat/size/checksum."""

    def __init__(self, path):
        self.path = path

    def __str__(self):
        return self.path

    def rmtree(self):
        """Recursively delete the directory at ``self.path``.

        In dry-run mode the action is only printed.
        """
        if DRY_RUN:
            print(f'rmtree {self.path}')
            return
        shutil.rmtree(self.path)

    def hardlink(self, src):
        """Replace this file with a hard link to ``src``.

        Args:
            src: the ``File`` whose content this path should share.

        Raises:
            OSError: if the link cannot be created. In that case the file at
                ``self.path`` is left untouched.
        """
        if DRY_RUN:
            print(f'hardlink {self.path} {src}')
            return
        if os.path.samefile(src.path, self.path):
            # Both names already refer to the same inode: nothing to gain.
            return
        st = self.stats
        # Create the link under a temporary name first and then rename it over
        # the target. Removing the target before linking would lose the file
        # whenever os.link() fails (missing source, cross-device, link limit).
        tmp = f'{self.path}.hardlink-{os.getpid()}'
        os.link(src.path, tmp)
        try:
            os.replace(tmp, self.path)
        except OSError:
            os.remove(tmp)
            raise
        # NOTE: hard links share a single inode, so the metadata written below
        # is also what src.path reports afterwards.
        os.chmod(self.path, st.st_mode)
        os.chown(self.path, st.st_uid, st.st_gid)
        os.utime(self.path, times=(st.st_atime, st.st_mtime))

    @property
    def name(self):
        """Final component of the path."""
        return os.path.basename(self.path)

    @cached_property
    def stats(self):
        """``os.stat`` result for the path, computed once."""
        return os.stat(self.path)

    @cached_property
    def size(self):
        """File size in bytes, taken from the cached stat result."""
        return self.stats.st_size

    @cached_property
    def checksum(self):
        """Hex MD5 digest of the file content, computed once."""
        digest = hashlib.md5()
        with open(self.path, 'rb') as f:
            # Hash in fixed-size chunks so large files are never fully loaded
            # in memory.
            for chunk in iter(lambda: f.read(1 << 20), b''):
                digest.update(chunk)
        return digest.hexdigest()
class FileManager:
    """Collect the regular files under a directory and hard-link duplicates.

    Two files are treated as duplicates when they have the same basename and
    the same content checksum.
    """

    def __init__(self, path):
        self.path = path
        self.files = []
        self.folders = []
        # basename -> files carrying that name
        self.nindex = defaultdict(list)
        # (basename, checksum) -> files with that name and content
        self.cindex = defaultdict(list)

    def add_file(self, path):
        """Register ``path`` if it is a regular file and not a symlink."""
        if not os.path.isfile(path) or os.path.islink(path):
            return
        f = File(path)
        self.files.append(f)

    def load_tree(self):
        """Walk ``self.path`` and rebuild the file and folder lists."""
        self.files = []
        self.folders = []
        for root, _, files in os.walk(self.path):
            self.folders.append(File(root))
            for f in files:
                self.add_file(os.path.join(root, f))
        print(f'loaded {len(self.files)} files and {len(self.folders)} folders')

    def generate_index(self):
        """Rebuild ``nindex`` and ``cindex`` from ``self.files``.

        Only files that share both basename and size with at least one other
        file are hashed; a file without such a peer cannot have a duplicate,
        so reading its content would be wasted work.
        """
        print('Computing file hashes')
        # Start from empty indexes so that calling this twice does not list
        # the same file several times in one group.
        self.nindex = defaultdict(list)
        self.cindex = defaultdict(list)
        candidates = defaultdict(list)
        for f in self.files:
            self.nindex[f.name].append(f)
            candidates[(f.name, f.size)].append(f)
        for group in candidates.values():
            if len(group) < 2:
                continue
            for f in group:
                self.cindex[(f.name, f.checksum)].append(f)

    def create_hardlinks(self):
        """Hard-link every duplicate to the first file of its group."""
        print('Creating hard links')
        for files in self.cindex.values():
            if len(files) <= 1:
                continue
            orig = files[0]
            for f in files[1:]:
                f.hardlink(orig)
class FsRoot:
    """Operations that shrink a root filesystem tree located at ``path``."""

    def __init__(self, path):
        self.path = path

    @staticmethod
    def _rmtree(path, label=''):
        """Delete the tree at ``path``, or only print the action in dry-run.

        ``label`` is inserted between ``rmtree`` and the path in the dry-run
        message and must carry its own trailing space.
        """
        if DRY_RUN:
            print(f'rmtree {label}{path}')
        else:
            shutil.rmtree(path)

    def iter_fsroots(self):
        """Yield the root itself, then ``<layer>/diff`` for each docker layer.

        Layers are the entries of ``var/lib/docker/overlay2`` under the root.
        When that directory does not exist only the root is yielded.
        """
        yield self.path
        dimgpath = os.path.join(self.path, 'var/lib/docker/overlay2')
        if not os.path.isdir(dimgpath):
            return
        for layer in sorted(os.listdir(dimgpath)):
            yield os.path.join(dimgpath, layer, 'diff')

    def collect_fsroot_size(self):
        """Return the first field printed by ``du -sb`` for the root, as int."""
        cmd = ['du', '-sb', self.path]
        # check=False: a non-zero exit status does not raise; whatever du
        # printed on stdout is parsed regardless.
        p = subprocess.run(cmd, text=True, check=False,
                           stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        return int(p.stdout.split()[0])

    def _remove_root_paths(self, relpaths):
        """Remove each existing directory in ``relpaths`` from every fsroot."""
        for root in self.iter_fsroots():
            for relpath in relpaths:
                path = os.path.join(root, relpath)
                if os.path.isdir(path):
                    self._rmtree(path)

    def remove_docs(self):
        """Remove the documentation directories from every fsroot."""
        self._remove_root_paths([
            'usr/share/doc',
            'usr/share/doc-base',
            'usr/local/share/doc',
            'usr/local/share/doc-base',
        ])

    def remove_mans(self):
        """Remove the manpage directories from every fsroot."""
        self._remove_root_paths([
            'usr/share/man',
            'usr/local/share/man',
        ])

    def remove_licenses(self):
        """Remove ``usr/share/common-licenses`` from every fsroot."""
        self._remove_root_paths([
            'usr/share/common-licenses',
        ])

    def hardlink_under(self, path):
        """Hard-link duplicate files found under ``path`` (relative to root)."""
        fm = FileManager(os.path.join(self.path, path))
        fm.load_tree()
        fm.generate_index()
        fm.create_hardlinks()

    def remove_platforms(self, filter_func):
        """Remove every entry of ``usr/share/sonic/device`` that is rejected.

        Args:
            filter_func: called with the entry name; a falsy result means the
                entry is removed.
        """
        devpath = os.path.join(self.path, 'usr/share/sonic/device')
        for platform in os.listdir(devpath):
            if not filter_func(platform):
                self._rmtree(os.path.join(devpath, platform), 'platform ')

    def remove_modules(self, modules):
        """Remove the given module directories for every kernel version.

        Args:
            modules: directory paths relative to ``lib/modules/<version>``.

        Every entry of ``lib/modules`` is processed, so the result does not
        depend on directory listing order. A missing or empty ``lib/modules``
        is a no-op.
        """
        modpath = os.path.join(self.path, 'lib/modules')
        if not os.path.isdir(modpath):
            return
        for kversion in sorted(os.listdir(modpath)):
            kmodpath = os.path.join(modpath, kversion)
            for module in modules:
                path = os.path.join(kmodpath, module)
                if os.path.isdir(path):
                    self._rmtree(path, 'module ')

    def remove_firmwares(self, firmwares):
        """Remove the given directories from ``lib/firmware`` when present."""
        fwpath = os.path.join(self.path, 'lib/firmware')
        for fw in firmwares:
            path = os.path.join(fwpath, fw)
            if os.path.isdir(path):
                self._rmtree(path, 'firmware ')

    def specialize_aboot_image(self):
        """Apply the reductions used for ``aboot`` images."""

        def keep_platform(name):
            # Keep names without a dash, and any name mentioning 'arista' or
            # 'common'; everything else is removed.
            return '-' not in name or 'arista' in name or 'common' in name

        self.remove_platforms(keep_platform)
        self.remove_modules([
            'kernel/drivers/gpu',
            'kernel/drivers/infiniband',
        ])
        self.remove_firmwares([
            'amdgpu',
            'i915',
            'mediatek',
            'nvidia',
            'radeon',
        ])

    def specialize_image(self, image_type):
        """Run the image-type specific reductions; only 'aboot' has any."""
        if image_type == 'aboot':
            self.specialize_aboot_image()
def parse_args(args):
    """Parse the command line given as a list of strings.

    Returns the ``argparse.Namespace`` holding ``fsroot``, ``stats``,
    ``hardlinks``, ``remove_docs``, ``remove_licenses``, ``remove_mans``,
    ``image_type`` and ``dry_run``.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument

    add('fsroot', help="path to the fsroot build folder")
    add('-s', '--stats', action='store_true', help="show space statistics")
    add('--hardlinks', action='append',
        help="path where similar files need to be hardlinked")

    # The removal switches only differ by name and help text.
    removal_flags = (
        ('--remove-docs', "remove documentation"),
        ('--remove-licenses', "remove license files"),
        ('--remove-mans', "remove manpages"),
    )
    for flag, description in removal_flags:
        add(flag, action='store_true', help=description)

    add('--image-type', default=None, help="type of image being built")
    add('--dry-run', action='store_true',
        help="only display what would happen")

    return parser.parse_args(args)
def main(args):
    """Run the requested size reductions on the fsroot named in ``args``.

    Args:
        args: command-line arguments, without the program name.

    Returns:
        0 on completion.
    """
    args = parse_args(args)
    enable_dry_run(args.dry_run)
    fs = FsRoot(args.fsroot)

    begin = None
    if args.stats:
        begin = fs.collect_fsroot_size()
        print(f'fsroot size is {begin} bytes')

    if args.remove_docs:
        fs.remove_docs()
    if args.remove_mans:
        fs.remove_mans()
    if args.remove_licenses:
        fs.remove_licenses()
    if args.image_type:
        fs.specialize_image(args.image_type)

    # With action='append' and no default, the attribute is None when the
    # option was never given; treat that as "no paths".
    for path in args.hardlinks or ():
        fs.hardlink_under(path)

    if args.stats:
        end = fs.collect_fsroot_size()
        # Avoid dividing by zero when the initial size was reported as 0.
        pct = 100 - end / begin * 100 if begin else 0.0
        print(f'fsroot reduced to {end} from {begin} {pct:.2f}')
    return 0
if __name__ == '__main__':
    # Pass the arguments without the program name and use main()'s return
    # value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)