#!/bin/sh -e

#===============================================================
# Filename : bootstrap
# Purpose  : Bootstraps Adélie from source for any architecture.
# Authors  : Zach van Rijn
# License  : MIT
# Revision : 20240114
#===============================================================

#===============================================================
# README
#===============================================================
#
# overview
# --------
#
# Given a basic development environment ("Baseline System") that
# contains standard system utilities, this script bootstraps the
# Adélie Linux distribution for any supported target architecture
# (assuming that musl, gcc, etc. have been ported to it) WITHOUT
# requiring 'root' privileges. This process is slow* and costly,
# but demonstrates that bootstrapping from source is possible.
#
# Effort has been focused on correctness instead of performance.
# This means, some files can be deleted sooner and some of the
# binaries can run natively on the build machine. We know and do
# appreciate there are different opinions on how to approach the
# bootstrap process. This meets our needs and hopefully offers a
# different perspective on how to simplify cross compilation.
#
# (*) See the "requirements" section for mitigations/discussion.
#
#
# features
# --------
#
#   * One-click bootstrap. Just start the script and walk away.
#
#       $ ./bootstrap ppc64 /path/to/new/scratch/dir
#
#   * Can be done without root privileges: no 'chroot(8)'.
#
#   * This is the real deal. No "seed" binaries are required to
#     go from start to finish. The final output is what we use
#     to set up our official build servers totally* from source.
#
#   * Can be done on a wide range of platforms, almost certainly
#     those supported by mcmtools (want to contribute???).
#
#   * Minimal dependencies. Nothing fancy. Shell scripts only.
#
#   * Can be adapted to assist with porting Adélie to new target
#     architectures. TODO: write a tool to automatically do so.
# # (*) We don't count binaries in your starting environment, and # recommend https://www.gnu.org/software/mes/ for the brave. # # # design # ------ # # The goal of this script is to support as wide a range of uses # as possible. For example, a full cross-platform bootstrap that # needs to be done without target hardware or virtual machines. # # There are a few different strategies, depending on your needs # and what type of privileges/hardware you have access to. # # In this diagram, indentation refers to an output product, and # moving down vertically refers to an input step. CPU A is the # build architecture, and CPU B is the target architecture. Your # mileage may vary, and you can mix/match other approaches.. # # * CPU A --> CPU B (slowest: no privilege, no CPU B hardware) # --> Bootstrap 1 (Cross Libc + Partial Cross CPU B) # --> CPU A + CPU B Mixed Rootfs # --> Partial Emulation (PRoot) # --> Bootstrap 2 (Full Cross CPU B) # --> CPU B Host Rootfs # --> Full Emulation (PRoot) # --> Adélie Bootstrap # --> Adélie Rootfs for CPU B # # * CPU A --> CPU B (slower: privilege, no CPU B hardware) # --> Bootstrap 1 (Cross Libc + Partial Cross CPU B) # --> CPU A + CPU B Mixed Rootfs # --> Partial Emulation (registered binfmt_misc) # --> Bootstrap 2 (Full Cross CPU B) # --> CPU B Host Rootfs # --> Full Emulation (registered binfmt_misc) # --> Adélie Bootstrap # --> Adélie Rootfs for CPU B # # * CPU A --> CPU B (faster; no privilege, CPU B hardware) # --> Bootstrap 1 (Cross Libc + Partial Cross CPU B) # --> CPU A + CPU B Mixed Rootfs # --> Partial Emulation (PRoot) # --> Bootstrap 2 (Full Cross CPU B) # --> CPU B Host Rootfs # --> Copy to Native CPU B Hardware # --> Adélie Bootstrap # --> Adélie Rootfs for CPU B # # * CPU B --> CPU B (fastest; QEMU-system or native hardware) # --> Bootstrap 1 (Cross Libc) # --> CPU B Mixed Rootfs # --> Bootstrap 2 (Remove Contamination) # --> CPU B Host Rootfs # --> Adélie Bootstrap # --> Adélie Rootfs for CPU B # # The resulting Adélie 
Rootfs is capable of building the entire # Adélie Linux distribution from source. We use it to seed our # official build servers. This removes the dependency on using # previous binary releases of our distribution, which had been # manually bootstrapped way back from Gentoo and/or on a G5. # # # requirements # ------------ # # TL;DR: You must be able to run the mcmtools bootstrap script. # # Cross builds take up a lot of TIME and SPACE, and this cannot # be improved by throwing hardware at it. This is primarily due # to the tradeoff of not requiring root privileges at any point, # and the decision to emulate a native environment instead of to # force explicit cross-compilation at each step in the process. # # Targeting an architecture of opposite endianness will be even # slower; this is a QEMU limitation. # # If you wish to bootstrap to the same target CPU architecture, # or a compatible mode (e.g. i686 on x86_64), it will be faster. # # mcmtools is a hard dependency for our bootstrap process now: # # https://git.zv.io/toolchains/bootstrap # # It is a simple analog to Buildroot or Yocto. Those tools could # be used, too, and would provide the "host rootfs" environment. # # Internet access is required, but (as an exercise left to the # reader) it is possible to pre-download all required sources if # you provide your own rootfs for the static QEMU build process. # # If you are in a position to use native hardware, then you are # able to get away with only the final stages of bootstrapping. # To do this, you'd essentially comment out the first stages or # copy the results of the first stages elsewhere and continue. 
# # Other requirements that you should be aware of, estimated: # # * As many CPU cores as you can throw at it; # # * ~15 GB for each mcmtools rootfs ("seed" and "host"); # # * ~ 3 GB for toolchains; # # * ~ 2 GB for each intermediate rootfs; # # * ~ 2 GB for the "system/" package repository, when built; # # * Please refer to the README in 'bootstrap', linked above, # for more information about performance. Most of this can # be gained back if you adapt this script to use 'chroot(8)' # instead of 'PRoot', at the expense of requiring privilege, # with the correct registration of QEMU with 'binfmt_misc'. # # In brief, there is an approximate factor of 13 slowdown on # practical workloads when using QEMU user and 'PRoot' to do # dynamic binary translation and emulate a foreign machine. # # # process # ------- # # The illustration below outlines the complete bootstrap process # and roughly corresponds to the script layout/organization. Our # terminology is not perfectly consistent; please excuse this. # # The term "build" is shorthand for "build-native" CPU, which # is the machine on which you are performing the bootstrap. # # The term "native" is shorthand for "foreign-native" CPU, which # is the machine to which you are targeting the bootstrap, and # relates to the "host" CPU on which the code will run, but that # with the help of dynamic binary translation, runs "natively". # # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # build (unstable, everything provided by user) # # +-----------------+ User-provided tools. Dependency # | Baseline System | of 'mcmtools', which will verify # +-----------------+ that these tools are available. # | # +----------+ Script to build pinned versions # | mcmtools | of common system utilities, a # +----------+ host-arch host-libc toolchain, a # | host-arch musl-libc toolchain... 
# | # - - - - -|- - - - - - - - - - - - - - - - - - - - - - - - # mixed | (stable versions, unstable libc) # | # +-------------+ ...and a 'chroot'-able rootfs. A # | seed rootfs | sane, but not clean, environment # +-------------+ in which we begin the bootstrap. # | # +-----------+ Script to build 'PRoot' and its # | emulators | dependencies, as well as static # +-----------+ QEMU user binaries. Add to seed. # | # +-----------------+ Static musl toolchains targeting # | musl toolchains | a given architecture: cross from # +-----------------+ the host CPU & foreign "native". # | Output binaries will run via the # | 'binfmt_misc' mechanism + QEMU. # | # +-------------+ Script to build Alpine Package # | build tools | Keeper (APK) and dependencies. # +-------------+ All binaries are cross-compiled! # | This step is a sanity check. # | # +----------+ Script to build pinned versions # | mcmtools | of common system utilities. This # +----------+ time, all utilities are target- # | native. Build more toolchains... # | # - - - - -|- - - - - - - - - - - - - - - - - - - - - - - - # host | (stable versions, stable libc) # | # +-------------+ ...and a 'chroot'-able rootfs. A # | host rootfs | sane, clean, foreign "native" # +-------------+ rootfs that requires QEMU, or is # | able to run on native hardware. # | # +-------------+ Script to build Alpine Package # | build tools | Keeper (APK) and dependencies. # +-------------+ These binaries are native built! # | This step is required! # | # +-------------+ Script to build the Adélie Linux # | system repo | "system/" package repository. It # +-------------+ is used to build core packages. # | # +---------------+ Script to install packages into # | image creator | a clean rootfs. Carryover from # +---------------+ the mcmtools process is removed. # | # - - - - -|- - - - - - - - - - - - - - - - - - - - - - - - # verify | (optional verification) # | # ... 
#                           The "host" stage can be repeated
#          |                zero or more times to ensure the
#          |                final image is not contaminated.
#          |
# - - - - -|- - - - - - - - - - - - - - - - - - - - - - - -
#  output  |  (Welcome to Adélie Linux)
#          |
#  +---------------+        This is a minimal Adélie rootfs.
#  | adelie rootfs |        Copy it to native hardware, then
#  +---------------+        use it as a builder 'chroot'. It
#                           is used to seed official Adélie
#                           build boxen (e.g. autobuilder).
#
# limitations
# -----------
#
# The builds are not hermetically sealed. That is not the point.
# The build environment should not *need* anything from outside,
# but it is not *prevented* from accessing anything. You should
# run this on a clean, trusted machine.
#
# There is no guarantee of byte-for-byte reproducible builds at
# this time. This is, in part, due to timestamps and tar headers
# but may involve a lack of determinism in parallel builds.
#
# QEMU user emulation may cause subtle incompatibilities with the
# target CPU hardware. For example, CPU feature (mis)detection.
#
# It is not currently possible to safely reuse any of the output
# if the target architecture is changed. You will need to start
# from scratch if you wish to change the target. This is a TODO.
#
#
# todo
# ----
#
#   * Do not inherit the build environment; avoid contamination.
#
#   * Finish the "system/" build; the "bootstrap.sh" script will
#     require some work. Current status: preimage is finished.
#

##
# Absolute, symlink-resolved directory that contains this script.
# Helper scripts and sources that live next to it are addressed
# through this path. Every expansion is quoted so that a checkout
# path containing whitespace or glob characters still resolves.
#
HERE="$(dirname "$(readlink -f "${0}")")";


#---------------------------------------------------------------
# initialization

##
# Haaaalp!
# usage () { printf "Usage: %s ARCH BASE\n\n" "${0}"; cat </dev/null || \ cat >> "${MTOOLS}"/config.mak <<"EOF" # __SENTINEL__ # supported targets ifneq ($(findstring aarch64-,$(TARGET)),) GCC_CONFIG_FOR_TARGET += --with-arch=armv8-a --with-abi=lp64 --enable-fix-cortex-a53-835769 --enable-fix-cortex-a53-843419 endif ifneq ($(findstring armv7l-,$(TARGET)),) GCC_CONFIG_FOR_TARGET += --with-arch=armv7-a --with-tune=generic-armv7-a --with-fpu=vfpv3-d16 --with-float=hard --with-abi=aapcs-linux --with-mode=thumb endif ifneq ($(findstring powerpc64-,$(TARGET)),) GCC_CONFIG_FOR_TARGET += --enable-secureplt --enable-decimal-float=no --with-abi=elfv2 endif ifneq ($(findstring powerpc-,$(TARGET)),) GCC_CONFIG_FOR_TARGET += --enable-secureplt --enable-decimal-float=no endif ifneq ($(findstring x86_64-,$(TARGET)),) GCC_CONFIG_FOR_TARGET += endif ifneq ($(findstring i586-,$(TARGET)),) GCC_CONFIG_FOR_TARGET += --with-arch=i586 --with-tune=pentium2 --enable-cld --enable-mmx endif # experimental targets ifneq ($(findstring riscv64-,$(TARGET)),) GCC_CONFIG_FOR_TARGET += --with-arch=rv64gc --with-abi=lp64d --enable-autolink-libatomic endif ifneq ($(findstring s390x-,$(TARGET)),) GCC_CONFIG_FOR_TARGET += --with-arch=z196 --with-tune=zEC12 --with-zarch --with-long-double-128 --enable-decimal-float endif EOF sed -i "${MTOOLS}"/config.mak \ -e "s/__SENTINEL__/${sent}/g" \ ; # cleaning ( cd "${MTOOLS}"; rm -fr tmp; # save 10 GB ) # is any of this actually needed? ( cd "${MTOOLS}"/sys; mkdir -p dev; mkdir -p proc; mkdir -p sys; rm -fr usr; ln -s . usr; ) ## emulators # # Dependencies are built with the mcmtools host toolchain; a # reason to not force musl here is in the event that these # cannot be built statically and the host libc is different. # Our priority is to obtain a functioning 'PRoot' above all. # # QEMU itself is built inside an Alpine Linux rootfs; we do # this because we still need Python 3 to build it. 
You can # manually provide your own static QEMU user binaries and be # on your way without Alpine, but it is a good 'PRoot' test. # test -d "${MTOOLS}/sys/emus/bin" || \ PATH="${MTOOLS}/host/bin:${MTOOLS}/sys/bin" \ DEST="${MTOOLS}" \ ./prootemu \ ; fi #--------------------------------------------------------------- # musl toolchains ## # We have a musl-targeting toolchain that runs on the host, but # it is linked to the host libc and may not run in this chroot. # # We need to build the same toolchain, but static. There are two # possible directions to go: (1) a cross toolchain that runs at # full speed on the build machine, at the cost of having to tell # downstream build scripts how to cross compile, or (2) a native # toolchain for the foreign (target) architecture that runs slow # in QEMU and still requires the cross toolchain to build it. # # Build both. We will eventually need both toolchains, anyway. # # Note: these toolchains will use the updated 'config.mak' file. # if ! test -d "${MTOOLS}"/sys/tc/native; then # FIXME: no hard code cd "${BASE}"; test -d musl-cross-make \ || git clone ${CHAINS}/musl-cross-make.git; cd musl-cross-make; git pull; # always use the latest ## # Ensure consistent 'config.mak' for all toolchain builds. # Everything remains the same except for build/host/target. # cp "${MTOOLS}"/config.mak config.mak; sed -i \ -E 's/(--(build|host|target)=)[-_[:alnum:]]+(\s|$)/\1 /g' \ config.mak \ ; sed -i \ -E 's/(--(build|host|target)=)/\1__TARGET__/g' \ config.mak \ ; sed -i \ -e "s/__TARGET__/${TARGET}/g" \ config.mak \ ; ## musl toolchains # # Build these toolchains statically using the musl toolchain # from the seed rootfs so that it is known to work correctly # (the original musl toolchain itself may itself be linked # with glibc or be unsafe to use in some contexts). # # Note: "native" is for the foreign target CPU architecture. 
# MCMTOOLS="${MTOOLS}" \ ./scripts/build ${TARGET} \ ; for k in cross native; do rm -fr "${MTOOLS}"/sys/tc/${k}; mkdir "${MTOOLS}"/sys/tc/${k}; tar -C "${MTOOLS}"/sys/tc/${k} \ --strip-components=1 \ -xzf output/${TARGET}-${k}.tgz \ ; done fi #--------------------------------------------------------------- # build tools (cross) ## # Build 'abuild', its dependencies, and other utilities. # PROOT_NO_SECCOMP=1 \ PATH="/tc/cross/bin:${DEF_PATH}" \ SHELL=/bin/sh \ DEST=/usr/local \ CURL_CA_BUNDLE=/cacert.pem \ CC=${TARGET}-gcc \ CXX=${TARGET}-g++ \ AR=${TARGET}-ar \ LD=${TARGET}-ld \ CPP=${TARGET}-cpp \ RANLIB=${TARGET}-ranlib \ ${MTOOLS}/sys/emus/bin/proot \ -S "${MTOOLS}"/sys \ -q "${MTOOLS}"/sys/emus/bin/qemu-${q} \ -b "${HERE}" \ "${HERE}"/setup-abuild \ ; #--------------------------------------------------------------- # rootfs: build ## # Create a tarball of the build rootfs. The image creator could # use this as input if the target architecture matches. # cd "${BASE}"; if ! test -f rootfs-${BUILDS}.tgz; then tar -C mcmtools/sys \ -pczf rootfs-${BUILDS}.tgz \ . \ ; fi #--------------------------------------------------------------- # mcmtools (host) ## # NOTE: The 'PATH' order is *really* important. Foreign "native" # toolchains must be the first ones found; e.g. '/usr/bin/gcc' # is a symlink to 'ccache' and isn't a functional compiler. # # NOTE: 'LD_LIBRARY_PATH' is needed because of nonstandard path # of toolchain, which otherwise produces these errors: # # Error loading shared library libstdc++.so.6: # Exec format error (needed by ...) # Error loading shared library libgcc_s.so.1: # Exec format error (needed by ...) # # NOTE: We *do* need to build 'musl-cross-make' (even though it # may seem redundant) so that we can safely install the dynamic # loader. If we do not do this, we're on the hook to symlink it. # # Once 'coreutils' is built, 'uname' will return correctly, then # future software should(tm) build as if it is built natively. 
#
# FIXME: is Linux 3.2.0 really appropriate here?
#
cd "${BASE}";
if ! test -d "mcmtools-${TARGET}/sys/tc/musl/bin"; then # FIXME: no hard code
    PROOT_NO_SECCOMP=1 \
    PATH="/tc/native/bin:${DEF_PATH}" \
    LD_LIBRARY_PATH=/tc/native/lib \
    SHELL=/bin/sh \
    BASE="${BASE}" \
    DEST="${BASE}/mcmtools-${TARGET}" \
    ARCH="${TARGET}" \
    CURL_CA_BUNDLE=/cacert.pem \
    "${MTOOLS}"/sys/emus/bin/proot \
        -S "${MTOOLS}"/sys \
        -q "${MTOOLS}/sys/emus/bin/qemu-${q}" \
        -b "${HERE}" \
        -k "3.2.0" \
        "${BASE}"/bootstrap/bootstrap \
        ;
fi

# cleaning (subshell, so the working directory stays at BASE)
(
    cd "mcmtools-${TARGET}";
    rm -fr tmp; # save 10 GB
)

##
# Install the native static toolchain over the native rootfs for
# a quick way to have the dynamic loader. Overwrite the symlink!
#
if test -h "mcmtools-${TARGET}/sys/bin/gcc"; then # FIXME: no hard code
    tar -C "mcmtools-${TARGET}/sys" \
        --strip-components=1 \
        -xzf "musl-cross-make/output/${TARGET}-native.tgz" \
        ;
fi


#---------------------------------------------------------------
# build tools (host)

##
# Build 'abuild', its dependencies, and other utilities.
#
PROOT_NO_SECCOMP=1 \
PATH="/tc/musl/bin:${DEF_PATH}" \
SHELL=/bin/sh \
DEST=/usr/local \
CURL_CA_BUNDLE=/cacert.pem \
CC=gcc \
CXX=g++ \
AR=ar \
LD=ld \
CPP=cpp \
RANLIB=ranlib \
"${MTOOLS}"/sys/emus/bin/proot \
    -S "${BASE}/mcmtools-${TARGET}/sys" \
    -q "${MTOOLS}/sys/emus/bin/qemu-${q}" \
    -b "${HERE}" \
    "${HERE}"/setup-abuild \
    ;


#---------------------------------------------------------------
# rootfs: host

##
# Create a tarball of the host rootfs. This is a safety measure.
#
cd "${BASE}";
if ! test -f "rootfs-${TARGET}.tgz"; then
    tar -C "mcmtools-${TARGET}/sys" \
        -pczf "rootfs-${TARGET}.tgz" \
        . \
        ;
fi


#---------------------------------------------------------------
# patch rootfs

##
# Some paths are hard-coded into various binaries. Since we need
# to "relocate" everything to run at '/' but patching the '.tar'
# file at once is apparently not valid, we must patch each file
# individually and compress the output to a usable rootfs image.
#
cd "${BASE}";
if ! test -f "rootfs-${TARGET}-patched.tgz"; then
    t=$(mktemp -d);
    tar -C "${t}" -xf "rootfs-${TARGET}.tgz";

    ##
    # Build 'binsub', a string patch tool.
    #
    "${MTOOLS}"/musl/bin/gcc -static -O3 \
        -o binsub \
        "${HERE}"/binsub.c \
        ;

    ##
    # Replace needle with the empty string.
    #
    # The needle is a literal path, so match it as a fixed string
    # ('-F') rather than as a regular expression. File names are
    # read verbatim ('IFS=' and '-r') and passed on quoted, so
    # names with blanks or backslashes are patched correctly.
    #
    x="${BASE}/mcmtools-${TARGET}/sys";
    grep -rlF -- "${x}" "${t}" | while IFS= read -r k; do
        # exceptions
        case "${k}" in
            "${t}"/bin/perl) continue; ;; # @INC depends on the old prefix
        esac

        # delete needle
        ./binsub "${k}" "${x}"; # alternatively use "/"
    done

    ##
    # Create a compatibility symlink so that any tools excepted
    # above can still find the file(s) they need.
    #
    mkdir -p "${t}/${x%/*}";
    ln -s / "${t}/${x}"; # compat symlink for exceptions

    tar -C "${t}" \
        -pczf "rootfs-${TARGET}-patched.tgz" \
        . \
        ;

    rm -fr "${t}";
fi


#---------------------------------------------------------------
# preimage

##
# The preimage has some modifications to support using the real
# "bootstrap.sh" script. Note that we need to bind-mount and use
# a different working directory because PRoot has a few defaults
# that would otherwise shadow paths we're trying to create.
#
cd "${BASE}";
if ! test -f "rootfs-${TARGET}-preimage.tgz"; then # FIXME: no hard code
    rm -fr "rootfs-${TARGET}-preimage";
    mkdir "rootfs-${TARGET}-preimage";
    tar -C "rootfs-${TARGET}-preimage" \
        -xzf "rootfs-${TARGET}-patched.tgz" \
        || true; # we're somehow messing up some bzip binaries?

    PROOT_NO_SECCOMP=1 \
    PATH="${DEF_PATH}" \
    SHELL=/bin/sh \
    CURL_CA_BUNDLE=/cacert.pem \
    "${MTOOLS}"/sys/emus/bin/proot \
        -S "${MTOOLS}"/sys \
        -b "${BASE}/rootfs-${TARGET}-preimage" \
        -w "${BASE}/rootfs-${TARGET}-preimage" \
        -b "${HERE}" \
        "${HERE}"/setup-preimage \
        ;

    tar -C "rootfs-${TARGET}-preimage" \
        -pczf "rootfs-${TARGET}-preimage.tgz" \
        . \
        ;
fi


#---------------------------------------------------------------
# packages

##
# This is where the original "bootstrap.sh" script runs. The aim
# of this step is to produce '.apk' files that were built using
# non-Adélie-packaged tools, but would install Adélie tools.
#
# Like every other stage, start from BASE explicitly instead of
# relying on the directory the previous stage happened to leave.
#
cd "${BASE}";
if ! test -f "rootfs-${TARGET}-packages.tgz"; then # FIXME: no hard code
    rm -fr "rootfs-${TARGET}-packages";
    mkdir "rootfs-${TARGET}-packages";
    tar -C "rootfs-${TARGET}-packages" \
        -xzf "rootfs-${TARGET}-preimage.tgz" \
        ;

    PROOT_NO_SECCOMP=1 \
    PATH="${DEF_PATH}" \
    SHELL=/bin/sh \
    CURL_CA_BUNDLE=/cacert.pem \
    "${MTOOLS}"/sys/emus/bin/proot \
        -R "${BASE}/rootfs-${TARGET}-packages" \
        -i 1000:300 \
        -q "${MTOOLS}/sys/emus/bin/qemu-${q}" \
        -b "${HERE}" \
        "${HERE}"/setup-packages \
        ;
fi
# TODO: create tarball of this directory


#---------------------------------------------------------------
# image creator

# TODO
# apk --root /foo --arch armv7 --initdb add
# apk --root /foo add adelie-core dash-binsh build-tools